Agent/AuctionServices/Scraper/Scraper.js

534 lines
15 KiB
JavaScript

// Scraper.js
'use strict';
const fs = require('node:fs');
const path = require('node:path');
const fetch = require('node-fetch');
const { config } = require('../../config');
const { Key } = require('../../.Key');
/**
 * Base class for auction-platform scrapers.
 *
 * It bundles three concerns:
 *  - browser context/session persistence (`_getContext`, `_saveSession`),
 *  - authentication against the Jucundus backend (token cache, token refresh),
 *  - reporting sale/lot events to the Jucundus backend (`Jucundus*` methods).
 *
 * The scraping hooks (`getPictures`, `getLotInfos`, `getSaleInfos`, `getLotList`,
 * `Live`) are empty here; presumably subclasses override them — TODO confirm.
 */
class Scraper {
  _Name = "";
  _Browser = null;
  _PAGE_MAIN = "";
  _PAGE_LOGIN = "";
  _USER = "";
  _PWD = "";
  _PATH_SESSION_FILE = "";
  _PATH_TOKEN_FILE = "";
  _BROWSER_TOOL = null;
  _Proxy = "";
  _DebugMode = false;
  _JucundusUrl = "";
  token = "";

  /**
   * @param {string} Url - URL of the sale handled by this scraper; it is sent
   *   (URI-encoded) to the backend to look the sale up.
   */
  constructor(Url) {
    this.Url = Url;
    this._JucundusUrl = config.jucundus.url;
    this._PATH_TOKEN_FILE = ".session/token.json";
    this.token = this.getTokenInCache();
  }

  /**
   * Creates a browser context, restoring the stored session state when the
   * session file exists.
   *
   * @param {object} browser - object exposing `newContext(options)`
   *   (looks like a Playwright browser — TODO confirm).
   * @returns {Promise<object>} the value returned by `browser.newContext`.
   * @throws {Error} wrapping any failure, with the original error as `cause`.
   */
  async _getContext(browser) {
    const options = {
      timezoneId: 'Europe/Paris',
      extraHTTPHeaders: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0',
        'Accept-Language': 'fr-FR,fr;q=0.9'
      }
    };
    try {
      if (fs.existsSync(this._PATH_SESSION_FILE)) {
        options.storageState = this._PATH_SESSION_FILE;
      }
      return await browser.newContext(options);
    } catch (e) {
      console.error('Error: '+e);
      throw new Error('Error: '+e, { cause: e });
    }
  }

  /**
   * Persists the storage state of the page's context to `_PATH_SESSION_FILE`.
   *
   * @param {object} page - object exposing `context().storageState({ path })`.
   * @returns {Promise<object>} the same `page`, once the state has been written.
   *   Rejects if writing the state fails (previously the promise never settled).
   */
  async _saveSession(page) {
    console.log('-- Save Session --');
    await page.context().storageState({ path: this._PATH_SESSION_FILE });
    return page;
  }

  // Scraping hooks: intentionally empty in this base class.
  getPictures({ page, data}) {
  }
  getLotInfos({ page, data}) {}
  getSaleInfos({ page, data}) {}
  getLotList({ page, data}) {}
  async Live({ page, data}) {}

  /**
   * Reads the cached backend token from `_PATH_TOKEN_FILE`.
   *
   * @returns {string} the cached token, or "" when the file is missing,
   *   unreadable, not valid JSON, or does not hold a string token.
   */
  getTokenInCache() {
    if (!fs.existsSync(this._PATH_TOKEN_FILE)) {
      return "";
    }
    try {
      const cached = JSON.parse(fs.readFileSync(this._PATH_TOKEN_FILE, 'utf8'));
      return cached && typeof cached.token === 'string' ? cached.token : "";
    } catch (error) {
      // A corrupt cache must not prevent construction: fall back to "no token",
      // which makes the next request authenticate again.
      console.error('Error reading token cache:', error);
      return "";
    }
  }

  /**
   * Writes the token to `_PATH_TOKEN_FILE` as `{"token": ...}`, creating the
   * parent directory first so a fresh checkout does not fail with ENOENT.
   *
   * @param {string} token
   */
  setTokenInCache(token) {
    fs.mkdirSync(path.dirname(this._PATH_TOKEN_FILE), { recursive: true });
    fs.writeFileSync(this._PATH_TOKEN_FILE, JSON.stringify({ token: token }));
  }

  /**
   * Tells whether a JWT is expired, based on its `exp` claim.
   *
   * The payload segment is base64url-encoded; `Buffer` decoding accepts both the
   * standard and the URL-safe alphabet, whereas `atob` throws on `-` / `_` and
   * would make valid tokens look expired.
   *
   * @param {string} token - JWT (`header.payload.signature`).
   * @returns {boolean} true when the token is expired, has no `exp` claim, or
   *   cannot be decoded.
   */
  isTokenExpired(token) {
    try {
      const payloadSegment = String(token).split('.')[1];
      const payload = JSON.parse(Buffer.from(payloadSegment, 'base64').toString('utf8'));
      if (!payload || !payload.exp) {
        return true;
      }
      const currentTime = Math.floor(Date.now() / 1000);
      // The token must not be accepted on or after `exp`.
      return payload.exp <= currentTime;
    } catch (error) {
      console.error('Error decoding token:', error);
      return true;
    }
  }

  /**
   * Asks the backend whether the agent is connected, using the current token.
   *
   * @returns {Promise<boolean>} resolves to true when the response is OK.
   *   Rejects with the literal `false` (not an Error) on a non-OK response or
   *   a network failure; kept as-is for backward compatibility with callers.
   */
  async isAgentConnected() {
    let response;
    try {
      response = await fetch(this._JucundusUrl+'/api/user/agentConnected', {
        method: 'GET',
        headers: {
          'Authorization': 'Bearer '+this.token
        }
      });
    } catch (error) {
      console.log('isAgentConnected ? error Agent not connected: '+error);
      return Promise.reject(false);
    }
    if (!response.ok) {
      console.log('isAgentConnected ? Agent not connected: '+response.statusText);
      return Promise.reject(false);
    }
    console.log('isAgentConnected ? Agent connected');
    return true;
  }

  /**
   * Checks that a token exists, is not expired, and is accepted by the backend.
   *
   * @returns {Promise<boolean>} true when the connection is usable; never rejects
   *   because of `isAgentConnected` failures.
   */
  async checkJucundusConnexion() {
    if (!this.token) {
      console.log('No token');
      return false;
    }
    if (this.isTokenExpired(this.token)) {
      console.log('Token expired');
      return false;
    }
    try {
      if (!(await this.isAgentConnected())) {
        console.log('Agent not connected');
        return false;
      }
    } catch (error) {
      console.log('Agent not connected');
      return false;
    }
    return true;
  }

  /**
   * Authenticates against the backend and caches the returned token.
   *
   * @param {string} email
   * @param {string} password
   * @returns {Promise<string>} the new token.
   * @throws {Error} when the request fails or the response carries no token.
   */
  async getNewToken(email, password) {
    try {
      const response = await fetch(this._JucundusUrl+'/authenticate', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ email, password })
      });
      if (!response.ok) {
        throw new Error('Failed to retrieve new token');
      }
      const data = await response.json();
      if (!data || !data.token) {
        // Do not cache `undefined` as a token.
        throw new Error('Failed to retrieve new token');
      }
      this.setTokenInCache(data.token);
      return data.token;
    } catch (error) {
      console.error('Error retrieving new token:', error);
      throw error;
    }
  }

  /**
   * Sends an authenticated JSON request to the backend, refreshing the token
   * first when the current one is missing, expired or refused.
   *
   * @param {string} url - absolute URL.
   * @param {string} method - HTTP method.
   * @param {?string} [body=null] - already-serialized request body.
   * @returns {Promise<*>} the parsed JSON response body.
   * @throws {Error} with the backend's `error` field when available, otherwise
   *   the HTTP status text or 'Unknown error'.
   */
  async RequestJucundus(url, method, body = null) {
    console.log('RequestJucundus() '+url);
    if (!(await this.checkJucundusConnexion())) {
      this.token = await this.getNewToken(config.jucundus.useremail, Key.jucundusPassword);
    }
    const response = await fetch(url, {
      method: method,
      headers: {
        'Authorization': 'Bearer '+this.token,
        'Content-Type': 'application/json'
      },
      body: body
    });
    if (!response.ok) {
      let message = 'Unknown error';
      try {
        const err = await response.json();
        if (err && err.error) {
          message = err.error;
        }
      } catch (parseError) {
        // Error body was not JSON: report the HTTP status text instead of a
        // misleading JSON parse error.
        message = response.statusText || message;
      }
      throw new Error(message);
    }
    const data = await response.json();
    console.log('RequestJucundus() data:' + JSON.stringify(data, null, 2));
    return data;
  }

  /**
   * Serializes `payload`, sends it, and resolves to true once the backend has
   * answered successfully.
   *
   * @param {string} url
   * @param {string} method
   * @param {object} payload
   * @returns {Promise<boolean>} true after the request completed.
   * @throws {Error} wrapping the failure, with the original error as `cause`.
   */
  async _sendJucundusEvent(url, method, payload) {
    try {
      await this.RequestJucundus(url, method, JSON.stringify(payload));
      return true;
    } catch (error) {
      console.error(error);
      throw new Error('Error: '+error, { cause: error });
    }
  }

  /**
   * Checks whether a stop was requested for this sale (status `askStop`). When
   * it was, the sale status is set to 'end' before resolving.
   *
   * @returns {Promise<boolean>} true when a stop was requested, false otherwise.
   * @throws {Error} when the lookup or the status update fails.
   */
  async JucundusCheckStop() {
    const url = encodeURIComponent(this.Url);
    try {
      const saleInfo = await this.RequestJucundus(this._JucundusUrl+'/api/sale/getByUrl/'+url, 'GET');
      if (saleInfo.status !== 'askStop') {
        return false;
      }
      console.log('Stop was asked');
      // Wait for the status update so callers only see `true` once it is stored.
      await this.JucundusSetSaleStatus(saleInfo, 'end');
      return true;
    } catch (error) {
      console.error(error);
      throw new Error('Error: '+error, { cause: error });
    }
  }

  /**
   * Marks this sale as ended with the given status code.
   *
   * The previous implementation resolved immediately (leftover debug
   * short-circuit), so invalid codes were never rejected and were still sent to
   * the backend.
   *
   * @param {string} code - one of 'end', 'endOnError', 'endOnRequest'.
   * @returns {Promise<boolean>} true once the status has been stored.
   * @throws {Error} when `code` is invalid or a backend request fails.
   */
  async JucundusEndSale(code) {
    console.log('JucundusEndSale: '+code);
    if (code !== 'end' && code !== 'endOnError' && code !== 'endOnRequest') {
      console.error('Error: code must be end or endOnError or endOnRequest');
      throw new Error('Error: code must be end or endOnError or endOnRequest');
    }
    const url = encodeURIComponent(this.Url);
    try {
      const saleInfo = await this.RequestJucundus(this._JucundusUrl+'/api/sale/getByUrl/'+url, 'GET');
      await this.JucundusSetSaleStatus(saleInfo, code);
      return true;
    } catch (error) {
      console.error(error);
      throw new Error('Error: '+error, { cause: error });
    }
  }

  /**
   * Sets `saleInfo.status` (the passed object is mutated) and stores the sale
   * with a PUT request.
   *
   * @param {object} saleInfo - sale record; its `_id` is used in the URL.
   * @param {string} status
   * @returns {Promise<boolean>} true once the backend has answered successfully.
   * @throws {Error} when the request fails.
   */
  async JucundusSetSaleStatus(saleInfo, status) {
    saleInfo.status = status;
    return this._sendJucundusEvent(this._JucundusUrl+'/api/sale/sale/'+saleInfo._id, 'PUT', saleInfo);
  }

  /**
   * Reports that the sale moved on to a new lot.
   *
   * @returns {Promise<boolean>} true once the backend has answered successfully.
   * @throws {Error} when the request fails.
   */
  async JucunduNextItem(sale_id, timestamp, item_id, num_lot, title, description, EstimateLow, EstimateHigh, RawData) {
    console.log('JucunduNextItem', sale_id, timestamp, item_id, num_lot);
    return this._sendJucundusEvent(this._JucundusUrl+'/api/lot/NextItem', 'POST', {
      idPlatform: item_id,
      idSalePlatform: sale_id,
      platform: this._Name,
      timestamp: timestamp,
      lotNumber: num_lot,
      title: title,
      description: description,
      EstimateLow: EstimateLow,
      EstimateHigh: EstimateHigh,
      RawData: RawData
    });
  }

  /**
   * Reports a bid on a lot.
   *
   * @returns {Promise<boolean>} true once the backend has answered successfully.
   * @throws {Error} when the request fails.
   */
  async JucundusBid(item_id, timestamp, amount, auctioned_type) {
    console.log('JucundusBid', timestamp, item_id, amount, auctioned_type);
    return this._sendJucundusEvent(this._JucundusUrl+'/api/lot/Bid', 'POST', {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type
    });
  }

  /**
   * Reports the final result of a lot.
   *
   * @returns {Promise<boolean>} true once the backend has answered successfully.
   * @throws {Error} when the request fails.
   */
  async JucunduAuctionedItem(item_id, timestamp, amount, sold, auctioned_type) {
    console.log('JucunduAuctionedItem', timestamp, item_id, amount, sold);
    return this._sendJucundusEvent(this._JucundusUrl+'/api/lot/AuctionedItem', 'POST', {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
      sold: sold,
    });
  }
}
/**
 * Helpers shared by scrapers: platform detection from a URL and XPath-based
 * lookups on a page.
 *
 * The page helpers expect `page` to expose `waitForXPath`, `$x` and `evaluate`
 * (looks like the Puppeteer page API — TODO confirm). Every helper takes a list
 * of XPath expressions and uses the first one that yields a usable result.
 */
class ScraperTools {
  _CONST_INTERENCHERES = 'interencheres';
  _CONST_DROUOT = 'drouot';

  /**
   * Detects the auction platform a URL belongs to.
   *
   * Any text preceding the first "http" is ignored. When the input contains no
   * "http" at all (scheme-less URL) the whole input is inspected; previously
   * such input was never recognized. Matching is case-insensitive.
   *
   * @param {*} URL - URL (or text containing one); converted with `String()`.
   * @returns {string|undefined} 'interencheres', 'drouot', or undefined when no
   *   known platform is found.
   */
  detectPlatform(URL) {
    const text = String(URL).toLowerCase();
    const schemeIndex = text.indexOf('http');
    const candidate = schemeIndex === -1 ? text : text.slice(schemeIndex);
    if (candidate.includes('interencheres')) {
      return this._CONST_INTERENCHERES;
    }
    if (candidate.includes('drouot')) {
      return this._CONST_DROUOT;
    }
    return undefined;
  }

  /**
   * Waits up to one second for the XPath to appear, then queries it.
   * The wait is best-effort: a timeout only means the element may be absent,
   * which the following query reports as an empty list.
   *
   * @returns {Promise<Array>} the elements matching `xpath`.
   */
  async _waitAndQuery(page, xpath) {
    try {
      await page.waitForXPath(xpath, { timeout: 1000 });
    } catch (e) {
      // Deliberately ignored: fall through to the query.
    }
    return page.$x(xpath);
  }

  /** True when `value` is neither null/undefined nor the empty string. */
  _hasValue(value) {
    return value != null && value !== "";
  }

  /**
   * Clicks the first element matched by any of the XPath expressions.
   *
   * @param {string[]} XPath - candidate expressions, tried in order.
   * @param {object} page
   * @param {string} context - label used in the error log.
   * @returns {Promise<boolean>} true when an element was clicked, false otherwise.
   */
  async clickLink(XPath = [], page, context) {
    for (const expression of XPath) {
      const elements = await this._waitAndQuery(page, expression);
      if (elements.length > 0) {
        await elements[0].evaluate(b => b.click());
        return true;
      }
    }
    console.error("Error: No Link found for "+context);
    return false;
  }

  /**
   * Reads an attribute from the first matching element that has a non-empty
   * value for it.
   *
   * @param {string[]} XPath - candidate expressions, tried in order.
   * @param {object} page
   * @param {string} attribute - attribute name.
   * @param {string} context - label used in the error log.
   * @returns {Promise<string>} the attribute value, or "" when none was found.
   */
  async getAttribute(XPath = [], page, attribute, context) {
    for (const expression of XPath) {
      const elements = await this._waitAndQuery(page, expression);
      if (elements.length > 0) {
        const value = await page.evaluate((element, attr) => element.getAttribute(attr), elements[0], attribute);
        if (this._hasValue(value)) {
          return value;
        }
      }
    }
    console.error("Error: No content found for the Attribute "+attribute+" for "+context);
    return "";
  }

  /**
   * Reads the text content of the first matching element that has any.
   *
   * @param {string[]} XPath - candidate expressions, tried in order.
   * @param {object} page
   * @param {string} context - label used in the error log.
   * @param {boolean} [log=false] - when true, logs the matched elements.
   * @returns {Promise<string>} the text content, or "" when none was found.
   */
  async getTextContent(XPath = [], page, context, log = false) {
    for (const expression of XPath) {
      const elements = await this._waitAndQuery(page, expression);
      if (elements.length > 0) {
        if (log) {
          console.log(elements);
        }
        const text = await page.evaluate(el => el.textContent, elements[0]);
        if (this._hasValue(text)) {
          return text;
        }
      }
    }
    console.error("Error: No content found for "+context);
    return "";
  }

  /**
   * Tells whether any of the XPath expressions currently matches an element.
   * Does not wait; query errors are treated as "no match".
   *
   * @param {string[]} XPath - candidate expressions, tried in order.
   * @param {object} page
   * @param {string} context - unused.
   * @returns {Promise<boolean>}
   */
  async ElementExists(XPath = [], page, context) {
    for (const expression of XPath) {
      try {
        const elements = await page.$x(expression);
        if (elements.length > 0) {
          return true;
        }
      } catch (e) {
        // Best effort: a failing query counts as "not found" for this expression.
      }
    }
    return false;
  }

  /**
   * Same as `getTextContent`, but searches relative to `Element` and does not
   * wait for the expression to appear.
   *
   * @param {string[]} XPath - candidate expressions, tried in order.
   * @param {object} page
   * @param {object} Element - element handle exposing `$x`.
   * @param {string} context - label used in the error log.
   * @returns {Promise<string>} the text content, or "" when none was found.
   */
  async getTextContentElement(XPath = [], page, Element, context) {
    for (const expression of XPath) {
      const matches = await Element.$x(expression);
      if (matches.length > 0) {
        const text = await page.evaluate(el => el.textContent, matches[0]);
        if (this._hasValue(text)) {
          return text;
        }
      }
    }
    console.error("Error: No content found for "+context);
    return "";
  }

  /**
   * Same as `getAttribute`, but searches relative to `Element` and does not
   * wait for the expression to appear.
   *
   * @param {string[]} XPath - candidate expressions, tried in order.
   * @param {object} page
   * @param {object} Element - element handle exposing `$x`.
   * @param {string} attribute - attribute name.
   * @param {string} context - label used in the error log.
   * @returns {Promise<string>} the attribute value, or "" when none was found.
   */
  async getAttributeElement(XPath = [], page, Element, attribute, context) {
    for (const expression of XPath) {
      const matches = await Element.$x(expression);
      if (matches.length > 0) {
        const value = await page.evaluate((el, attr) => el.getAttribute(attr), matches[0], attribute);
        if (this._hasValue(value)) {
          return value;
        }
      }
    }
    console.error("Error: No content found for the Attribute "+attribute+" for "+context);
    return "";
  }
}
// Public API of this module: the Scraper base class and the ScraperTools helper class.
module.exports = {Scraper, ScraperTools};