534 lines
15 KiB
JavaScript
534 lines
15 KiB
JavaScript
// Scraper.js
|
|
'use strict';
|
|
const fs = require('node:fs');
|
|
const fetch = require('node-fetch');
|
|
const { config } = require('../../config');
|
|
const { Key } = require('../../.Key');
|
|
|
|
/**
 * Base scraper for one sale URL, plus the client used to report progress to
 * the Jucundus API.
 *
 * The scraping hooks (getPictures, getLotInfos, getSaleInfos, getLotList,
 * Live) are empty here; presumably platform-specific subclasses override
 * them (TODO confirm against the subclasses).
 */
class Scraper {
  // Platform name sent to Jucundus in the `platform` field of lot requests.
  _Name = "";
  // The next five fields are not read in this class; presumably they are
  // populated and used by subclasses (TODO confirm).
  _Browser = null;
  _PAGE_MAIN = "";
  _PAGE_LOGIN = "";
  _USER = "";
  _PWD = "";

  // Browser storage-state file: read by _getContext(), written by _saveSession().
  _PATH_SESSION_FILE = "";
  // JSON file caching the Jucundus token as { "token": "..." }.
  _PATH_TOKEN_FILE = "";

  // Not read in this class (see note above).
  _BROWSER_TOOL = null;

  // Not read in this class (see note above).
  _Proxy = "";
  _DebugMode = false;

  // Base URL of the Jucundus API; set from config in the constructor.
  _JucundusUrl = "";

  // Current Jucundus bearer token ("" when nothing is cached).
  token = "";

  /**
   * @param {string} Url - URL of the sale handled by this scraper; it is the
   *   key used to look the sale up in Jucundus.
   */
  constructor(Url) {
    this.Url = Url;
    this._JucundusUrl = config.jucundus.url;

    this._PATH_TOKEN_FILE = ".session/token.json";
    this.token = this.getTokenInCache();
  }

  /**
   * Creates a browser context with a French locale/timezone, restoring the
   * saved storage state when the session file exists.
   *
   * @param {object} browser - Object exposing `newContext(options)`.
   * @returns {Promise<object>} Whatever `browser.newContext` resolves to.
   * @throws {Error} Wraps any failure as `Error('Error: ' + cause)`.
   */
  async _getContext(browser) {
    try {
      const options = {
        timezoneId: 'Europe/Paris',
        extraHTTPHeaders: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0',
          'Accept-Language': 'fr-FR,fr;q=0.9',
        },
      };
      // Only pass storageState when there is a saved session to restore.
      if (fs.existsSync(this._PATH_SESSION_FILE)) {
        options.storageState = this._PATH_SESSION_FILE;
      }
      return await browser.newContext(options);
    } catch (e) {
      console.error('Error: ' + e);
      throw new Error('Error: ' + e, { cause: e });
    }
  }

  /**
   * Persists the storage state of the page's context to the session file.
   *
   * @param {object} page - Object exposing `context().storageState({ path })`.
   * @returns {Promise<object>} The same `page`, once the state is written.
   * @throws Whatever `storageState` rejects with (previously such a failure
   *   left the returned promise pending forever).
   */
  async _saveSession(page) {
    console.log('-- Save Session --');
    await page.context().storageState({ path: this._PATH_SESSION_FILE });
    return page;
  }

  // Scraping hooks: intentionally empty in this class.
  getPictures({ page, data }) {}

  getLotInfos({ page, data }) {}

  getSaleInfos({ page, data }) {}

  getLotList({ page, data }) {}

  async Live({ page, data }) {}

  /**
   * Reads the cached token from `_PATH_TOKEN_FILE`.
   *
   * @returns {string} The cached token, or "" when the file is missing,
   *   unreadable, not valid JSON, or holds no token.
   */
  getTokenInCache() {
    if (!fs.existsSync(this._PATH_TOKEN_FILE)) {
      return "";
    }
    try {
      const cached = JSON.parse(fs.readFileSync(this._PATH_TOKEN_FILE, 'utf8'));
      return cached?.token ?? "";
    } catch (error) {
      // A corrupt cache must not prevent the scraper from being constructed;
      // an empty token simply triggers a fresh authentication later.
      console.error('Error reading token cache:', error);
      return "";
    }
  }

  /**
   * Writes `token` to `_PATH_TOKEN_FILE` as `{ "token": token }`.
   *
   * @param {string} token
   */
  setTokenInCache(token) {
    fs.writeFileSync(this._PATH_TOKEN_FILE, JSON.stringify({ token: token }));
  }

  /**
   * Tells whether a JWT is expired according to its `exp` claim.
   *
   * @param {string} token - JWT in `header.payload.signature` form.
   * @returns {boolean} true when expired, when `exp` is missing, or when the
   *   token cannot be decoded.
   */
  isTokenExpired(token) {
    try {
      // JWT segments are base64url without padding. `atob` rejects '-' and
      // '_', which made valid tokens look expired; Buffer's 'base64' decoder
      // accepts the URL-safe alphabet and also decodes the bytes as UTF-8.
      const json = Buffer.from(token.split('.')[1], 'base64').toString('utf8');
      const payload = JSON.parse(json);
      if (!payload || !payload.exp) {
        return true;
      }
      const currentTime = Math.floor(Date.now() / 1000);
      return payload.exp < currentTime;
    } catch (error) {
      console.error('Error decoding token:', error);
      return true;
    }
  }

  /**
   * Asks Jucundus whether the agent behind the current token is connected.
   *
   * @returns {Promise<true>} Resolves to true on a 2xx response.
   *   Rejects with the literal `false` (not an Error) on a non-2xx response or
   *   a network failure; that rejection value is kept for backward
   *   compatibility with existing callers.
   */
  async isAgentConnected() {
    let response;
    try {
      response = await fetch(this._JucundusUrl + '/api/user/agentConnected', {
        method: 'GET',
        headers: {
          'Authorization': 'Bearer ' + this.token,
        },
      });
    } catch (error) {
      console.log('isAgentConnected ? error Agent not connected: ' + error);
      throw false;
    }

    if (!response.ok) {
      console.log('isAgentConnected ? Agent not connected: ' + response.statusText);
      throw false;
    }
    console.log('isAgentConnected ? Agent connected');
    return true;
  }

  /**
   * Checks that a usable Jucundus session exists: a token is present, it is
   * not expired, and the API reports the agent as connected.
   *
   * @returns {Promise<boolean>} Never rejects.
   */
  async checkJucundusConnexion() {
    if (!this.token) {
      console.log('No token');
      return false;
    }

    if (this.isTokenExpired(this.token)) {
      console.log('Token expired');
      return false;
    }

    try {
      const isConnected = await this.isAgentConnected();
      if (!isConnected) {
        console.log('Agent not connected');
        return false;
      }
    } catch (error) {
      console.log('Agent not connected');
      return false;
    }

    return true;
  }

  /**
   * Authenticates against Jucundus and caches the returned token on disk.
   * Does not assign `this.token`; the caller does.
   *
   * @param {string} email
   * @param {string} password
   * @returns {Promise<string>} The new token.
   * @throws {Error} On a non-2xx response, a network error, or a cache write
   *   failure (logged, then rethrown unchanged).
   */
  async getNewToken(email, password) {
    try {
      const response = await fetch(this._JucundusUrl + '/authenticate', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ email, password }),
      });
      if (!response.ok) {
        throw new Error('Failed to retrieve new token');
      }
      const data = await response.json();
      this.setTokenInCache(data.token);
      return data.token;
    } catch (error) {
      console.error('Error retrieving new token:', error);
      throw error;
    }
  }

  /**
   * Sends an authenticated JSON request to Jucundus, renewing the token first
   * when the current session is not usable.
   *
   * @param {string} url - Absolute URL of the endpoint.
   * @param {string} method - HTTP method.
   * @param {?string} [body=null] - Already-serialised JSON body.
   * @returns {Promise<*>} The parsed JSON response.
   * @throws {Error} With the API's `error` field as message (or
   *   'Unknown error') on a non-2xx response; network and authentication
   *   errors propagate unchanged.
   */
  async RequestJucundus(url, method, body = null) {
    console.log('RequestJucundus() ' + url);
    if (!(await this.checkJucundusConnexion())) {
      this.token = await this.getNewToken(config.jucundus.useremail, Key.jucundusPassword);
    }

    const response = await fetch(url, {
      method: method,
      headers: {
        'Authorization': 'Bearer ' + this.token,
        'Content-Type': 'application/json',
      },
      body: body,
    });

    if (!response.ok) {
      // The error body is not guaranteed to be JSON; do not let a parse
      // failure hide the fact that the request itself failed.
      const failure = await response.json().catch(() => null);
      throw new Error(failure?.error || 'Unknown error');
    }

    const data = await response.json();
    console.log('RequestJucundus() data:' + JSON.stringify(data, null, 2));
    return data;
  }

  /**
   * Runs RequestJucundus and, on failure, logs the error and rethrows it
   * wrapped as `Error('Error: ' + cause)`, the format the Jucundus* methods
   * have always used for their rejections.
   *
   * @returns {Promise<*>} The parsed JSON response.
   */
  async _requestJucundusOrThrow(url, method, body = null) {
    try {
      return await this.RequestJucundus(url, method, body);
    } catch (error) {
      console.error(error);
      throw new Error('Error: ' + error, { cause: error });
    }
  }

  /**
   * Checks whether a stop was requested for this sale (status 'askStop') and,
   * if so, sets the sale status to 'end'.
   *
   * @returns {Promise<boolean>} true when a stop was requested and the status
   *   update completed, false otherwise.
   * @throws {Error} When a request fails.
   */
  async JucundusCheckStop() {
    const saleUrl = this._JucundusUrl + '/api/sale/getByUrl/' + encodeURIComponent(this.Url);
    const saleInfo = await this._requestJucundusOrThrow(saleUrl, 'GET');

    if (saleInfo.status !== 'askStop') {
      return false;
    }

    console.log('Stop was asked');
    // Wait for the update so that `true` means the status really changed.
    await this.JucundusSetSaleStatus(saleInfo, 'end');
    return true;
  }

  /**
   * Marks this sale as finished in Jucundus.
   *
   * @param {string} code - 'end', 'endOnError' or 'endOnRequest'.
   * @returns {Promise<true>} Once the status update completed.
   * @throws {Error} When `code` is not one of the accepted values (no request
   *   is sent in that case) or when a request fails.
   */
  async JucundusEndSale(code) {
    console.log('JucundusEndSale: ' + code);

    // Validate before doing anything: previously a leftover debug resolve made
    // this check ineffective and the invalid status was still sent.
    const acceptedCodes = ['end', 'endOnError', 'endOnRequest'];
    if (!acceptedCodes.includes(code)) {
      console.error('Error: code must be end or endOnError or endOnRequest');
      throw new Error('Error: code must be end or endOnError or endOnRequest');
    }

    const saleUrl = this._JucundusUrl + '/api/sale/getByUrl/' + encodeURIComponent(this.Url);
    const saleInfo = await this._requestJucundusOrThrow(saleUrl, 'GET');
    await this.JucundusSetSaleStatus(saleInfo, code);
    return true;
  }

  /**
   * Updates the status of a sale in Jucundus.
   *
   * @param {object} saleInfo - Sale record; needs `_id`. Its `status` property
   *   is overwritten in place (existing behaviour, kept for callers).
   * @param {string} status - New status.
   * @returns {Promise<true>} Once the PUT completed.
   * @throws {Error} When the request fails.
   */
  async JucundusSetSaleStatus(saleInfo, status) {
    saleInfo.status = status;
    await this._requestJucundusOrThrow(
      this._JucundusUrl + '/api/sale/sale/' + saleInfo._id,
      'PUT',
      JSON.stringify(saleInfo),
    );
    return true;
  }

  /**
   * Reports to Jucundus that the sale moved on to a new lot.
   *
   * @returns {Promise<true>} Once the POST completed.
   * @throws {Error} When the request fails.
   */
  async JucunduNextItem(sale_id, timestamp, item_id, num_lot, title, description, EstimateLow, EstimateHigh, RawData) {
    console.log('JucunduNextItem', sale_id, timestamp, item_id, num_lot);

    const payload = {
      idPlatform: item_id,
      idSalePlatform: sale_id,
      platform: this._Name,
      timestamp: timestamp,
      lotNumber: num_lot,
      title: title,
      description: description,
      EstimateLow: EstimateLow,
      EstimateHigh: EstimateHigh,
      RawData: RawData,
    };
    await this._requestJucundusOrThrow(this._JucundusUrl + '/api/lot/NextItem', 'POST', JSON.stringify(payload));
    return true;
  }

  /**
   * Reports a bid on a lot to Jucundus.
   *
   * @returns {Promise<true>} Once the POST completed.
   * @throws {Error} When the request fails.
   */
  async JucundusBid(item_id, timestamp, amount, auctioned_type) {
    console.log('JucundusBid', timestamp, item_id, amount, auctioned_type);

    const payload = {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
    };
    await this._requestJucundusOrThrow(this._JucundusUrl + '/api/lot/Bid', 'POST', JSON.stringify(payload));
    return true;
  }

  /**
   * Reports the final result of a lot (hammer amount, sold or not) to Jucundus.
   *
   * @returns {Promise<true>} Once the POST completed.
   * @throws {Error} When the request fails.
   */
  async JucunduAuctionedItem(item_id, timestamp, amount, sold, auctioned_type) {
    console.log('JucunduAuctionedItem', timestamp, item_id, amount, sold);

    const payload = {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
      sold: sold,
    };
    await this._requestJucundusOrThrow(this._JucundusUrl + '/api/lot/AuctionedItem', 'POST', JSON.stringify(payload));
    return true;
  }
}
|
|
|
|
/**
 * Helpers shared by the scrapers: platform detection from a sale URL and
 * XPath lookups that try several candidate expressions in order.
 *
 * The `page` / `Element` arguments must expose `$x(xpath)`; `page` must also
 * expose `waitForXPath(xpath, options)` and `evaluate(fn, ...args)`. These
 * names match Puppeteer's API (presumably Puppeteer objects; confirm
 * against the callers).
 */
class ScraperTools {
  _CONST_INTERENCHERES = 'interencheres';
  _CONST_DROUOT = 'drouot';

  /**
   * Identifies the auction platform a URL belongs to.
   *
   * Anything preceding the first "http" is ignored, so a value such as
   * "url=https://..." works. URLs without a scheme and upper-case URLs are
   * recognised as well.
   *
   * @param {*} URL - URL, or a string containing one.
   * @returns {string|undefined} 'interencheres', 'drouot', or undefined when
   *   no known platform is found.
   */
  detectPlatform(URL) {
    const text = String(URL).toLowerCase();
    const schemeAt = text.indexOf('http');
    // Without a scheme there is no prefix to strip: inspect the whole value.
    const candidate = schemeAt === -1 ? text : text.slice(schemeAt);

    if (candidate.includes('interencheres')) {
      return this._CONST_INTERENCHERES;
    }
    if (candidate.includes('drouot')) {
      return this._CONST_DROUOT;
    }
    return undefined;
  }

  /**
   * Gives the page up to one second to render `expression`.
   * A timeout is expected and ignored: the expression is only one of several
   * candidates, and the caller checks for matches right afterwards.
   */
  async _waitForXPath(page, expression) {
    try {
      await page.waitForXPath(expression, { timeout: 1000 });
    } catch (e) {
      // Deliberately ignored, see above.
    }
  }

  /**
   * Walks `expressions` in order and returns the first value that is neither
   * null/undefined nor "".
   *
   * @param {string[]} expressions - Candidate XPath expressions.
   * @param {function(string): Promise<Array>} findAll - Returns the elements
   *   matching one expression.
   * @param {function(object, Array): *} readValue - Reads the value from the
   *   first match (the full match list is passed as second argument).
   * @returns {Promise<*>} The value, or "" when no candidate yields one.
   */
  async _firstNonEmpty(expressions, findAll, readValue) {
    for (const expression of expressions) {
      const matches = await findAll(expression);
      if (matches.length > 0) {
        const value = await readValue(matches[0], matches);
        if (value != null && value !== '') {
          return value;
        }
      }
    }
    return '';
  }

  /**
   * Clicks the first element matched by any of the XPath candidates.
   *
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page
   * @param {string} context - Label used in the error log.
   * @returns {Promise<boolean>} true when an element was clicked.
   */
  async clickLink(XPath = [], page, context) {
    for (const expression of XPath) {
      await this._waitForXPath(page, expression);
      const matches = await page.$x(expression);
      if (matches.length > 0) {
        await matches[0].evaluate((node) => node.click());
        return true;
      }
    }
    console.error("Error: No Link found for " + context);
    return false;
  }

  /**
   * Reads an attribute from the first XPath candidate whose first match has a
   * non-empty value for it.
   *
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page
   * @param {string} attribute - Attribute name.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} The attribute value, or "" when none is found.
   */
  async getAttribute(XPath = [], page, attribute, context) {
    const value = await this._firstNonEmpty(
      XPath,
      async (expression) => {
        await this._waitForXPath(page, expression);
        return page.$x(expression);
      },
      (element) => page.evaluate((el, name) => el.getAttribute(name), element, attribute),
    );
    if (value === '') {
      console.error("Error: No content found for the Attribute " + attribute + " for " + context);
    }
    return value;
  }

  /**
   * Reads the text content of the first XPath candidate whose first match has
   * non-empty text.
   *
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page
   * @param {string} context - Label used in the error log.
   * @param {boolean} [log=false] - When true, logs the matched elements.
   * @returns {Promise<string>} The text, or "" when none is found.
   */
  async getTextContent(XPath = [], page, context, log = false) {
    const value = await this._firstNonEmpty(
      XPath,
      async (expression) => {
        await this._waitForXPath(page, expression);
        return page.$x(expression);
      },
      (element, matches) => {
        if (log) console.log(matches);
        return page.evaluate((el) => el.textContent, element);
      },
    );
    if (value === '') {
      console.error("Error: No content found for " + context);
    }
    return value;
  }

  /**
   * Tells whether any of the XPath candidates currently matches an element.
   * Does not wait for the page; lookup errors count as "no match".
   *
   * @param {string[]} XPath - Candidate expressions.
   * @param {object} page
   * @param {string} context - Unused; kept for signature compatibility.
   * @returns {Promise<boolean>}
   */
  async ElementExists(XPath = [], page, context) {
    for (const expression of XPath) {
      try {
        const matches = await page.$x(expression);
        if (matches.length > 0) {
          return true;
        }
      } catch (e) {
        // Best effort: a failing lookup means this candidate does not match.
      }
    }
    return false;
  }

  /**
   * Same as getTextContent, but searches relative to `Element` and does not
   * wait for the page.
   *
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Used to evaluate the text of the match.
   * @param {object} Element - Element handle exposing `$x`.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} The text, or "" when none is found.
   */
  async getTextContentElement(XPath = [], page, Element, context) {
    const value = await this._firstNonEmpty(
      XPath,
      (expression) => Element.$x(expression),
      (element) => page.evaluate((el) => el.textContent, element),
    );
    if (value === '') {
      console.error("Error: No content found for " + context);
    }
    return value;
  }

  /**
   * Same as getAttribute, but searches relative to `Element` and does not
   * wait for the page.
   *
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Used to evaluate the attribute of the match.
   * @param {object} Element - Element handle exposing `$x`.
   * @param {string} attribute - Attribute name.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} The attribute value, or "" when none is found.
   */
  async getAttributeElement(XPath = [], page, Element, attribute, context) {
    const value = await this._firstNonEmpty(
      XPath,
      (expression) => Element.$x(expression),
      (element) => page.evaluate((el, name) => el.getAttribute(name), element, attribute),
    );
    if (value === '') {
      console.error("Error: No content found for the Attribute " + attribute + " for " + context);
    }
    return value;
  }
}
|
|
|
|
// Public API of this module: the base scraper class and the XPath/platform helpers.
module.exports = { Scraper, ScraperTools };