Agent/AuctionServices/Scraper/Scraper.js

347 lines
9.8 KiB
JavaScript

// Scraper.js
'use strict';
const fs = require('node:fs');
const fetch = require('node-fetch');
/**
 * Base class for auction-platform scrapers.
 *
 * It keeps per-platform configuration in underscore-prefixed fields, creates
 * browser contexts (restoring a saved session file when one exists) and
 * reports sale / lot events to the Jucundus HTTP API located at
 * `_JucundusUrl`.
 *
 * All `Jucundus*` methods settle only once the underlying HTTP request has
 * completed, and reject with `Error('Error: ' + cause)` when it fails.
 */
class Scraper {
  // Platform name; sent as `platform` in the lot payloads.
  _Name = "";
  _Browser = null;
  _PAGE_MAIN = "";
  _PAGE_LOGIN = "";
  _USER = "";
  _PWD = "";
  // Storage-state file read by _getContext() and written by _saveSession().
  _PATH_SESSION_FILE = "";
  _BROWSER_TOOL = null;
  _Proxy = "";
  _DebugMode = false;
  // Base URL of the Jucundus API.
  _JucundusUrl = "http://host.docker.internal:3000";

  /**
   * @param {string} Url - URL of the sale; used to look the sale up in Jucundus.
   */
  constructor(Url) {
    this.Url = Url;
  }

  /**
   * Logs an error and wraps it in the `'Error: …'` format used by this class.
   * @param {*} error - The original failure.
   * @returns {Error} Wrapped error (original kept as `cause` where supported).
   */
  static #wrapError(error) {
    console.error(error);
    return new Error('Error: ' + error, { cause: error });
  }

  /**
   * Sends `payload` as JSON and waits for the request to complete.
   * @param {string} url - Absolute endpoint URL.
   * @param {string} method - HTTP method (`'POST'` or `'PUT'`).
   * @param {*} payload - Value serialised with JSON.stringify.
   * @returns {Promise<boolean>} Resolves to `true` once the request finished.
   */
  static async #sendJson(url, method, payload) {
    try {
      await fetch(url, {
        method,
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });
    } catch (error) {
      throw Scraper.#wrapError(error);
    }
    return true;
  }

  /**
   * Fetches `url` and parses the response body as JSON.
   * @param {string} url - Absolute endpoint URL.
   * @returns {Promise<*>} Parsed JSON body.
   */
  static async #fetchJson(url) {
    const response = await fetch(url);
    return response.json();
  }

  /**
   * Creates a browser context with the French locale settings, restoring the
   * stored session when `_PATH_SESSION_FILE` exists on disk.
   * @param {object} browser - Object exposing `newContext(options)`.
   * @returns {Promise<object>} The value returned by `browser.newContext`.
   * @throws {Error} `'Error: …'` when the context cannot be created.
   */
  async _getContext(browser) {
    try {
      const options = {
        timezoneId: 'Europe/Paris',
        extraHTTPHeaders: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0',
          'Accept-Language': 'fr-FR,fr;q=0.9',
        },
      };
      // Only pass storageState when a session file is actually present.
      if (fs.existsSync(this._PATH_SESSION_FILE)) {
        options.storageState = this._PATH_SESSION_FILE;
      }
      return await browser.newContext(options);
    } catch (e) {
      console.error('Error: ' + e);
      throw new Error('Error: ' + e, { cause: e });
    }
  }

  /**
   * Writes the storage state of the page's context to `_PATH_SESSION_FILE`.
   * @param {object} page - Page whose context is persisted.
   * @returns {Promise<object>} The same `page`, once the state has been saved.
   *   Rejects with the underlying error if saving fails.
   */
  async _saveSession(page) {
    console.log('-- Save Session --');
    await page.context().storageState({ path: this._PATH_SESSION_FILE });
    return page;
  }

  // The five methods below are empty in this base class; presumably they are
  // overridden by platform-specific subclasses (not visible in this file).
  getPictures({ page, data }) {}

  getLotInfos({ page, data }) {}

  getSaleInfos({ page, data }) {}

  getLotList({ page, data }) {}

  async Live({ page, data }) {}

  /**
   * Checks whether a stop was requested for this sale (status `'askStop'`).
   * When it was, the sale status is put back to `'ready'` before resolving.
   * @returns {Promise<boolean>} `true` if a stop was asked, otherwise `false`.
   * @throws {Error} `'Error: …'` when the lookup or the status update fails.
   */
  async JucundusCheckStop() {
    const lookupUrl = this._JucundusUrl + '/api/sale/getByUrl/' + encodeURIComponent(this.Url);
    let saleInfo;
    let stopAsked;
    try {
      saleInfo = await Scraper.#fetchJson(lookupUrl);
      stopAsked = saleInfo.status === 'askStop';
    } catch (error) {
      throw Scraper.#wrapError(error);
    }
    if (!stopAsked) {
      return false;
    }
    console.log('Stop was asked');
    // Return to the ready status, and wait for the update to be sent.
    await this.JucundusSetSaleStatus(saleInfo, 'ready');
    return true;
  }

  /**
   * Looks the sale up by URL and sets its status to `'end'`.
   * @returns {Promise<boolean>} `true` once the status update was sent.
   * @throws {Error} `'Error: …'` when the lookup or the status update fails.
   */
  async JucundusEndSale() {
    console.log('JucundusEndSale');
    const lookupUrl = this._JucundusUrl + '/api/sale/getByUrl/' + encodeURIComponent(this.Url);
    let saleInfo;
    try {
      saleInfo = await Scraper.#fetchJson(lookupUrl);
    } catch (error) {
      throw Scraper.#wrapError(error);
    }
    await this.JucundusSetSaleStatus(saleInfo, 'end');
    return true;
  }

  /**
   * Updates the status of a sale. Note that `saleInfo.status` is overwritten
   * on the object passed in, and the whole object is sent as the PUT body.
   * @param {object} saleInfo - Sale record; must carry `_id`.
   * @param {string} status - New status value.
   * @returns {Promise<boolean>} `true` once the request has completed.
   */
  async JucundusSetSaleStatus(saleInfo, status) {
    saleInfo.status = status;
    return Scraper.#sendJson(this._JucundusUrl + '/api/sale/sale/' + saleInfo._id, 'PUT', saleInfo);
  }

  /**
   * Reports that the sale moved on to a new lot.
   * @returns {Promise<boolean>} `true` once the request has completed.
   */
  async JucunduNextItem(sale_id, timestamp, item_id, num_lot, title, description, EstimateLow, EstimateHigh, RawData) {
    console.log('JucunduNextItem', sale_id, timestamp, item_id, num_lot);
    return Scraper.#sendJson(this._JucundusUrl + '/api/lot/NextItem', 'POST', {
      idPlatform: item_id,
      idSalePlatform: sale_id,
      platform: this._Name,
      timestamp: timestamp,
      lotNumber: num_lot,
      title: title,
      description: description,
      EstimateLow: EstimateLow,
      EstimateHigh: EstimateHigh,
      RawData: RawData,
    });
  }

  /**
   * Reports a bid on a lot.
   * @returns {Promise<boolean>} `true` once the request has completed.
   */
  async JucundusBid(item_id, timestamp, amount, auctioned_type) {
    console.log('JucundusBid', timestamp, item_id, amount, auctioned_type);
    return Scraper.#sendJson(this._JucundusUrl + '/api/lot/Bid', 'POST', {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
    });
  }

  /**
   * Reports the final result of a lot (hammer amount and whether it sold).
   * @returns {Promise<boolean>} `true` once the request has completed.
   */
  async JucunduAuctionedItem(item_id, timestamp, amount, sold, auctioned_type) {
    console.log('JucunduAuctionedItem', timestamp, item_id, amount, sold);
    return Scraper.#sendJson(this._JucundusUrl + '/api/lot/AuctionedItem', 'POST', {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
      sold: sold,
    });
  }
}
/**
 * Helpers shared by the scrapers: platform detection from a URL and
 * XPath-based lookups (click, attribute, text) on a page or on an element.
 *
 * Every lookup method takes a list of XPath expressions and tries them in
 * order, returning the first usable result.
 */
class ScraperTools {
  _CONST_INTERENCHERES = 'interencheres';
  _CONST_DROUOT = 'drouot';

  /**
   * Tells whether a scraped value counts as "found".
   * Loose comparison is kept on purpose: it treats "", null and undefined
   * (and anything loosely equal to "") as missing.
   * @param {*} value - Value read from the page.
   * @returns {boolean} `true` when the value is usable.
   */
  static #hasContent(value) {
    return value != "" && value != null;
  }

  /**
   * Waits up to one second for `xpath` to appear, then queries it.
   * The wait is best-effort: a timeout (or any wait error) is ignored and the
   * query runs anyway, so a missing node simply yields an empty result.
   * @param {object} page - Object exposing `waitForXPath` and `$x`.
   * @param {string} xpath - XPath expression.
   * @returns {Promise<Array>} Matching element handles.
   */
  static async #queryAfterWait(page, xpath) {
    try {
      await page.waitForXPath(xpath, { timeout: 1000 });
    } catch (e) {
      // Deliberately ignored; see above.
    }
    return page.$x(xpath);
  }

  /**
   * Identifies the auction platform from a URL.
   *
   * When the input contains "http", only the text between the first and the
   * second occurrence of "http" is inspected (leading text is ignored). When
   * it contains no "http" at all (scheme-less URL), the whole input is
   * inspected instead of the meaningless `'httpundefined'` placeholder.
   * @param {*} URL - URL, or text containing a URL.
   * @returns {string|undefined} `'interencheres'`, `'drouot'`, or `undefined`
   *   when no known platform is recognised.
   */
  detectPlatform(URL) {
    const text = String(URL);
    const parts = text.split("http");
    const candidate = parts.length > 1 ? 'http' + parts[1] : text;
    if (candidate.includes('interencheres')) {
      return this._CONST_INTERENCHERES;
    }
    if (candidate.includes('drouot')) {
      return this._CONST_DROUOT;
    }
    return undefined;
  }

  /**
   * Clicks the first element matched by any of the XPath expressions.
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<boolean>} `true` if something was clicked, else `false`.
   */
  async clickLink(XPath = [], page, context) {
    for (const xpath of XPath) {
      const matches = await ScraperTools.#queryAfterWait(page, xpath);
      if (matches.length > 0) {
        await matches[0].evaluate((node) => node.click());
        return true;
      }
    }
    console.error("Error: No Link found for " + context);
    return false;
  }

  /**
   * Reads an attribute from the first match that has a non-empty value.
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} attribute - Attribute name.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} The attribute value, or `""` when none found.
   */
  async getAttribute(XPath = [], page, attribute, context) {
    for (const xpath of XPath) {
      const matches = await ScraperTools.#queryAfterWait(page, xpath);
      if (matches.length > 0) {
        const value = await page.evaluate((element, attr) => element.getAttribute(attr), matches[0], attribute);
        if (ScraperTools.#hasContent(value)) {
          return value;
        }
      }
    }
    console.error("Error: No content found for the Attribute " + attribute + " for " + context);
    return "";
  }

  /**
   * Reads the text content of the first match that has non-empty text.
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} context - Label used in the error log.
   * @param {boolean} [log=false] - When true, logs the matched handles.
   * @returns {Promise<string>} The text, or `""` when none found.
   */
  async getTextContent(XPath = [], page, context, log = false) {
    for (const xpath of XPath) {
      const matches = await ScraperTools.#queryAfterWait(page, xpath);
      if (matches.length > 0) {
        if (log) console.log(matches);
        const text = await page.evaluate((el) => el.textContent, matches[0]);
        if (ScraperTools.#hasContent(text)) {
          return text;
        }
      }
    }
    console.error("Error: No content found for " + context);
    return "";
  }

  /**
   * Tells whether any of the XPath expressions currently matches, without
   * waiting. Query errors are ignored and count as "no match".
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} context - Unused; kept for signature parity.
   * @returns {Promise<boolean>} `true` if at least one expression matches.
   */
  async ElementExists(XPath = [], page, context) {
    for (const xpath of XPath) {
      try {
        const matches = await page.$x(xpath);
        if (matches.length > 0) {
          return true;
        }
      } catch (e) {
        // Best-effort probe: a failing query is treated as "not found".
      }
    }
    return false;
  }

  /**
   * Like getTextContent, but searches below `Element` and does not wait.
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Page used to evaluate the text read.
   * @param {object} Element - Handle whose `$x` is queried.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} The text, or `""` when none found.
   */
  async getTextContentElement(XPath = [], page, Element, context) {
    for (const xpath of XPath) {
      const matches = await Element.$x(xpath);
      if (matches.length > 0) {
        const text = await page.evaluate((el) => el.textContent, matches[0]);
        if (ScraperTools.#hasContent(text)) {
          return text;
        }
      }
    }
    console.error("Error: No content found for " + context);
    return "";
  }

  /**
   * Like getAttribute, but searches below `Element` and does not wait.
   * @param {string[]} XPath - Candidate expressions, tried in order.
   * @param {object} page - Page used to evaluate the attribute read.
   * @param {object} Element - Handle whose `$x` is queried.
   * @param {string} attribute - Attribute name.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} The attribute value, or `""` when none found.
   */
  async getAttributeElement(XPath = [], page, Element, attribute, context) {
    for (const xpath of XPath) {
      const matches = await Element.$x(xpath);
      if (matches.length > 0) {
        const value = await page.evaluate((el, attr) => el.getAttribute(attr), matches[0], attribute);
        if (ScraperTools.#hasContent(value)) {
          return value;
        }
      }
    }
    console.error("Error: No content found for the Attribute " + attribute + " for " + context);
    return "";
  }
}
// Public API of this module: the base scraper class and the XPath helper class.
module.exports = {Scraper, ScraperTools};