347 lines
9.8 KiB
JavaScript
347 lines
9.8 KiB
JavaScript
// Scraper.js
|
|
'use strict';
|
|
const fs = require('node:fs');
|
|
const fetch = require('node-fetch');
|
|
|
|
/**
 * Base class for platform scrapers that report to the Jucundus HTTP API.
 *
 * The class offers three groups of members:
 *  - browser-context helpers (`_getContext`, `_saveSession`);
 *  - empty hooks (`getPictures`, `getLotInfos`, `getSaleInfos`, `getLotList`,
 *    `Live`) that do nothing here — presumably overridden by platform-specific
 *    subclasses (TODO confirm against the subclasses);
 *  - `Jucundus*` methods that send JSON requests to `_JucundusUrl`.
 *
 * NOTE(review): HTTP status codes of the Jucundus responses are not inspected
 * (neither here nor in the previous implementation); only network/parsing
 * failures are reported as errors.
 */
class Scraper {
  // Platform name; sent as `platform` in the lot payloads.
  _Name = "";

  // The following fields are not read by this class; presumably they are
  // configured and used by subclasses (TODO confirm).
  _Browser = null;
  _PAGE_MAIN = "";
  _PAGE_LOGIN = "";
  _USER = "";
  _PWD = "";

  // Path of the storage-state file used to restore / persist the session.
  _PATH_SESSION_FILE = "";

  _BROWSER_TOOL = null;

  _Proxy = "";
  _DebugMode = false;

  // Base URL of the Jucundus API.
  _JucundusUrl = "http://host.docker.internal:3000";

  /**
   * @param {string} Url - URL of the sale; used to look the sale up in Jucundus.
   */
  constructor(Url) {
    this.Url = Url;
  }

  /**
   * Create a browser context, restoring the saved session when the session
   * file exists on disk.
   *
   * @param {object} browser - Object exposing `newContext(options)`.
   * @returns {Promise<object>} Whatever `browser.newContext` resolves to.
   * @throws {Error} `'Error: ' + cause` when the context cannot be created.
   */
  async _getContext(browser) {
    // Options shared by both the "restored session" and "fresh" contexts.
    const baseOptions = {
      timezoneId: 'Europe/Paris',
      extraHTTPHeaders: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0',
        'Accept-Language': 'fr-FR,fr;q=0.9',
      },
    };

    try {
      if (fs.existsSync(this._PATH_SESSION_FILE)) {
        return await browser.newContext({
          storageState: this._PATH_SESSION_FILE,
          ...baseOptions,
        });
      }
      return await browser.newContext(baseOptions);
    } catch (e) {
      console.error('Error: ' + e);
      throw new Error('Error: ' + e, { cause: e });
    }
  }

  /**
   * Persist the storage state of the page's context to `_PATH_SESSION_FILE`.
   *
   * A failure of `storageState` now rejects the returned promise; previously
   * the promise stayed pending forever in that case.
   *
   * @param {object} page - Object exposing `context().storageState({ path })`.
   * @returns {Promise<object>} The same `page`, once the state has been saved.
   */
  async _saveSession(page) {
    console.log('-- Save Session --');
    await page.context().storageState({ path: this._PATH_SESSION_FILE });
    return page;
  }

  // ---- Hooks: intentionally empty in the base class -----------------------

  getPictures({ page, data }) {}

  getLotInfos({ page, data }) {}

  getSaleInfos({ page, data }) {}

  getLotList({ page, data }) {}

  async Live({ page, data }) {}

  // ---- Internal helpers ---------------------------------------------------

  /**
   * Log an error and wrap it in the `'Error: ' + error` format used by every
   * method of this class.
   *
   * @param {*} error - The original failure.
   * @returns {Error} Wrapped error carrying the original as `cause`.
   */
  _logAndWrap(error) {
    console.error(error);
    return new Error('Error: ' + error, { cause: error });
  }

  /**
   * Fetch the sale record matching `this.Url` from Jucundus.
   *
   * @returns {Promise<object>} Parsed JSON body of the response.
   */
  async _fetchSaleInfo() {
    const encodedUrl = encodeURIComponent(this.Url);
    const response = await fetch(this._JucundusUrl + '/api/sale/getByUrl/' + encodedUrl);
    return response.json();
  }

  /**
   * Send a JSON payload to a Jucundus endpoint and wait for the request to
   * complete.
   *
   * @param {string} method - HTTP method ('POST' or 'PUT').
   * @param {string} path - Path appended to `_JucundusUrl`.
   * @param {object} payload - Value serialised as the JSON request body.
   * @returns {Promise<boolean>} `true` once the request has completed.
   * @throws {Error} `'Error: ' + cause` when the request fails.
   */
  async _sendJson(method, path, payload) {
    try {
      await fetch(this._JucundusUrl + path, {
        method,
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });
      return true;
    } catch (error) {
      throw this._logAndWrap(error);
    }
  }

  // ---- Jucundus API -------------------------------------------------------

  /**
   * Check whether a stop was requested for the current sale. When the sale
   * status is `'askStop'`, the status is reset to `'ready'` before resolving.
   *
   * @returns {Promise<boolean>} `true` if a stop was asked, `false` otherwise.
   * @throws {Error} When the sale cannot be fetched or the status update fails.
   */
  async JucundusCheckStop() {
    let saleInfo;
    try {
      saleInfo = await this._fetchSaleInfo();
      if (saleInfo.status !== 'askStop') {
        return false;
      }
    } catch (error) {
      throw this._logAndWrap(error);
    }

    console.log('Stop was asked');

    // Return to ready status; awaited so the caller only sees `true` once the
    // status has actually been written back.
    await this.JucundusSetSaleStatus(saleInfo, 'ready');
    return true;
  }

  /**
   * Mark the current sale as ended (status `'end'`).
   *
   * @returns {Promise<boolean>} `true` once the status update has completed.
   * @throws {Error} When the sale cannot be fetched or the status update fails.
   */
  async JucundusEndSale() {
    console.log('JucundusEndSale');

    let saleInfo;
    try {
      saleInfo = await this._fetchSaleInfo();
    } catch (error) {
      throw this._logAndWrap(error);
    }

    await this.JucundusSetSaleStatus(saleInfo, 'end');
    return true;
  }

  /**
   * Change the status of a sale and PUT the whole record back to Jucundus.
   *
   * Note: `saleInfo` is mutated in place (its `status` is overwritten), as in
   * the previous implementation.
   *
   * @param {object} saleInfo - Sale record; must carry `_id`.
   * @param {string} status - New status value.
   * @returns {Promise<boolean>} `true` once the request has completed.
   * @throws {Error} `'Error: ' + cause` when the update fails.
   */
  async JucundusSetSaleStatus(saleInfo, status) {
    try {
      saleInfo.status = status;
    } catch (error) {
      throw this._logAndWrap(error);
    }
    return this._sendJson('PUT', '/api/sale/sale/' + saleInfo._id, saleInfo);
  }

  /**
   * Notify Jucundus that a new lot is being presented.
   *
   * @returns {Promise<boolean>} `true` once the request has completed.
   * @throws {Error} `'Error: ' + cause` when the request fails.
   */
  async JucunduNextItem(sale_id, timestamp, item_id, num_lot, title, description, EstimateLow, EstimateHigh, RawData) {
    console.log('JucunduNextItem', sale_id, timestamp, item_id, num_lot);

    return this._sendJson('POST', '/api/lot/NextItem', {
      idPlatform: item_id,
      idSalePlatform: sale_id,
      platform: this._Name,
      timestamp: timestamp,
      lotNumber: num_lot,
      title: title,
      description: description,
      EstimateLow: EstimateLow,
      EstimateHigh: EstimateHigh,
      RawData: RawData,
    });
  }

  /**
   * Report a bid on a lot to Jucundus.
   *
   * @returns {Promise<boolean>} `true` once the request has completed.
   * @throws {Error} `'Error: ' + cause` when the request fails.
   */
  async JucundusBid(item_id, timestamp, amount, auctioned_type) {
    console.log('JucundusBid', timestamp, item_id, amount, auctioned_type);

    return this._sendJson('POST', '/api/lot/Bid', {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
    });
  }

  /**
   * Report the final result of a lot (hammered amount and sold flag) to
   * Jucundus.
   *
   * @returns {Promise<boolean>} `true` once the request has completed.
   * @throws {Error} `'Error: ' + cause` when the request fails.
   */
  async JucunduAuctionedItem(item_id, timestamp, amount, sold, auctioned_type) {
    console.log('JucunduAuctionedItem', timestamp, item_id, amount, sold);

    return this._sendJson('POST', '/api/lot/AuctionedItem', {
      idPlatform: item_id,
      platform: this._Name,
      timestamp: timestamp,
      amount: amount,
      auctioned_type: auctioned_type,
      sold: sold,
    });
  }
}
|
|
|
|
/**
 * Stateless helpers shared by the scrapers: platform detection from a URL and
 * XPath-based lookups on a page or element handle.
 *
 * Every XPath helper receives a list of candidate XPath expressions and tries
 * them in order, returning the first usable result. The `page` / element
 * objects are expected to expose `waitForXPath`, `$x` and `evaluate`.
 */
class ScraperTools {
  _CONST_INTERENCHERES = 'interencheres';
  _CONST_DROUOT = 'drouot';

  /**
   * Tell whether a scraped value is usable (neither null/undefined nor empty).
   *
   * @param {*} value - Value returned from the page.
   * @returns {boolean}
   */
  static _hasContent(value) {
    return value != null && value !== "";
  }

  /**
   * Wait up to one second for an XPath to appear, then query it.
   *
   * @param {object} page - Object exposing `waitForXPath` and `$x`.
   * @param {string} xpath - XPath expression.
   * @returns {Promise<Array>} Matching element handles (possibly empty).
   */
  static async _queryAfterWait(page, xpath) {
    try {
      await page.waitForXPath(xpath, { timeout: 1000 });
    } catch (e) {
      // Best effort: a timeout only means this candidate is absent; the
      // query below then returns an empty list and the next XPath is tried.
    }
    return page.$x(xpath);
  }

  /**
   * Detect the auction platform from a URL.
   *
   * Any text preceding the first "http" is ignored. URLs without a scheme
   * (e.g. "www.drouot.com/...") are now inspected as-is; previously they were
   * never recognised.
   *
   * @param {*} URL - URL (or string containing a URL) to inspect.
   * @returns {string|undefined} Platform identifier, or `undefined` when the
   *   platform is not recognised.
   */
  detectPlatform(URL) {
    const raw = String(URL);
    const segments = raw.split("http");
    // With a scheme: keep the part following the first "http" (unchanged
    // behaviour). Without one: fall back to the raw string.
    const Url = segments.length > 1 ? 'http' + segments[1] : raw;

    if (Url.includes('interencheres')) {
      return this._CONST_INTERENCHERES;
    }

    if (Url.includes('drouot')) {
      return this._CONST_DROUOT;
    }

    return undefined;
  }

  /**
   * Click the first element matched by any of the candidate XPaths.
   *
   * @param {string[]} XPath - Candidate XPath expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<boolean>} `true` if an element was clicked.
   */
  async clickLink(XPath = [], page, context) {
    for (const XPathString of XPath) {
      const Elements = await ScraperTools._queryAfterWait(page, XPathString);
      if (Elements.length > 0) {
        await Elements[0].evaluate(b => b.click());
        return true;
      }
    }
    console.error("Error: No Link found for " + context);
    return false;
  }

  /**
   * Read an attribute from the first element (across the candidate XPaths)
   * that has a non-empty value for it.
   *
   * @param {string[]} XPath - Candidate XPath expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} attribute - Attribute name.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} Attribute value, or `""` when none was found.
   */
  async getAttribute(XPath = [], page, attribute, context) {
    for (const XPathString of XPath) {
      const Elements = await ScraperTools._queryAfterWait(page, XPathString);
      if (Elements.length > 0) {
        const Attribute = await page.evaluate(
          (element, attribute) => element.getAttribute(attribute),
          Elements[0],
          attribute
        );
        if (ScraperTools._hasContent(Attribute)) {
          return Attribute;
        }
      }
    }
    console.error("Error: No content found for the Attribute " + attribute + " for " + context);
    return "";
  }

  /**
   * Read the text content of the first element (across the candidate XPaths)
   * whose text is non-empty.
   *
   * @param {string[]} XPath - Candidate XPath expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} context - Label used in the error log.
   * @param {boolean} [log=false] - When true, log the matched element handles.
   * @returns {Promise<string>} Text content, or `""` when none was found.
   */
  async getTextContent(XPath = [], page, context, log = false) {
    for (const XPathString of XPath) {
      const Elements = await ScraperTools._queryAfterWait(page, XPathString);
      if (Elements.length > 0) {
        if (log) console.log(Elements);
        const Content = await page.evaluate(el => el.textContent, Elements[0]);
        if (ScraperTools._hasContent(Content)) {
          return Content;
        }
      }
    }
    console.error("Error: No content found for " + context);
    return "";
  }

  /**
   * Tell whether any of the candidate XPaths currently matches an element.
   * Does not wait for the element to appear.
   *
   * @param {string[]} XPath - Candidate XPath expressions, tried in order.
   * @param {object} page - Page to search.
   * @param {string} context - Unused; kept for signature parity with the
   *   other helpers.
   * @returns {Promise<boolean>}
   */
  async ElementExists(XPath = [], page, context) {
    for (const XPathString of XPath) {
      try {
        const Elements = await page.$x(XPathString);
        if (Elements.length > 0) {
          return true;
        }
      } catch (e) {
        // Best effort: a failing query counts as "not found" for this
        // candidate; keep trying the remaining XPaths.
      }
    }
    return false;
  }

  /**
   * Same as `getTextContent`, but searches relative to an element handle and
   * does not wait for the XPath to appear.
   *
   * @param {string[]} XPath - Candidate XPath expressions, tried in order.
   * @param {object} page - Page used to evaluate the text extraction.
   * @param {object} Element - Element handle exposing `$x`.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} Text content, or `""` when none was found.
   */
  async getTextContentElement(XPath = [], page, Element, context) {
    for (const XPathString of XPath) {
      const El = await Element.$x(XPathString);
      if (El.length > 0) {
        const Content = await page.evaluate(el => el.textContent, El[0]);
        if (ScraperTools._hasContent(Content)) {
          return Content;
        }
      }
    }
    console.error("Error: No content found for " + context);
    return "";
  }

  /**
   * Same as `getAttribute`, but searches relative to an element handle and
   * does not wait for the XPath to appear.
   *
   * @param {string[]} XPath - Candidate XPath expressions, tried in order.
   * @param {object} page - Page used to evaluate the attribute extraction.
   * @param {object} Element - Element handle exposing `$x`.
   * @param {string} attribute - Attribute name.
   * @param {string} context - Label used in the error log.
   * @returns {Promise<string>} Attribute value, or `""` when none was found.
   */
  async getAttributeElement(XPath = [], page, Element, attribute, context) {
    for (const XPathString of XPath) {
      const El = await Element.$x(XPathString);
      if (El.length > 0) {
        const Attribute = await page.evaluate((el, attr) => el.getAttribute(attr), El[0], attribute);
        if (ScraperTools._hasContent(Attribute)) {
          return Attribute;
        }
      }
    }
    console.error("Error: No content found for the Attribute " + attribute + " for " + context);
    return "";
  }
}
|
|
|
|
module.exports = {Scraper, ScraperTools}; |