first commit

Cyril Rouillon 2024-05-16 16:11:40 +02:00
commit fe9a8e2b20
32 changed files with 8427 additions and 0 deletions

5
.gitignore vendored Normal file
@@ -0,0 +1,5 @@
.vscode
agent/node_modules
scrapper/node_modules

350
AuctionServices/Scraper/Drouot/Drouot.js Normal file
@@ -0,0 +1,350 @@
// Drouot.js
'use strict';
const { platform } = require('os');
const {Scraper} = require('../Scraper');
const DrouotData = require('./DrouotData');
class Drouot extends Scraper {
constructor(Url) {
super(Url);
this.platformData = new DrouotData();
this.platformData.getUrlInfo(Url).then((data) => {
if(data.lotID == 0 && data.saleID == 0){
throw new Error('Invalid URL');
}
});
this._Name = 'drouot'
this._PAGE_MAIN = "https://drouot.com/fr/"
this._PAGE_LOGIN = "https://auth.drouot.com/login"
this._USER = "jp.ranu@cogip.de"
this._PWD = "LYPYRKDUsSMH5BaWQxvH#"
this._PATH_SESSION_FILE = ".session/session_drouot.json"
}
getPictures = async ({ page, data}) => {
const PictList = await this.platformData.getPictures(page, this.Url);
console.log('PictList : '+PictList)
return PictList
}
getLotInfos = async ({ page, data}) => {
console.log("getLotInfos "+this._Name+": "+this.Url)
// Navigate the page to a URL
await page.goto(this.Url);
let idLot = await this.platformData.getLotID(this.Url);
console.log('idLot : '+idLot)
// ## LotNumber
let lotNumber = await this.platformData.getLotNumber(page);
console.log('lotNumber : '+lotNumber)
// ## Title
let title = await this.platformData.getLotTitle(page);
console.log('title : '+title)
// ## Estimate
let {EstimateLow, EstimateHigh} = await this.platformData.getEstimate(page);
console.log('EstimateLow : '+EstimateLow)
// ## Description
let Description = await this.platformData.getDescription(page);
//console.log('Description : '+Description)
// ## Fees
let {feesText, fees} = await this.platformData.getFees(page);
console.log('feesText : '+feesText)
console.log('fees : '+fees)
// ################
// ### SALE
let {id_sale, urlSale} = await this.platformData.getSaleID(page);
console.log('SellNumber : '+id_sale)
console.log('url : '+urlSale)
let LotInfos = {
idPlatform: idLot,
platform : this._Name,
url: this.Url,
title: title,
lotNumber: lotNumber,
EstimateLow: EstimateLow,
EstimateHigh: EstimateHigh,
Description: Description,
feesText: feesText,
fees: fees,
saleInfo: {
idSale: id_sale,
url: urlSale
}
}
console.log('LotInfos : '+LotInfos)
return LotInfos
}
getSaleInfos = async ({ page, data}) => {
console.log("getSaleInfos "+this._Name+": "+this.Url)
// Navigate the page to a URL
await page.goto(this.Url);
let {saleID, urlSale} = await this.platformData.getUrlInfo(this.Url);
console.log('saleID : '+saleID)
console.log('urlSale : '+urlSale)
// ## Title
let title = await this.platformData.getSaleTitle(page);
console.log('title : '+title)
// ## Date
let date = await this.platformData.getSaleDate(page);
console.log('date : '+date)
// ## Location
let location = await this.platformData.getSaleLocation(page);
console.log('location : '+location)
// ## SaleHouseName
let saleHouseName = await this.platformData.getSaleHouseName(page);
console.log('saleHouseName : '+saleHouseName)
// ## Status
// ready : ready to be followed
// following : sale is followed by the AuctionAgent
// askStop : sale is followed by the AuctionAgent and the user asked to stop following
// pause : the Sale is stopped by the Auction House and ready to restart
// end : the Sale is ended
let status = 'ready'
let SaleInfo = {
idPlatform: saleID,
platform : this._Name,
url: urlSale,
title: title,
date: date,
location: location,
saleHouseName: saleHouseName,
status: status
}
console.log('SaleInfo : ', JSON.stringify(SaleInfo, null, 2));
return SaleInfo
}
getLotList = async ({ page, data}) => {
console.log("getLotList "+this._Name+": "+this.Url)
// Navigate the page to a URL
await page.goto(this.Url);
const LotList = await this.platformData.getLotList(page);
console.log('LotList : '+LotList)
return LotList
}
async CheckAndConnect(page) {
return new Promise(async (resolve, reject) => {
await page.goto(this._PAGE_MAIN);
//get the Connexion button
const [Connexion] = await page.$x("//div[contains(@class, 'btn') and contains(@class, 'ghost') and contains(text(), 'Connexion')]");
console.log(Connexion)
// if Connection button found => Login
if (Connexion) {
console.log("-- Login --")
await page.goto(this._PAGE_LOGIN);
//get the Email field
//console.log("-- get Email Input --")
await page.type('#email', this._USER);
//console.log("-- get password Input --")
await page.type("#password", this._PWD);
//console.log("-- get ConnexionButton --")
const [ConnexionButton] = await page.$x("//button[contains(text(), 'Connexion')]");
await ConnexionButton.evaluate(b => b.click());
//console.log("-- Login wait --")
await page.waitForTimeout(1000);
//resolve(page)
const [ConnexionOK] = await page.$x("//button[contains(text(), 'Continuer en tant que')]");
if (ConnexionOK) {
console.log("-- Connection OK --")
await ConnexionOK.evaluate(b => b.click());
await page.waitForTimeout(1000);
await this._saveSession(page)
// const sessionData = await page.session.dumpString({
// storageProviders: [
// StorageProviderName.Cookie,
// StorageProviderName.LocalStorage,
// StorageProviderName.SessionStorage,
// StorageProviderName.IndexedDB,
// ],
// })
// fs.writeFileSync(this._PATH_SESSION_FILE, sessionData);
// console.log("-- Connection OK --")
resolve(page)
} else {
console.error("-- !!!! Connection ERROR !!!! --");
reject()
}
// Already connected
} else {
console.log("-- Already connected --")
resolve(page)
}
})
}
Live = async (browser) => {
console.log("Live "+this._Name+": "+this.Url)
let page = await browser.newPage();
page = await this.CheckAndConnect(page);
let CheckAskStop = null;
let Socket = null;
const StopLive = async (params) => {
clearInterval(CheckAskStop);
Socket.off('Network.webSocketFrameReceived', listener);
page.close()
}
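// Incoming WebSocket frames are Socket.IO-style packets: event frames start with "42"
// (Engine.IO "message" + Socket.IO "event") followed by a JSON array [eventName, payload].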
const listener = async (params) => {
let payload = params.response.payloadData
if(payload.length>1 && payload.substring(0, 2) == '42'){
payload = JSON.parse(payload.substring(2))
//console.log(payload)
const type = payload[0];
const payloadData = payload[1];
switch (type) {
case 'startSale':
break;
case 'listAuctionedItems':
break;
case 'joinedSale':
await this.JucunduNextItem(
payloadData.sale_id,
payloadData.timestamp,
payloadData.item_id,
payloadData.order_number.primary,
payloadData.title,
payloadData.description,
payloadData.pricing.estimates.min,
payloadData.pricing.estimates.max,
payloadData
);
break;
case 'auctionedItem':
await this.JucunduAuctionedItem(
payloadData.item_id,
payloadData.timestamp,
payloadData.auctioned.amount,
payloadData.auctioned.sold,
payloadData.auctioned.type
);
break;
case 'nextItem':
await this.JucunduNextItem(
payloadData.sale_id,
payloadData.timestamp,
payloadData.item_id,
payloadData.order_number.primary,
payloadData.title,
payloadData.description,
payloadData.pricing.estimates.min,
payloadData.pricing.estimates.max,
payloadData
);
break;
case 'bid':
await this.JucundusBid(
payloadData.item_id,
payloadData.timestamp,
payloadData.amount,
payloadData.auctioned_type
);
break;
case 'pauseSale':
console.error('** Pause **');
console.log(payloadData);
// await this.JucundusEndSale()
// StopLive()
break;
case 'endSale':
// await this.JucundusEndSale()
// StopLive()
break;
default:
console.error('Unknown data type:', type);
console.log(payloadData);
}
}
};
try{
await page.goto(this.Url);
Socket = await page.target().createCDPSession();
await Socket.send('Network.enable');
await Socket.send('Page.enable');
Socket.on('Network.webSocketFrameReceived', listener);
console.log('Listener set up for Network.webSocketFrameReceived event');
// check if stop was asked
CheckAskStop = setInterval(async () => {
this.JucundusCheckStop()
.then(AskStop => {
if(AskStop){
StopLive()
}
})
}, 10000); // 10000 milliseconds = 10 seconds
}catch(e){
console.log('Error : '+e)
throw new Error('Error: '+e)
}
}
};
module.exports = Drouot

@@ -0,0 +1,185 @@
// Drouot.js
'use strict';
const {Scraper} = require('../Scraper');
class Drouot extends Scraper {
constructor(Url) {
super(Url);
this._Name = 'drouot'
this._PAGE_MAIN = "https://drouot.com/fr/"
this._PAGE_LOGIN = "https://auth.drouot.com/login"
this._USER = "jp.ranu@cogip.de"
this._PWD = "LYPYRKDUsSMH5BaWQxvH#"
this._PATH_SESSION_FILE = ".session/session_drouot.json"
}
getPictures = async ({ page, data}) => {
console.log("getPictures "+this._Name+": "+this.Url)
//add the picture to the list
let PictList = []
page.on('response', async response => {
const url = response.url();
if (response.request().resourceType() === 'image' && url.match("size=fullHD")) {
response.buffer().then(file => {
console.log("push "+url)
PictList.push(url)
});
}
});
function checkDup() {
const toFindDuplicates = array => array.filter((item, index) => array.indexOf(item) !== index)
const duplicateElements = toFindDuplicates(PictList);
// if duplicate pictures were added to the array
if (duplicateElements.length > 0) {
// remove duplicated content
PictList = PictList.filter(function (elem, pos) {
return PictList.indexOf(elem) == pos;
})
// stop the process
return false
// no duplicated picture
} else {
// continue the process
return true
}
}
// Navigate the page to a URL
await page.goto(this.Url);
console.log("goto "+this.Url)
await page.waitForTimeout(500);
//get the Next link
const [ButtonNext] = await page.$x("//*[@id='next']");
if (ButtonNext) {
let condition = true
do {
console.log("click")
await ButtonNext.evaluate(b => b.click());
await page.waitForTimeout(500);
condition = checkDup();
} while (condition);
}
return PictList
}
async CheckAndConnect(page) {
return new Promise(async (resolve, reject) => {
await page.goto(this._PAGE_MAIN);
//get the Connexion button
const [Connexion] = await page.$x("//div[contains(@class, 'btn') and contains(@class, 'ghost') and contains(text(), 'Connexion')]");
console.log(Connexion)
// if Connection button found => Login
if (Connexion) {
console.log("-- Login --")
await page.goto(this._PAGE_LOGIN);
//get the Email field
//console.log("-- get Email Input --")
await page.type('#email', this._USER);
//console.log("-- get password Input --")
await page.type("#password", this._PWD);
//console.log("-- get ConnexionButton --")
const [ConnexionButton] = await page.$x("//button[contains(text(), 'Connexion')]");
await ConnexionButton.evaluate(b => b.click());
//console.log("-- Login wait --")
await page.waitForTimeout(1000);
//resolve(page)
const [ConnexionOK] = await page.$x("//button[contains(text(), 'Continuer en tant que')]");
if (ConnexionOK) {
console.log("-- Connection OK --")
await ConnexionOK.evaluate(b => b.click());
await page.waitForTimeout(1000);
await this._saveSession(page)
// const sessionData = await page.session.dumpString({
// storageProviders: [
// StorageProviderName.Cookie,
// StorageProviderName.LocalStorage,
// StorageProviderName.SessionStorage,
// StorageProviderName.IndexedDB,
// ],
// })
// fs.writeFileSync(this._PATH_SESSION_FILE, sessionData);
// console.log("-- Connection OK --")
resolve(page)
} else {
console.error("-- !!!! Connection ERROR !!!! --");
reject()
}
// Already connected
} else {
console.log("-- Already connected --")
resolve(page)
}
})
}
getSellNumberFromURL = function (Url) {
const match = Url.match(/\/(\d+)-/);
if (match) {
const extractedNumber = parseInt(match[1], 10); // Convert the matched string to an integer
if (!isNaN(extractedNumber)) {
return match[1];
} else {
console.log("Invalid number in the URL");
}
} else {
console.log("Number not found in the URL.");
}
}
async Live() {
this.Url = "https://drouot.com/fr/v/147085-fine-asian-european--islamic-works-of-art"
const page = await this._getPage(true);
await this.CheckAndConnect(page)
await page.goto(this.Url);
const [LiveOn] = await page.$x("//span[contains(text(), 'Live en cours')]");
if (LiveOn) {
const SellNumber = this.getSellNumberFromURL(this.Url)
const UrlLive = "https://drouot.com/live/bidlive/" + SellNumber
await page.goto(UrlLive);
}
// get the Live Link
//await browser.close();
}
};
module.exports = Drouot

378
AuctionServices/Scraper/Drouot/DrouotData.js Normal file
@@ -0,0 +1,378 @@
// DrouotData.js
const {ScraperTools} = require('../Scraper');
const urlModule = require('url');
const moment = require('moment-timezone');
const { Console } = require('console');
const { title } = require('process');
class DrouotData extends ScraperTools {
_Name = 'drouot'
getUrlInfo = async (url) => {
// URL Lot : https://drouot.com/fr/l/25184163-john-conde-17651794-britanniqu
// https://drouot.com/fr/{{v: Vente/ l: Lot}}/{{LotID}}-john-conde-{{????}}-britanniqu
// URL Sale : https://drouot.com/fr/v/152658-fine-paintings-and-frames
// https://drouot.com/fr/{{v: Vente/ l: Lot}}/{{SaleID}}-fine-paintings-and-frames
let parsedUrl = new urlModule.URL(url);
let pathParts = parsedUrl.pathname.split('/').filter(Boolean);
// if sale URL
let saleID = 0
let lotID = 0
let TypeUrl = ''
let urlSale = ''
let urlLot = ''
if(pathParts[1] == 'v'){
TypeUrl = 'Sale'
saleID = pathParts[2].split('-')[0];
urlSale = parsedUrl.origin + parsedUrl.pathname
}else if(pathParts[1] == 'l'){
TypeUrl = 'Lot'
lotID = pathParts[2].split('-')[0];
urlLot = parsedUrl.origin + parsedUrl.pathname
}
return {
'TypeUrl': TypeUrl,
'saleID': saleID,
'lotID': lotID,
'urlSale': urlSale,
'urlLot': urlLot
}
}
// ## Lot
getPictures = async (page, Url) => {
console.log("getPictures "+this._Name+": "+Url)
//add the picture to the list
let PictList = []
page.on('response', async response => {
const url = response.url();
if (response.request().resourceType() === 'image' && url.match("size=fullHD")) {
response.buffer().then(file => {
console.log("push "+url)
PictList.push(url)
});
}
});
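// Image URLs are captured from network responses while the "next" button pages through the
// gallery; checkDup() returns false as soon as a URL repeats (the gallery looped), ending the loop.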
function checkDup() {
const toFindDuplicates = array => array.filter((item, index) => array.indexOf(item) !== index)
const duplicateElements = toFindDuplicates(PictList);
// if duplicate pictures were added to the array
if (duplicateElements.length > 0) {
// remove duplicated content
PictList = PictList.filter(function (elem, pos) {
return PictList.indexOf(elem) == pos;
})
// stop the process
return false
// no duplicated picture
} else {
// continue the process
return true
}
}
// Navigate the page to a URL
await page.goto(Url);
await page.waitForTimeout(500);
//get the Next link
const [ButtonNext] = await page.$x("//*[@id='next']");
if (ButtonNext) {
let condition = true
do {
console.log("click")
await ButtonNext.evaluate(b => b.click());
await page.waitForTimeout(500);
condition = checkDup();
} while (condition);
}
return PictList
}
getLotNumber = async (page) => {
const lotNumberXPath = [
'/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[1]/span/span',
]
let lotNumberString = await this.getTextContent(lotNumberXPath, page, 'lotNumberXPath')
let lotNumber = '';
if(lotNumberString != ''){
lotNumber = lotNumberString.replace('Lot ', '');
}
return lotNumber
}
getLotTitle = async (page) => {
const lotTitleXPath = [
'/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[5]/h1',
]
let lotTitleString = await this.getTextContent(lotTitleXPath, page, 'lotTitleXPath')
if (lotTitleString.length > 90) {
lotTitleString = lotTitleString.substring(0, 90) + '...';
}
return lotTitleString
}
getEstimate = async (page) => {
const EstimateXPath = [
'/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[7]/div/div[1]/span/span',
]
let EstimateString = await this.getTextContent(EstimateXPath, page, 'EstimateXPath')
//console.log('EstimateString : '+EstimateString)
let EstimateLow = 0
let EstimateHigh = 0
if(EstimateString != ''){
let matches = EstimateString.match(/(\d{1,3}(?:\s\d{3})*)/g);
if (matches) {
if (matches.length >= 2) {
EstimateLow = parseInt(matches[0].replace(/\s/g, ''), 10);
EstimateHigh = parseInt(matches[1].replace(/\s/g, ''), 10);
console.log('Low:', EstimateLow);
console.log('High:', EstimateHigh);
} else
if(matches.length == 1){
EstimateLow = parseInt(matches[0].replace(/\s/g, ''), 10);
EstimateHigh = 0;
} else {
console.error('Could not extract numbers.');
}
}
}
return {EstimateLow, EstimateHigh}
}
getDescription = async (page) => {
const DescriptionXPath = [
'//h3[contains(@class, "descriptionLineWrap")]',
]
let Description = await this.getTextContent(DescriptionXPath, page, 'DescriptionXPath')
return Description
}
getFees = async (page) => {
let feesText = ''
let fees = 0
const FeesXPath = [
'/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[7]/div/div[3]/a/span',
]
feesText = await this.getTextContent(FeesXPath, page, 'FeesXPath')
// detect digit
if (!/\d/.test(feesText)) {
const FeesXPath = [
'/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[7]/div/div[2]/a/span',
]
feesText = await this.getTextContent(FeesXPath, page, 'FeesXPath')
}
feesText = feesText.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
let matches = feesText.match(/(\d+(\.\d+)?)/)
if (matches) {
fees = matches[0];
}
return {feesText, fees}
}
getLotID = async (url) => {
let UrlInfo = await this.getUrlInfo(url);
let id_lot = UrlInfo.lotID
return id_lot
}
getSaleID = async (page) => {
const UrlCatalogueXPath = [
'/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[10]/div/div[2]/div[3]/a[1]',
]
let UrlCatalogue = await this.getAttribute(UrlCatalogueXPath, page, "href", "UrlCatalogueXPath")
console.log('UrlCatalogue : '+UrlCatalogue)
UrlCatalogue = UrlCatalogue.substring(0,1) == '/' ? 'https://drouot.com'+UrlCatalogue : UrlCatalogue
let UrlInfo = await this.getUrlInfo(UrlCatalogue);
let id_sale = UrlInfo.saleID
let urlSale = UrlInfo.urlSale
return {id_sale, urlSale}
}
// ## Lot List
_getLotInfoList = async (page, Elements) => {
let LotList = [];
for (let element of Elements) {
let Lot = {}
try{
let LotnameXPath = [
'.//a/div/div/div[5]/div/div[1]',
'.//a/div/div/div[4]/div/div[1]',
]
let Lotname = await this.getTextContentElement(LotnameXPath, page, element, 'LotnameXPath')
// idPlatform from the url
let LotUrlXPath = [
'.//a'
]
let urlLot = await this.getAttributeElement(LotUrlXPath, page, element, 'href', 'UrlListLot')
let match = urlLot.match(/lot-(.*).html/);
let idPlatform = match[1];
Lot = {
title: Lotname,
idPlatform: idPlatform,
platform: this._Name,
lotNumber: Lotname.split('Lot ')[1]
}
}catch(e){
console.error(e)
}
//console.log(LotList)
LotList.push(Lot);
};
return LotList;
}
getLotList = async (page) => {
let LotList = []
let NextBtn = false
do {
// extract Lot List
const LotListXPath = [
'//div[contains(@class, "sale-item-wrapper")]',
]
let Elements = await page.$x(LotListXPath[0]);
if (Elements.length > 0) {
LotList = [].concat(LotList, await this._getLotInfoList(page, Elements))
}
// search for the Button Next (only if enabled)
let NextPageButtonXPath = "//button[contains(@aria-label, 'Page suivante') and not(contains(@class, 'v-pagination__navigation--disabled'))]"
let NextPageButton = await page.$x(NextPageButtonXPath);
if (NextPageButton.length > 0) {
NextBtn = true
await NextPageButton[0].evaluate(b => b.click());
await page.waitForTimeout(1000);
console.log('Next Page')
}else{
NextBtn = false
}
} while (NextBtn);
return LotList
}
// ## Sale
getSaleTitle = async (page) => {
const SaleTitleXPath = [
'/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/h1',
]
let SaleTitle = await this.getTextContent(SaleTitleXPath, page, 'SaleTitleXPath')
return SaleTitle
}
getSaleDate = async (page) => {
// Test if Live Sale
let BoolLive = false;
try {
// const VideoXPath = [
// '//*[@id="streaming-subscriber"]',
// ]
// let VideoExists = await this.ElementExists(VideoXPath, page, 'VideoXPath')
// console.log('VideoExists : '+VideoExists)
// BoolLive = VideoExists
} catch (error) {}
let SaleDate;
// if future sale
if(!BoolLive){
await page.waitForTimeout(400);
const SaleDateXPath = [
'/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/div[1]/div',
]
let SaleDateString = await this.getTextContent(SaleDateXPath, page, 'SaleDateXPath')
SaleDateString = SaleDateString.trim()
let cleanStr = SaleDateString.replace(/\\s|\\n/g, ' ').replace(/\s+/g, ' ');
SaleDate = moment.tz(cleanStr, 'dddd D MMMM à HH:mm (z)', 'fr', 'Europe/Paris').format();
// Live Sale
}else{
SaleDate = moment.tz('Europe/Paris').format();
}
console.log('SaleDate : '+SaleDate)
return SaleDate
}
getSaleLocation = async (page) => {
const SaleLocationXPath = [
'/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/div[4]',
]
let SaleLocation = await this.getTextContent(SaleLocationXPath, page, 'SaleLocationXPath')
return SaleLocation.trim()
}
getSaleHouseName = async (page) => {
const SaleHouseNameXPath = [
'/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/h4/a[1]/span',
]
let SaleHouseName = await this.getTextContent(SaleHouseNameXPath, page, 'SaleHouseNameXPath')
SaleHouseName = SaleHouseName.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
return SaleHouseName.trim()
}
}
module.exports = DrouotData

273
AuctionServices/Scraper/Interencheres/Interencheres.js Normal file
@@ -0,0 +1,273 @@
// Interencheres.js
'use strict';
const {Scraper} = require('../Scraper');
const InterencheresData = require('./InterencheresData');
class Interencheres extends Scraper {
constructor(Url) {
super(Url);
this.platformData = new InterencheresData();
this.platformData.getUrlInfo(Url).then((data) => {
if(data.lotID == 0 && data.saleID == 0){
throw new Error('Invalid URL');
}
});
this._Name = 'interencheres'
this._PAGE_MAIN = ""
this._PAGE_LOGIN = ""
this._USER = ""
this._PWD = ""
this._PATH_SESSION_FILE = ".session/session_inter.json"
}
getPictures = async ({ page, data}) => {
const PictList = await this.platformData.getPictures(page, this.Url);
console.log('PictList : '+PictList)
return PictList
}
getLotInfos = async ({ page, data}) => {
console.log("getLotInfos "+this._Name+": "+this.Url)
// Navigate the page to a URL
await page.goto(this.Url);
let idLot = await this.platformData.getLotID(this.Url);
console.log('idLot : '+idLot)
// ## LotNumber
let lotNumber = await this.platformData.getLotNumber(page);
console.log('lotNumber : '+lotNumber)
// ## Estimate
let {EstimateLow, EstimateHigh} = await this.platformData.getEstimate(page);
console.log('EstimateLow : '+EstimateLow)
// ## Description
let Description = await this.platformData.getDescription(page);
//console.log('Description : '+Description)
// ## Fees
let {feesText, fees} = await this.platformData.getFees(page);
console.log('feesText : '+feesText)
console.log('fees : '+fees)
// ################
// ### SALE
let {id_sale, urlSale} = await this.platformData.getSaleIdUrl(this.Url);
console.log('SellNumber : '+id_sale)
console.log('url : '+urlSale)
let LotInfos = {
idPlatform: idLot,
platform : this._Name,
url: this.Url,
title: 'Lot '+lotNumber,
lotNumber: lotNumber,
EstimateLow: EstimateLow,
EstimateHigh: EstimateHigh,
Description: Description,
feesText: feesText,
fees: fees,
saleInfo: {
idSale: id_sale,
url: urlSale
}
}
console.log('LotInfos : '+LotInfos)
return LotInfos
}
getSaleInfos = async ({ page, data}) => {
console.log("getSaleInfos "+this._Name+": "+this.Url)
// Navigate the page to a URL
await page.goto(this.Url);
let {id_sale, urlSale} = await this.platformData.getSaleIdUrl(this.Url);
console.log('url : '+urlSale)
// ## Title
let title = await this.platformData.getSaleTitle(page);
// ## Date
let date = await this.platformData.getSaleDate(page);
// ## Location
let location = await this.platformData.getSaleLocation(page);
// ## SaleHouseName
let saleHouseName = await this.platformData.getSaleHouseName(page);
// ## Status
// ready : ready to be followed
// following : sale is followed by the AuctionAgent
// askStop : sale is followed by the AuctionAgent and the user asked to stop following
// pause : the Sale is stopped by the Auction House and ready to restart
// end : the Sale is ended
let status = 'ready'
let SaleInfo = {
idPlatform: id_sale,
platform : this._Name,
url: urlSale,
title: title,
date: date,
location: location,
saleHouseName: saleHouseName,
status: status
}
//console.log('SaleInfo : ', JSON.stringify(SaleInfo, null, 2));
return SaleInfo
}
getLotList = async ({ page, data}) => {
console.log("getLotList "+this._Name+": "+this.Url)
// Navigate the page to a URL
await page.goto(this.Url);
const LotList = await this.platformData.getLotList(page);
console.log('LotList : '+LotList)
return LotList
}
Live = async (browser) => {
console.log("Live "+this._Name+": "+this.Url)
const page = await browser.newPage();
let CheckAskStop = null;
let Socket = null;
const StopLive = async (params) => {
clearInterval(CheckAskStop);
Socket.off('Network.webSocketFrameReceived', listener);
page.close()
}
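// Socket.IO event frames arrive prefixed with "42"; the JSON that follows is [eventName, payload].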
const listener = async (params) => {
let payload = params.response.payloadData
if(payload.length>1 && payload.substring(0, 2) == '42'){
payload = JSON.parse(payload.substring(2))
//console.log(payload)
const type = payload[0];
const payloadData = payload[1];
switch (type) {
case 'startSale':
break;
case 'listAuctionedItems':
break;
case 'joinedSale':
await this.JucunduNextItem(
payloadData.sale_id,
payloadData.timestamp,
payloadData.item_id,
payloadData.order_number.primary,
payloadData.title,
payloadData.description,
payloadData.pricing.estimates.min,
payloadData.pricing.estimates.max,
payloadData
);
break;
case 'auctionedItem':
await this.JucunduAuctionedItem(
payloadData.item_id,
payloadData.timestamp,
payloadData.auctioned.amount,
payloadData.auctioned.sold,
payloadData.auctioned.type
);
break;
case 'nextItem':
await this.JucunduNextItem(
payloadData.sale_id,
payloadData.timestamp,
payloadData.item_id,
payloadData.order_number.primary,
payloadData.title,
payloadData.description,
payloadData.pricing.estimates.min,
payloadData.pricing.estimates.max,
payloadData
);
break;
case 'bid':
await this.JucundusBid(
payloadData.item_id,
payloadData.timestamp,
payloadData.amount,
payloadData.auctioned_type
);
break;
case 'pauseSale':
console.error('** Pause **');
console.log(payloadData);
// await this.JucundusEndSale()
// StopLive()
break;
case 'endSale':
// await this.JucundusEndSale()
// StopLive()
break;
default:
console.error('Unknown data type:', type);
console.log(payloadData);
}
}
};
try{
await page.goto(this.Url);
Socket = await page.target().createCDPSession();
await Socket.send('Network.enable');
await Socket.send('Page.enable');
Socket.on('Network.webSocketFrameReceived', listener);
console.log('Listener set up for Network.webSocketFrameReceived event');
// check if stop was asked
CheckAskStop = setInterval(async () => {
this.JucundusCheckStop()
.then(AskStop => {
if(AskStop){
StopLive()
}
})
}, 10000); // 10000 milliseconds = 10 seconds
}catch(e){
console.log('Error : '+e)
throw new Error('Error: '+e)
}
}
};
module.exports = Interencheres

394
AuctionServices/Scraper/Interencheres/InterencheresData.js Normal file
@@ -0,0 +1,394 @@
// interencheresData.js
const {ScraperTools} = require('../Scraper');
const urlModule = require('url');
const moment = require('moment-timezone');
const { Console } = require('console');
const { title } = require('process');
class InterencheresData extends ScraperTools {
_Name = 'interencheres'
getUrlInfo = async (url) => {
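// e.g. https://www.interencheres.com/vehicules/vehicules-624955/lot-75622389.html
// -> typeSale: 'vehicules', saleID: '624955', lotID: '75622389'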
let parsedUrl = new urlModule.URL(url);
let pathParts = parsedUrl.pathname.split('/').filter(Boolean);
let typeSale = pathParts[0];
let saleID = pathParts[1].split('-')[pathParts[1].split('-').length-1];
// if lot URL
let lotID = 0;
if(pathParts.length > 2){
lotID = pathParts[2].split('-')[1].split('.')[0];
}
return {
'typeSale': typeSale,
'saleID': saleID,
'lotID': lotID
}
}
// ## Lot
getPictures = async (page, Url) => {
let PictList = []
function checkDup() {
const toFindDuplicates = array => array.filter((item, index) => array.indexOf(item) !== index)
const duplicateElements = toFindDuplicates(PictList);
// if duplicate pictures were added to the array
if (duplicateElements.length > 0) {
// remove duplicated content
PictList = PictList.filter(function (elem, pos) {
return PictList.indexOf(elem) == pos;
})
// stop the process
return false
// no duplicated picture
} else {
// continue the process
return true
}
}
page.on('response', async response => {
const url = response.url();
if (url.match("thumbor-indbupload.interencheres.com")) {
response.buffer().then(file => {
console.log("push "+url)
PictList.push(url)
});
}
});
console.log('go to : '+Url)
await page.goto(Url);
const picturesNumberXPath = [
"//div[contains(@class, 'pswp__counter')]"
]
let picturesNumberString = await this.getTextContent(picturesNumberXPath, page, 'picturesNumberXPath')
let picturesNumber = 100;
if(picturesNumberString != ''){
picturesNumber = parseInt(picturesNumberString.split(" / ")[1])
console.log('picturesNumber : '+picturesNumber)
}
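// Click through the gallery arrow until the count from the "x / y" counter is reached
// (at most 20 clicks), then drop any duplicate URLs collected from the responses.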
let condition = true
let idx = 0
do {
const ButtonNextXPath = [
"//button[contains(@class, 'pswp__button--arrow--right')]"
]
await this.clickLink(ButtonNextXPath, page, 'ButtonNextXPath')
await page.waitForTimeout(300);
idx++
// if number of pictures found or 20 pictures checked
if(idx+1 == picturesNumber || idx==20)condition = false
} while (condition);
checkDup()
return PictList
}
getLotNumber = async (page) => {
const lotNumberXPath = [
'/html/body/div[1]/div/div/div/main/div/div/div[2]/div/div[1]/div[2]/div[2]/div[1]',
'/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[1]/div[1]'
]
let lotNumberString = await this.getTextContent(lotNumberXPath, page, 'lotNumberXPath')
let lotNumber = '';
if(lotNumberString != ''){
lotNumber = lotNumberString.replace('Lot ', '');
}
return lotNumber
}
getEstimate = async (page) => {
const EstimateXPath = [
'/html/body/div[1]/div/div/div/main/div/div/div[2]/div/div[1]/div[2]/div[3]/div[2]/span',
'/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[2]/div[2]/span'
]
let EstimateString = await this.getTextContent(EstimateXPath, page, 'EstimateXPath')
console.log('EstimateString : '+EstimateString)
let EstimateLow = 0
let EstimateHigh = 0
if(EstimateString != ''){
let matches = EstimateString.match(/(\d{1,3}(?:\s\d{3})*)/g);
if (matches) {
if (matches.length >= 2) {
EstimateLow = parseInt(matches[0].replace(/\s/g, ''), 10);
EstimateHigh = parseInt(matches[1].replace(/\s/g, ''), 10);
console.log('Low:', EstimateLow);
console.log('High:', EstimateHigh);
} else
if(matches.length == 1){
EstimateLow = parseInt(matches[0].replace(/\s/g, ''), 10);
EstimateHigh = 0;
} else {
console.log('Could not extract numbers.');
}
}
}
return {EstimateLow, EstimateHigh}
}
getDescription = async (page) => {
const DescriptionXPath = [
'//div[contains(@class, "description")]',
'/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[4]/div',
'/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[4]/div',
'/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[3]/div'
]
let Description = await this.getTextContent(DescriptionXPath, page, 'DescriptionXPath')
return Description
}
getFees = async (page) => {
let feesText = ''
let fees = 0
const ButtonFeesXPath = [
'.//a[contains(text(),"Frais de vente")]',
'/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[3]/div[2]/a',
'/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[3]/div[2]/a',
'/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[2]/div[2]/a'
]
if(await this.clickLink(ButtonFeesXPath, page, 'ButtonFeesXPath')){
const FeesXPath = [
"//strong[contains(text(), 'Frais de vente :')]/following-sibling::span",
'/html/body/div[1]/div/div/div[3]/div/div/div[2]/div/p[1]/p[1]/span',
]
feesText = await this.getTextContent(FeesXPath, page, 'FeesXPath')
if(feesText != ''){
feesText = feesText.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
let matches = feesText.match(/(\d+(\.\d+)?)/)
if (matches) {
fees = matches[0];
}
}
}
return {feesText, fees}
}
getLotID = async (url) => {
let UrlInfo = await this.getUrlInfo(url);
let id_lot = UrlInfo.lotID
console.log('id_lot : '+id_lot)
return id_lot
}
getSaleIdUrl = async (url) => {
let UrlInfo = await this.getUrlInfo(url);
let id_sale = UrlInfo.saleID
let urlSale = url
// remove lot information if present
if (urlSale.includes('/lot-')) {
urlSale = url.split("/lot-")[0]
}
// remove parameters
if (urlSale.includes('?')) {
urlSale = urlSale.split("?")[0];
}
console.log('getSaleIdUrl urlSale : '+urlSale)
return {id_sale, urlSale}
}
// ## Lot List
_getLotInfoList = async (page, Elements) => {
let LotList = [];
for (let element of Elements) {
let Lot = {}
try{
let LotnameXPath = [
'.//a/div/div/div[5]/div/div[1]',
'.//a/div/div/div[4]/div/div[1]',
]
let Lotname = await this.getTextContentElement(LotnameXPath, page, element, 'LotnameXPath')
// idPlatform from the url
let LotUrlXPath = [
'.//a'
]
let urlLot = await this.getAttributeElement(LotUrlXPath, page, element, 'href', 'UrlListLot')
let match = urlLot.match(/lot-(.*).html/);
let idPlatform = match[1];
Lot = {
title: Lotname,
idPlatform: idPlatform,
platform: this._Name,
lotNumber: Lotname.split('Lot ')[1]
}
}catch(e){
console.error(e)
}
//console.log(LotList)
LotList.push(Lot);
};
return LotList;
}
getLotList = async (page) => {
let LotList = []
let NextBtn = false
do {
// extract Lot List
const LotListXPath = [
'//div[contains(@class, "sale-item-wrapper")]',
]
let Elements = await page.$x(LotListXPath[0]);
if (Elements.length > 0) {
LotList = [].concat(LotList, await this._getLotInfoList(page, Elements))
}
// search for the Button Next (only if enabled)
let NextPageButtonXPath = "//button[contains(@aria-label, 'Page suivante') and not(contains(@class, 'v-pagination__navigation--disabled'))]"
let NextPageButton = await page.$x(NextPageButtonXPath);
if (NextPageButton.length > 0) {
NextBtn = true
await NextPageButton[0].evaluate(b => b.click());
await page.waitForTimeout(1000);
console.log('Next Page')
}else{
NextBtn = false
}
} while (NextBtn);
return LotList
}
// ## Sale
getSaleTitle = async (page) => {
const SaleTitleXPath = [
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div[1]/div[2]/h1/div/div/div/div/div',
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div/div/h1/div/div/div/div/div',
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div/div/div/h1/div/div/div/div/div'
]
let SaleTitle = await this.getTextContent(SaleTitleXPath, page, 'SaleTitleXPath')
return SaleTitle
}
getSaleDate = async (page) => {
// Test if Live Sale
let BoolLive = false;
try {
const VideoXPath = [
'//*[@id="streaming-subscriber"]',
]
let VideoExists = await this.ElementExists(VideoXPath, page, 'VideoXPath')
console.log('VideoExists : '+VideoExists)
BoolLive = VideoExists
} catch (error) {}
let SaleDate;
// if future sale
if(!BoolLive){
await page.waitForTimeout(400);
const SaleDateXPath = [
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div[1]/div[2]/h2/div[2]/div/div[1]/span',
]
let SaleDateString = await this.getTextContent(SaleDateXPath, page, 'SaleDateXPath')
SaleDateString = SaleDateString.trim()
console.log('SaleDateString : '+SaleDateString)
const months = {
'janvier': '01',
'février': '02',
'mars': '03',
'avril': '04',
'mai': '05',
'juin': '06',
'juillet': '07',
'août': '08',
'septembre': '09',
'octobre': '10',
'novembre': '11',
'décembre': '12'
};
let SaleDateArr = SaleDateString.split(' ');
let day = parseInt(SaleDateArr[0].length === 1 ? '0'+SaleDateArr[0] : SaleDateArr[0]);
let month = parseInt(months[SaleDateArr[1]]);
let year = parseInt(SaleDateArr[2]);
let hour = parseInt(SaleDateArr[4].split('h')[0]);
let minute = parseInt(SaleDateArr[4].split('h')[1]);
SaleDate = moment.tz([year, month - 1, day, hour, minute], 'Europe/Paris').format();
// Live Sale
}else{
SaleDate = moment.tz('Europe/Paris').format();
}
console.log('SaleDate : '+SaleDate)
return SaleDate
}
getSaleLocation = async (page) => {
const SaleLocationXPath = [
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div[1]/div[2]/h2/div[2]/div/div[2]/span',
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div/div/h2/div[2]'
]
let SaleLocation = await this.getTextContent(SaleLocationXPath, page, 'SaleLocationXPath')
return SaleLocation.trim()
}
getSaleHouseName = async (page) => {
const SaleHouseNameXPath = [
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div/div/div[2]/div[2]/div[2]/a',
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[2]/div[3]/div/div/div/div/div[2]/div/div/div[2]/div[2]/div[2]/a',
'/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div/div/div/div[2]/div[2]/a',
'/html/body/div[1]/div/div/div[1]/main/div/div/div/div/div/div[2]/div[3]/div/div/div/div/div[2]/div/div/div/div[2]/div[2]/a'
]
let SaleHouseName = await this.getTextContent(SaleHouseNameXPath, page, 'SaleHouseNameXPath')
SaleHouseName = SaleHouseName.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
return SaleHouseName.trim()
}
}
module.exports = InterencheresData

331
AuctionServices/Scraper/Scraper.js Normal file
@@ -0,0 +1,331 @@
// Scraper.js
'use strict';
const fs = require('node:fs');
const fetch = require('node-fetch');
class Scraper {
_Name = ""
_Browser = null
_PAGE_MAIN = ""
_PAGE_LOGIN = ""
_USER = ""
_PWD = ""
_PATH_SESSION_FILE = ""
_Proxy = ""
_DebugMode = false
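// Base URL of the Jucundus backend API, reached on the Docker host (port 3000)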
_JucundusUrl = "http://host.docker.internal:3000"
constructor(Url) {
this.Url = Url;
}
async _saveSession(page) {
return new Promise(async (resolve, reject) => {
console.log('-- Save Session --')
const cookies = await page.cookies();
//console.log(cookies)
//await fs.writeFile(this._PATH_SESSION_FILE, JSON.stringify(cookies));
fs.writeFileSync(this._PATH_SESSION_FILE, JSON.stringify(cookies));
resolve(page)
})
}
async _restoreSession(page) {
return new Promise(async (resolve, reject) => {
if (fs.existsSync(this._PATH_SESSION_FILE)) {
console.log('-- restore Session --')
const cookies = JSON.parse(fs.readFileSync(this._PATH_SESSION_FILE));
await page.setCookie(...cookies);
}
resolve(page)
})
}
getPictures({ page, data}) {
}
getLotInfos({ page, data}) {}
getSaleInfos({ page, data}) {}
getLotList({ page, data}) {}
async Live({ page, data}) {}
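// The Jucundus* helpers below push live auction events (next item, bids, hammer results)
// to the backend REST API and poll the sale status to honour stop requests.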
async JucundusCheckStop(){
//console.log('Check if Stop is asked')
// check if stop was asked
let url = encodeURIComponent(this.Url)
return new Promise((resolve, reject) => {
fetch(this._JucundusUrl+'/api/sale/getByUrl/'+url)
.then(response => response.json())
.then(saleInfo => {
let status = saleInfo.status
//console.log('status : '+status)
if(status == 'askStop'){
console.log('Stop was asked')
// return to ready status
this.JucundusSetSaleStatus(saleInfo, 'ready')
.then(() => resolve(true));
} else {
resolve(false);
}
})
.catch(error => {
console.error(error);
reject(new Error('Error: '+error))
});
})
}
async JucundusEndSale(){
console.log('JucundusEndSale')
// look up the sale by its URL and set its status to 'end'
let url = encodeURIComponent(this.Url)
return new Promise((resolve, reject) => {
fetch(this._JucundusUrl+'/api/sale/getByUrl/'+url)
.then(response => response.json())
.then(saleInfo => {
// set end status
this.JucundusSetSaleStatus(saleInfo, 'end')
.then(() => resolve(true));
})
.catch(error => {
console.error(error);
reject(new Error('Error: '+error))
});
})
}
async JucundusSetSaleStatus(saleInfo, status){
// change the status of the sale
saleInfo.status = status
return new Promise((resolve, reject) => {
fetch(this._JucundusUrl+'/api/sale/sale/'+saleInfo._id, {
method: 'PUT',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(saleInfo)})
.then(() => resolve(true))
.catch(error => {
console.error(error);
reject(new Error('Error: '+error))
});
})
}
async JucunduNextItem(sale_id, timestamp, item_id, num_lot, title, description, EstimateLow, EstimateHigh, RawData){
console.log('JucunduNextItem', sale_id, timestamp, item_id, num_lot)
return new Promise((resolve, reject) => {
fetch(this._JucundusUrl+'/api/lot/NextItem', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(
{
idPlatform: item_id,
idSalePlatform: sale_id,
platform: this._Name,
timestamp: timestamp,
lotNumber: num_lot,
title: title,
description: description,
EstimateLow: EstimateLow,
EstimateHigh: EstimateHigh,
RawData: RawData
}
)})
.then(() => resolve(true))
.catch(error => {
console.error(error);
reject(new Error('Error: '+error))
});
})
}
async JucundusBid(item_id, timestamp, amount, auctioned_type){
console.log('JucundusBid', timestamp, item_id, amount, auctioned_type)
return new Promise((resolve, reject) => {
fetch(this._JucundusUrl+'/api/lot/Bid', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(
{
idPlatform: item_id,
platform: this._Name,
timestamp: timestamp,
amount: amount,
auctioned_type: auctioned_type
}
)})
.then(() => resolve(true))
.catch(error => {
console.error(error);
reject(new Error('Error: '+error))
});
})
}
async JucunduAuctionedItem(item_id, timestamp, amount, sold, auctioned_type){
console.log('JucunduAuctionedItem', timestamp, item_id, amount, sold)
return new Promise((resolve, reject) => {
fetch(this._JucundusUrl+'/api/lot/AuctionedItem', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(
{
idPlatform: item_id,
platform: this._Name,
timestamp: timestamp,
amount: amount,
auctioned_type: auctioned_type,
sold: sold,
}
)})
.then(() => resolve(true))
.catch(error => {
console.error(error);
reject(new Error('Error: '+error))
});
})
}
};
class ScraperTools {
_CONST_INTERENCHERES = 'interencheres'
_CONST_DROUOT = 'drouot'
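// Return the platform constant matching the URL, or undefined for unsupported platforms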
detectPlatform(URL){
let Url = 'http'+String(URL).split("http")[1]
if(Url.includes('interencheres')){
return this._CONST_INTERENCHERES
}
if(Url.includes('drouot')){
return this._CONST_DROUOT
}
}
async clickLink(XPath = [], page, context){
for (let XPathString of XPath) {
try{
await page.waitForXPath(XPathString, { timeout: 1000 });
}catch(e){}
let Elements = await page.$x(XPathString);
if (Elements.length > 0) {
await Elements[0].evaluate(b => b.click());
return true;
}
}
console.error("Error: No Link found for "+context)
return false;
}
async getAttribute(XPath = [], page, attribute, context){
for (let XPathString of XPath) {
try{
await page.waitForXPath(XPathString, { timeout: 1000 });
}catch(e){}
let Elements = await page.$x(XPathString);
if (Elements.length > 0) {
const Attribute = await page.evaluate((element,attribute) => element.getAttribute(attribute), Elements[0],attribute);
if(Attribute != "" && Attribute != null && Attribute != undefined) {
return Attribute;
}
}
}
console.error("Error: No content found for the Attribute "+attribute+" for "+context)
return "";
}
async getTextContent(XPath = [], page, context, log = false){
for (let XPathString of XPath) {
try{
await page.waitForXPath(XPathString, { timeout: 1000 });
}catch(e){}
let Elements = await page.$x(XPathString);
if (Elements.length > 0) {
if(log) console.log(Elements)
let Content = await page.evaluate(el => el.textContent, Elements[0]);
if(Content != "" && Content != null && Content != undefined) {
return Content;
}
}
}
console.error("Error: No content found for "+context)
return "";
}
async ElementExists(XPath = [], page, context){
for (let XPathString of XPath) {
try{
let Elements = await page.$x(XPathString);
if (Elements.length > 0) {
return true;
}
}catch(e){}
}
return false;
}
async getTextContentElement(XPath = [], page, Element, context){
for (let XPathString of XPath) {
let El = await Element.$x(XPathString);
if (El.length > 0) {
let Content = await page.evaluate(el => el.textContent, El[0]);
if(Content != "" && Content != null && Content != undefined) {
return Content;
}
}
}
console.error("Error: No content found for "+context)
return "";
}
async getAttributeElement(XPath = [], page, Element, attribute, context){
for (let XPathString of XPath) {
let El = await Element.$x(XPathString);
if (El.length > 0) {
const Attribute = await page.evaluate((el, attr) => el.getAttribute(attr), El[0], attribute);
if(Attribute != "" && Attribute != null && Attribute != undefined) {
return Attribute;
}
}
}
console.error("Error: No content found for the Attribute "+attribute+" for "+context)
return "";
}
}
module.exports = {Scraper, ScraperTools};

21
README.md Normal file
@@ -0,0 +1,21 @@
Dev :
```bash
docker-compose -f docker-compose-dev.yml build
docker-compose -f docker-compose-dev.yml up --scale agent=3
```
# External API (scrapper)
## Lot
http://localhost:3020/api/lot/getPictures/https%3A%2F%2Fwww.interencheres.com%2Fvehicules%2Fvehicules-624955%2Flot-75622389.html
http://localhost:3020/api/lot/getInfos/https%3A%2F%2Fwww.interencheres.com%2Fvehicules%2Fintertrading-vehicules-626382%2Flot-75938824.html%3Funiverse%3Dvehicles
## Sale
http://localhost:3020/api/sale/getSaleInfos/https%3A%2F%2Fwww.interencheres.com%2Fmateriels-professionnels%2Fvente-de-mobilier-et-materiel-de-bureau-627660
http://localhost:3020/api/sale/getLotList/https%3A%2F%2Fwww.interencheres.com%2Fmateriels-professionnels%2Fvente-de-mobilier-et-materiel-de-bureau-627660
http://localhost:3020/api/sale/followSale/https%3A%2F%2Fwww.interencheres.com%2Fmateriels-professionnels%2Fvente-de-mobilier-et-materiel-de-bureau-627660
# Internal API (agent)
http://localhost:3000/internApi/follow/https%3A%2F%2Fwww.interencheres.com%2Fmateriels-professionnels%2Fvente-de-mobilier-et-materiel-de-bureau-627660
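For example, once the dev stack is up, the endpoints above can be called with curl; the target URL must be percent-encoded (host and port follow the dev compose file):
```bash
# Example: fetch the picture list of an interencheres lot (lot URL percent-encoded)
curl "http://localhost:3020/api/lot/getPictures/https%3A%2F%2Fwww.interencheres.com%2Fvehicules%2Fvehicules-624955%2Flot-75622389.html"
```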

41
agent/Dockerfile.dev Normal file
@@ -0,0 +1,41 @@
FROM node:slim
# We don't need the standalone Chromium
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
# Install Google Chrome Stable and fonts
# Note: this installs the necessary libs to make the browser work with Puppeteer.
RUN apt-get update && apt-get install gnupg wget -y && \
wget --quiet --output-document=- https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor > /etc/apt/trusted.gpg.d/google-archive.gpg && \
sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' && \
apt-get update && \
apt-get install google-chrome-stable -y --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
# Setting up the work directory
WORKDIR /agent
#Copying all the files in our project
COPY . .
# Installing dependencies
RUN npm install
# Add user so we don't need --no-sandbox.
# same layer as npm install to keep re-chowned files from using up several hundred MBs more space
# RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \
# && mkdir -p /home/pptruser/Downloads \
# && chown -R pptruser:pptruser /home/pptruser \
# && chown -R pptruser:pptruser ./node_modules \
# && chown -R pptruser:pptruser ./package.json \
# && chown -R pptruser:pptruser ./package-lock.json
# # Run everything after as non-privileged user.
# USER pptruser
# Starting our application
#CMD [ "npm", "run", "debug-cluster" ]
CMD [ "npm", "run", "start" ]
# Exposing server port
EXPOSE 80

65
agent/controllers/follow.js Normal file
@@ -0,0 +1,65 @@
const asyncHandler = require("express-async-handler");
const {ScraperTools} = require('../AuctionServices/Scraper/Scraper.js')
const Drouot = require('../AuctionServices/Scraper/Drouot/Drouot.js')
const Interencheres = require('../AuctionServices/Scraper/Interencheres/Interencheres.js')
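// Keep everything from "http" onward in the decoded :url parameter; return "" when no URL is present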
let CleanUrl = function(url){
if(String(url).split("http").length > 1){
url = 'http'+String(url).split("http")[1]
}else{
url = ""
}
return url
}
let getAuctionPlatform = function(Url){
let AuctionPlatform
let STools = new ScraperTools();
switch (STools.detectPlatform(Url)) {
case STools._CONST_INTERENCHERES:
AuctionPlatform = new Interencheres(Url);
break;
case STools._CONST_DROUOT:
AuctionPlatform = new Drouot(Url);
break;
default:
break;
}
return AuctionPlatform
}
// get Sale info
exports.sale = asyncHandler(async (req, res, next) => {
let url = req.params.url
url = decodeURIComponent(url);
url = CleanUrl(url)
if(url == ""){
return res.status(400).send("URL not supported")
}
try{
let AuctionPlatform = getAuctionPlatform(url);
if(AuctionPlatform){
console.log("Agent Follow Sale: ", url)
AuctionPlatform.Live(req.browser)
res.status(200).send({"Following URL": url})
}else{
res.status(400).send("URL not supported")
}
}catch(e){
res.status(500).send("Error: "+e)
}
});

11
agent/controllers/health.js Normal file
@@ -0,0 +1,11 @@
const asyncHandler = require("express-async-handler");
// get Sale info
exports.health = asyncHandler(async (req, res, next) => {
try{
res.status(200).send("Health OK")
}catch(e){
res.status(500).send("Error: "+e)
}
});

25
agent/index.js Normal file
@@ -0,0 +1,25 @@
const express = require('express')
const app = express()
var bodyParser = require('body-parser');
app.use(bodyParser.json())
//const puppeteer = require('puppeteer');
const puppeteer = require('puppeteer-extra');
const pluginStealth = require('puppeteer-extra-plugin-stealth');
puppeteer.use(pluginStealth())
const puppeteerMiddleware = require('./middleware/puppeteer');
(async () => {
app.use(puppeteerMiddleware(puppeteer));
// main routes
app.use('/internApi/follow', require('./routes/follow'));
app.use('/health', require('./routes/health'));
})();
module.exports = app

14
agent/middleware/puppeteer.js Normal file
@@ -0,0 +1,14 @@
module.exports = (puppeteer) => {
return async (req, res, next) => {
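// Launch a fresh headless Chrome for each incoming request and expose it as req.browser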
const browser = await puppeteer.launch({
executablePath: '/usr/bin/google-chrome',
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
ignoreDefaultArgs: ['--disable-extensions'],
headless: 'new'
});
req.browser = browser;
next();
}
}

2901
agent/package-lock.json generated Normal file

File diff suppressed because it is too large

25
agent/package.json Normal file
@@ -0,0 +1,25 @@
{
"name": "agent",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "nodemon server.js",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"body-parser": "^1.20.2",
"express": "^4.19.2",
"express-async-handler": "^1.2.0",
"moment-timezone": "^0.5.45",
"node-fetch": "^2.6.1",
"puppeteer": "^22.6.4",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
},
"devDependencies": {
"nodemon": "^3.0.3"
}
}

6
agent/routes/follow.js Normal file
@@ -0,0 +1,6 @@
const controllers = require('../controllers/follow')
const router = require('express').Router()
router.get('/sale/:url', controllers.sale)
module.exports = router

6
agent/routes/health.js Normal file
@@ -0,0 +1,6 @@
const controllers = require('../controllers/health')
const router = require('express').Router()
router.get('/check', controllers.health)
module.exports = router

16
agent/server.js Normal file
@@ -0,0 +1,16 @@
const app = require('./index.js')
const port = process.env.PORT || '80'
app.listen(port, () => {
console.log('Server listening on port '+port);
});
process.on('unhandledRejection', (reason, promise) => {
console.log('Unhandled Rejection at:', promise, 'reason:', reason);
// Application specific logging, throwing an error, or other logic here
});
process.on('uncaughtException', (err, origin) => {
console.log('Caught exception: ', err, 'Exception origin: ', origin);
// Application specific logging, throwing an error, or other logic here
});

29
docker-compose-dev.yml Normal file
@@ -0,0 +1,29 @@
version: '3.1'

services:
  agent:
    build:
      context: ./agent
      dockerfile: Dockerfile.dev
    volumes:
      - ./agent:/agent
      - ./AuctionServices:/agent/AuctionServices
    networks:
      - internal
    # ports:
    #   - "80:80"
  scrapper:
    build:
      context: ./scrapper
      dockerfile: Dockerfile.dev
    ports:
      - 3020:3020
    volumes:
      - ./scrapper:/scrapper
      - ./AuctionServices:/scrapper/AuctionServices
    networks:
      - internal

networks:
  internal:

24
docker-compose.yml Normal file
@@ -0,0 +1,24 @@
version: '3.1'

services:
  scrapper:
    build: .
    restart: always
    hostname: auctionagent.saucisse.ninja
    ports:
      - 3000
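    # Traefik routing: redirect HTTP to HTTPS and serve the scrapper on auctionagent.saucisse.ninja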
    labels:
      - "traefik.enable=true"
      - "traefik.http.middlewares.redirecthttps.redirectscheme.scheme=https"
      - "traefik.http.middlewares.redirecthttps.redirectscheme.permanent=true"
      - "traefik.http.routers.auctionagent-http.rule=Host(`auctionagent.saucisse.ninja`)"
      - "traefik.http.routers.auctionagent-http.middlewares=redirecthttps"
      - "traefik.http.routers.auctionagent.rule=Host(`auctionagent.saucisse.ninja`)"
      - "traefik.http.routers.auctionagent.tls=true"
      - "traefik.http.routers.auctionagent.tls.certresolver=myresolver"
      - "traefik.http.routers.auctionagent.tls.domains[0].main=auctionagent.saucisse.ninja"
      - "traefik.http.services.scrapper.loadbalancer.server.port=3000"

41
scrapper/Dockerfile.dev Normal file
@@ -0,0 +1,41 @@
FROM node:slim
# We don't need the standalone Chromium
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
# Install Google Chrome Stable and fonts
# Note: this installs the necessary libs to make the browser work with Puppeteer.
RUN apt-get update && apt-get install gnupg wget -y && \
wget --quiet --output-document=- https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor > /etc/apt/trusted.gpg.d/google-archive.gpg && \
sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' && \
apt-get update && \
apt-get install google-chrome-stable -y --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
# Setting up the work directory
WORKDIR /scrapper
#Copying all the files in our project
COPY . .
# Installing dependencies
RUN npm install
# Add user so we don't need --no-sandbox.
# same layer as npm install to keep re-chowned files from using up several hundred MBs more space
# RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \
# && mkdir -p /home/pptruser/Downloads \
# && chown -R pptruser:pptruser /home/pptruser \
# && chown -R pptruser:pptruser ./node_modules \
# && chown -R pptruser:pptruser ./package.json \
# && chown -R pptruser:pptruser ./package-lock.json
# # Run everything after as non-privileged user.
# USER pptruser
# Starting our application
#CMD [ "npm", "run", "debug-cluster" ]
CMD [ "npm", "run", "start" ]
# Exposing server port
EXPOSE 3020

44
scrapper/app.js Normal file
@@ -0,0 +1,44 @@
const express = require('express')
const app = express()
var bodyParser = require('body-parser');
app.use(bodyParser.json())
//const puppeteer = require('puppeteer');
const puppeteer = require('puppeteer-extra');
const pluginStealth = require('puppeteer-extra-plugin-stealth');
puppeteer.use(pluginStealth())
const puppeteerCluster = require('./middleware/puppeteerCluster');
const { Cluster } = require('puppeteer-cluster');
(async () => {
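// CONCURRENCY_BROWSER gives each worker its own browser instance; up to 6 scraping jobs run in parallel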
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_BROWSER,
maxConcurrency: 6,
//monitor: true,
timeout: 20000,
retryLimit: 2,
puppeteerOptions: {
executablePath: '/usr/bin/google-chrome',
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
ignoreDefaultArgs: ['--disable-extensions'],
headless: 'new',
env: {
TZ: 'Europe/Paris'
}
},
puppeteer: puppeteer
});
app.use(puppeteerCluster(cluster));
// main routes
app.use('/api/sale', require('./routes/sale'));
app.use('/api/lot', require('./routes/lot'));
})();
module.exports = app

83
scrapper/controllers/lot.js Normal file
@@ -0,0 +1,83 @@
const asyncHandler = require("express-async-handler");
const {ScraperTools} = require('../AuctionServices/Scraper/Scraper.js')
const Drouot = require('../AuctionServices/Scraper/Drouot/Drouot.js')
const Interencheres = require('../AuctionServices/Scraper/Interencheres/Interencheres.js')
let getAuctionPlatform = function(Url){
let AuctionPlatform
let STools = new ScraperTools();
switch (STools.detectPlatform(Url)) {
case STools._CONST_INTERENCHERES:
AuctionPlatform = new Interencheres(Url);
break;
case STools._CONST_DROUOT:
AuctionPlatform = new Drouot(Url);
break;
default:
break;
}
return AuctionPlatform
}
let CleanUrl = function(url){
if(String(url).split("http").length > 1){
url = 'http'+String(url).split("http")[1]
}else{
url = ""
}
return url
}
// Display list of all pictures of a lot.
exports.getPictures = asyncHandler(async (req, res, next) => {
let url = req.params.url
url = decodeURIComponent(url);
url = CleanUrl(url)
if(url == ""){
return res.status(400).send("URL not supported")
}
try{
let AuctionPlatform = getAuctionPlatform(url);
if(AuctionPlatform){
const PictList = await req.puppeteerCluster.execute(AuctionPlatform.getPictures);
res.json(PictList);
}else{
res.status(400).send("URL not supported")
}
}catch(e){
res.status(500).send("Error: "+e)
}
});
exports.getInfos = asyncHandler(async (req, res, next) => {
let url = req.params.url
url = decodeURIComponent(url);
url = CleanUrl(url)
if(url == ""){
return res.status(400).send("URL not supported")
}
try{
let AuctionPlatform = getAuctionPlatform(url);
if(AuctionPlatform){
const LotInfos = await req.puppeteerCluster.execute(AuctionPlatform.getLotInfos);
res.json(LotInfos);
}else{
res.status(400).send("URL not supported")
}
}catch(e){
res.status(500).send("Error: "+e)
}
});

126
scrapper/controllers/sale.js Normal file
@@ -0,0 +1,126 @@
const asyncHandler = require("express-async-handler");
const fetch = require('node-fetch');
const {ScraperTools} = require('../AuctionServices/Scraper/Scraper.js')
const Drouot = require('../AuctionServices/Scraper/Drouot/Drouot.js')
const Interencheres = require('../AuctionServices/Scraper/Interencheres/Interencheres.js')
let getAuctionPlatform = function(Url){
let AuctionPlatform
let STools = new ScraperTools();
switch (STools.detectPlatform(Url)) {
case STools._CONST_INTERENCHERES:
AuctionPlatform = new Interencheres(Url);
break;
case STools._CONST_DROUOT:
AuctionPlatform = new Drouot(Url);
break;
default:
break;
}
return AuctionPlatform
}
let CleanUrl = function(url){
if(String(url).split("http").length > 1){
url = 'http'+String(url).split("http")[1]
}else{
url = ""
}
return url
}
// ## PUPPETEER CLUSTER
// get Sale info
exports.getSaleInfos = asyncHandler(async (req, res, next) => {
let url = req.params.url
url = decodeURIComponent(url);
url = CleanUrl(url)
if(url == ""){
return res.status(400).send("URL not supported")
}
try{
let AuctionPlatform = getAuctionPlatform(url);
if(AuctionPlatform){
const SaleInfos = await req.puppeteerCluster.execute(AuctionPlatform.getSaleInfos);
res.json(SaleInfos);
}else{
res.status(400).send("URL not supported")
}
}catch(e){
res.status(500).send("Error: "+e)
}
});
// get Sale Lot list
exports.getLotList = asyncHandler(async (req, res, next) => {
let url = req.params.url
url = decodeURIComponent(url);
url = CleanUrl(url)
if(url == ""){
return res.status(400).send("URL not supported")
}
try{
let AuctionPlatform = getAuctionPlatform(url);
if(AuctionPlatform){
const LotList = await req.puppeteerCluster.execute(AuctionPlatform.getLotList);
res.json(LotList);
}else{
res.status(400).send("URL not supported")
}
}catch(e){
res.status(500).send("Error: "+e)
}
});
// ## AGENT PUPPETEER
//Follow a live Sale
exports.followSale = asyncHandler(async (req, res, next) => {
let url = req.params.url
url = decodeURIComponent(url);
url = CleanUrl(url)
if(url == ""){
return res.status(400).send("URL not supported")
}
try{
let AuctionPlatform = getAuctionPlatform(url);
if(AuctionPlatform){
console.log('Scrapper followSale : '+encodeURIComponent(url))
fetch('http://agent/internApi/follow/sale/'+encodeURIComponent(url))
.then(response => {
console.log("fetch OK")
//response.json()
} )
.then(saleInfo => {})
.catch(error => {
console.error(error);
throw new Error('Error: '+error)
});
res.status(200).send({status: "Following"})
}else{
res.status(400).send("URL not supported")
}
}catch(e){
res.status(500).send("Error: "+e)
}
});

6
scrapper/middleware/puppeteerCluster.js Normal file
@@ -0,0 +1,6 @@
module.exports = (cluster) => {
return (req, res, next) => {
req.puppeteerCluster = cluster;
next();
}
}

2949
scrapper/package-lock.json generated Normal file

File diff suppressed because it is too large

27
scrapper/package.json Normal file
@@ -0,0 +1,27 @@
{
"name": "auctionagent",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "nodemon server.js",
"debug-cluster": "DEBUG='puppeteer-cluster:*' node server.js"
},
"author": "",
"license": "ISC",
"dependencies": {
"body-parser": "^1.20.2",
"express": "^4.18.2",
"express-async-handler": "^1.2.0",
"moment-timezone": "^0.5.45",
"node-fetch": "^2.6.1",
"puppeteer": "^21.10.0",
"puppeteer-cluster": "^0.23.0",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
},
"devDependencies": {
"nodemon": "^3.0.3"
}
}

7
scrapper/routes/lot.js Normal file
@@ -0,0 +1,7 @@
const controllers = require('../controllers/lot')
const router = require('express').Router()
router.get('/getPictures/:url', controllers.getPictures)
router.get('/getInfos/:url', controllers.getInfos)
module.exports = router

8
scrapper/routes/sale.js Normal file
@@ -0,0 +1,8 @@
const controllers = require('../controllers/sale')
const router = require('express').Router()
router.get('/getSaleInfos/:url', controllers.getSaleInfos)
router.get('/getLotList/:url', controllers.getLotList)
router.get('/followSale/:url', controllers.followSale)
module.exports = router

16
scrapper/server.js Normal file
@@ -0,0 +1,16 @@
const app = require('./app.js')
const port = process.env.PORT || '3020'
app.listen(port, () => {
console.log('Server listening on port '+port);
});
process.on('unhandledRejection', (reason, promise) => {
console.log('Unhandled Rejection at:', promise, 'reason:', reason);
// Application specific logging, throwing an error, or other logic here
});
process.on('uncaughtException', (err, origin) => {
console.log('Caught exception: ', err, 'Exception origin: ', origin);
// Application specific logging, throwing an error, or other logic here
});

@@ -0,0 +1,20 @@
const fetch = require('node-fetch');
class Jucundus {
constructor() {
this.url = 'http://localhost:3000/api';
}
setSaleStatus(status) {
// Call the Jucundus API with the configured base URL and return the parsed response
return fetch(this.url+'/sale/setSaleStatus/'+status)
.then(response => response.json())
.catch(error => {
console.error(error);
});
}
}
module.exports = Jucundus
