// Agent/AuctionServices/Scraper/Drouot/workData/Drouot.old.js
//
// 185 lines
// 5.0 KiB
// JavaScript

// Drouot.js
'use strict';
const {Scraper} = require('../../Scraper');
/**
 * Scraper for the Drouot auction website (drouot.com).
 *
 * Relies on the `Scraper` base class for `_getPage` and `_saveSession`, and
 * presumably for storing the constructor argument as `this.Url`
 * (not visible in this file — confirm in ../../Scraper).
 */
class Drouot extends Scraper {
  /**
   * @param {string} Url - Page URL handed to the `Scraper` base constructor.
   */
  constructor(Url) {
    super(Url);
    this._Name = 'drouot';
    this._PAGE_MAIN = "https://drouot.com/fr/";
    this._PAGE_LOGIN = "https://auth.drouot.com/login";
    // FIXME(security): credentials are committed in source. They can now be
    // overridden through DROUOT_USER / DROUOT_PWD; the literals remain only as
    // a backward-compatible fallback and should be rotated and removed.
    this._USER = process.env.DROUOT_USER ?? "jp.ranu@cogip.de";
    this._PWD = process.env.DROUOT_PWD ?? "LYPYRKDUsSMH5BaWQxvH#";
    this._PATH_SESSION_FILE = ".session/session_drouot.json";
  }

  /**
   * Collects the URLs of the full-HD pictures requested by the page at
   * `this.Url`, clicking the element with id `next` until a picture URL that
   * was already seen is requested again.
   *
   * @param {object} args
   * @param {object} args.page - Browser page exposing `on`/`off`, `goto`,
   *   `waitForTimeout` and `$x` (a Puppeteer page, judging by the calls made).
   * @param {*} [args.data] - Accepted for interface compatibility; not used.
   * @param {number} [args.maxClicks=200] - Upper bound on "next" clicks so the
   *   loop terminates even when no picture URL is ever requested twice.
   * @returns {Promise<string[]>} Unique picture URLs in first-seen order.
   */
  getPictures = async ({ page, data, maxClicks = 200 }) => {
    console.log(`getPictures ${this._Name}: ${this.Url}`);

    // A Set keeps first-seen order and makes the repeat check O(1).
    const seenUrls = new Set();
    let repeatSeen = false;

    const onResponse = (response) => {
      const url = response.url();
      const isFullHdImage =
        response.request().resourceType() === 'image' && url.includes("size=fullHD");
      if (!isFullHdImage) {
        return;
      }
      if (seenUrls.has(url)) {
        // Same picture requested again: the viewer has wrapped around.
        repeatSeen = true;
        return;
      }
      console.log(`push ${url}`);
      seenUrls.add(url);
    };

    page.on('response', onResponse);
    try {
      await page.goto(this.Url);
      console.log(`goto ${this.Url}`);
      await page.waitForTimeout(500);

      const [ButtonNext] = await page.$x("//*[@id='next']");
      if (ButtonNext) {
        let clicks = 0;
        do {
          console.log("click");
          await ButtonNext.evaluate(b => b.click());
          // Give the page time to request the next picture.
          await page.waitForTimeout(500);
          clicks += 1;
        } while (!repeatSeen && clicks < maxClicks);

        if (!repeatSeen) {
          console.warn(`getPictures ${this._Name}: stopped after ${clicks} clicks without a repeated picture`);
        }
      }
    } finally {
      // Detach the listener so late responses cannot alter the result and the
      // handler does not accumulate on a reused page.
      page.off('response', onResponse);
    }

    return [...seenUrls];
  }

  /**
   * Opens the main page and logs in when a "Connexion" button is present,
   * then saves the session through `_saveSession`.
   *
   * @param {object} page - Browser page to drive.
   * @returns {Promise<object>} The same page once logged in (or already logged in).
   * @throws {Error} When the login form button or the post-login confirmation
   *   button cannot be found. Navigation errors from the page propagate as-is.
   */
  async CheckAndConnect(page) {
    await page.goto(this._PAGE_MAIN);

    // The "Connexion" button is only rendered for anonymous visitors.
    const [Connexion] = await page.$x("//div[contains(@class, 'btn') and contains(@class, 'ghost') and contains(text(), 'Connexion')]");
    console.log(Connexion);
    if (!Connexion) {
      console.log("-- Allready connected --");
      return page;
    }

    console.log("-- Login --");
    await page.goto(this._PAGE_LOGIN);
    await page.type('#email', this._USER);
    await page.type("#password", this._PWD);

    const [ConnexionButton] = await page.$x("//button[contains(text(), 'Connexion')]");
    if (!ConnexionButton) {
      console.error("-- !!!! Connection ERROR !!!! --");
      throw new Error("Drouot login failed: submit button not found on the login page");
    }
    await ConnexionButton.evaluate(b => b.click());
    await page.waitForTimeout(1000);

    // After a successful login the site shows a "Continuer en tant que ..." button.
    const [ConnexionOK] = await page.$x("//button[contains(text(), 'Continuer en tant que')]");
    if (!ConnexionOK) {
      console.error("-- !!!! Connection ERROR !!!! --");
      throw new Error("Drouot login failed: confirmation button not found after submitting credentials");
    }

    console.log("-- Connection OK --");
    await ConnexionOK.evaluate(b => b.click());
    await page.waitForTimeout(1000);
    await this._saveSession(page);
    return page;
  }

  /**
   * Extracts the sale number from a URL: the first run of digits that directly
   * follows a `/` and is directly followed by a `-`.
   *
   * @param {string} Url - URL to inspect.
   * @returns {string|undefined} The digits as a string, or `undefined` (after
   *   logging a message) when the URL is not a string or has no such segment.
   */
  getSellNumberFromURL = function (Url) {
    const match = typeof Url === 'string' ? Url.match(/\/(\d+)-/) : null;
    if (!match) {
      console.log("Number not found in the URL.");
      return undefined;
    }
    // `\d+` guarantees a valid decimal number, so no NaN check is required.
    return match[1];
  }

  /**
   * Logs in, opens the sale page and, when the page shows "Live en cours",
   * navigates to the live bidding page of that sale.
   *
   * NOTE(review): `this.Url` is overwritten with a fixed sale URL, so the URL
   * given to the constructor is ignored here — looks like a debugging
   * leftover; confirm before relying on it.
   *
   * @returns {Promise<void>}
   */
  async Live() {
    this.Url = "https://drouot.com/fr/v/147085-fine-asian-european--islamic-works-of-art";
    const page = await this._getPage(true);
    await this.CheckAndConnect(page);
    await page.goto(this.Url);

    const [LiveOn] = await page.$x("//span[contains(text(), 'Live en cours')]");
    if (!LiveOn) {
      return;
    }

    const SellNumber = this.getSellNumberFromURL(this.Url);
    if (SellNumber === undefined) {
      // Avoid navigating to ".../bidlive/undefined".
      return;
    }
    const UrlLive = "https://drouot.com/live/bidlive/" + SellNumber;
    await page.goto(UrlLive);
  }
};
// CommonJS export: the module's value is the Drouot class itself.
module.exports = Drouot