// Drouot scraper: extracts lot, lot-list, sale and live data from drouot.com pages.
const {ScraperTools} = require('../Scraper');
const urlModule = require('url');
const moment = require('moment-timezone');
// NOTE(review): `Console` and `title` are not referenced anywhere in this file;
// they look like accidental auto-imports. Kept so the module's requires are unchanged.
const { Console } = require('console');
const { title } = require('process');

// Origin used to turn site-relative hrefs into absolute URLs.
const DROUOT_ORIGIN = 'https://drouot.com';

/**
 * Scraper for drouot.com built on the shared ScraperTools helpers
 * (getTextContent, getAttribute, getTextContentElement, getAttributeElement).
 *
 * Methods are declared as arrow-function class fields, so they stay bound to
 * the instance even when passed around as callbacks.
 */
class DrouotData extends ScraperTools {

    // Platform identifier, used in logs and stored on every scraped lot.
    _Name = 'drouot';
    // Live payload injected through setLiveData(); null until then.
    _LiveData = null;

    /**
     * Classifies a Drouot URL as a sale or a lot URL and extracts its numeric id.
     *
     * Lot URL:  https://drouot.com/fr/l/25184163-john-conde-17651794-britanniqu
     *           https://drouot.com/fr/l/{{LotID}}-{{slug}}
     * Sale URL: https://drouot.com/fr/v/152658-fine-paintings-and-frames
     *           https://drouot.com/fr/v/{{SaleID}}-{{slug}}
     *
     * @param {string} url Absolute URL (an invalid URL makes `new URL` throw).
     * @returns {Promise<{TypeUrl: string, saleID: (string|number), lotID: (string|number), urlSale: string, urlLot: string}>}
     *   TypeUrl is 'Sale', 'Lot' or '' when the URL is not recognised. Ids are
     *   the leading segment of the slug (a string) or 0 when absent.
     */
    getUrlInfo = async (url) => {
        const parsedUrl = new urlModule.URL(url);
        // pathParts[0] is the locale ("fr"), [1] the kind ("v" or "l"), [2] the "<id>-<slug>" segment.
        const [, kind, slug] = parsedUrl.pathname.split('/').filter(Boolean);
        const cleanUrl = parsedUrl.origin + parsedUrl.pathname;

        let saleID = 0;
        let lotID = 0;
        let TypeUrl = '';
        let urlSale = '';
        let urlLot = '';

        // Requiring `slug` avoids a TypeError on URLs such as ".../fr/v" that carry no id segment.
        if (kind === 'v' && slug) {
            TypeUrl = 'Sale';
            saleID = slug.split('-')[0];
            urlSale = cleanUrl;
        } else if (kind === 'l' && slug) {
            TypeUrl = 'Lot';
            lotID = slug.split('-')[0];
            urlLot = cleanUrl;
        }

        return {
            'TypeUrl': TypeUrl,
            'saleID': saleID,
            'lotID': lotID,
            'urlSale': urlSale,
            'urlLot': urlLot
        };
    }

    // ## Lot

    /**
     * Opens a lot page and collects the URLs of the full-HD images it loads,
     * clicking the "next" control until an already-seen image URL shows up again.
     *
     * @param {object} page Puppeteer page.
     * @param {string} Url Lot page URL.
     * @param {number} [maxClicks=200] Safety cap on "next" clicks, so the loop
     *   terminates even if no duplicate image URL is ever observed.
     * @returns {Promise<string[]>} De-duplicated image URLs in first-seen order.
     */
    getPictures = async (page, Url, maxClicks = 200) => {
        console.log("getPictures "+this._Name+": "+Url);

        const pictures = [];

        const onResponse = (response) => {
            const url = response.url();
            if (response.request().resourceType() !== 'image' || !url.includes('size=fullHD')) {
                return;
            }
            // A URL is only recorded once its body could actually be read.
            response.buffer()
                .then(() => {
                    console.log("push "+url);
                    pictures.push(url);
                })
                .catch((err) => {
                    // Bodies can be unavailable (e.g. after a navigation); do not let
                    // that surface as an unhandled promise rejection.
                    console.error("getPictures "+this._Name+": unable to read "+url+" ("+err.message+")");
                });
        };

        const hasDuplicates = () => new Set(pictures).size !== pictures.length;

        page.on('response', onResponse);
        try {
            // Navigate the page to a URL
            await page.goto(Url);
            await page.waitForTimeout(500);

            // get the Next link
            const [ButtonNext] = await page.$x("//*[@id='next']");
            if (ButtonNext) {
                for (let clicks = 0; clicks < maxClicks; clicks++) {
                    console.log("click");
                    await ButtonNext.evaluate(b => b.click());
                    await page.waitForTimeout(500);
                    // A repeated URL means an already-seen image was requested again: stop.
                    if (hasDuplicates()) {
                        break;
                    }
                }
            }
        } finally {
            // Always detach, otherwise every call leaves one more listener on the page.
            page.off('response', onResponse);
        }

        return [...new Set(pictures)];
    }

    /**
     * Reads the lot number shown on a lot page.
     *
     * @param {object} page Puppeteer page.
     * @returns {Promise<string>} The label with its first "Lot " occurrence removed, or '' when no text was found.
     */
    getLotNumber = async (page) => {
        const lotNumberXPath = [
            '/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[1]/span/span',
        ];
        const lotNumberString = await this.getTextContent(lotNumberXPath, page, 'lotNumberXPath');
        return lotNumberString ? lotNumberString.replace('Lot ', '') : '';
    }

    /**
     * Reads the lot title, truncated to 90 characters followed by '...'.
     *
     * @param {object} page Puppeteer page.
     * @returns {Promise<string>}
     */
    getLotTitle = async (page) => {
        const lotTitleXPath = [
            '/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[5]/h1',
        ];
        const lotTitleString = await this.getTextContent(lotTitleXPath, page, 'lotTitleXPath');
        if (lotTitleString.length > 90) {
            return lotTitleString.substring(0, 90) + '...';
        }
        return lotTitleString;
    }

    /**
     * Reads the estimate range of a lot.
     *
     * @param {object} page Puppeteer page.
     * @returns {Promise<{EstimateLow: number, EstimateHigh: number}>}
     *   First and second numbers found in the estimate text; 0 for a missing value.
     */
    getEstimate = async (page) => {
        const EstimateXPath = [
            '/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[7]/div/div[1]/span/span',
        ];
        const EstimateString = await this.getTextContent(EstimateXPath, page, 'EstimateXPath');

        let EstimateLow = 0;
        let EstimateHigh = 0;
        if (!EstimateString) {
            return {EstimateLow, EstimateHigh};
        }

        // Either whitespace-grouped thousands ("1 500") or a plain digit run ("1500").
        // The previous pattern only allowed 1-3 leading digits, so "1500" was split
        // into "150" and "0".
        const matches = EstimateString.match(/\d{1,3}(?:\s\d{3})+(?!\d)|\d+/g);
        const toInt = (text) => parseInt(text.replace(/\s/g, ''), 10);

        if (!matches) {
            console.error('Could not extract numbers.');
        } else if (matches.length >= 2) {
            EstimateLow = toInt(matches[0]);
            EstimateHigh = toInt(matches[1]);
            console.log('Low:', EstimateLow);
            console.log('High:', EstimateHigh);
        } else {
            EstimateLow = toInt(matches[0]);
        }

        return {EstimateLow, EstimateHigh};
    }

    /**
     * Reads the lot description.
     *
     * @param {object} page Puppeteer page.
     * @returns {Promise<string>}
     */
    getDescription = async (page) => {
        const DescriptionXPath = [
            '//h3[contains(@class, "descriptionLineWrap")]',
        ];
        return this.getTextContent(DescriptionXPath, page, 'DescriptionXPath');
    }

    /**
     * Reads the fees label of a lot and extracts the first number from it.
     *
     * @param {object} page Puppeteer page.
     * @returns {Promise<{feesText: string, fees: (string|number)}>}
     *   `fees` is the matched number as a string, or the number 0 when none was found.
     */
    getFees = async (page) => {
        const primaryFeesXPath = [
            '/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[7]/div/div[3]/a/span',
        ];
        const fallbackFeesXPath = [
            '/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[7]/div/div[2]/a/span',
        ];

        let feesText = await this.getTextContent(primaryFeesXPath, page, 'FeesXPath');
        // No digit in the first candidate: try the alternative position of the fees link.
        if (!/\d/.test(feesText)) {
            feesText = await this.getTextContent(fallbackFeesXPath, page, 'FeesXPath');
        }

        feesText = feesText.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();

        let fees = 0;
        const matches = feesText.match(/(\d+(\.\d+)?)/);
        if (matches) {
            fees = matches[0];
        }

        return {feesText, fees};
    }

    /**
     * Extracts the lot id from a lot URL.
     *
     * @param {string} url Absolute lot URL.
     * @returns {Promise<string|number>} The id, or 0 when the URL is not a lot URL.
     */
    getLotID = async (url) => {
        const { lotID } = await this.getUrlInfo(url);
        return lotID;
    }

    /**
     * Resolves the sale a lot belongs to, from the catalogue link on the lot page.
     *
     * @param {object} page Puppeteer page showing a lot.
     * @returns {Promise<{id_sale: (string|number), urlSale: string}>}
     */
    getSaleID = async (page) => {
        const UrlCatalogueXPath = [
            '/html/body/div[1]/div[4]/div[2]/div/div/div/div[3]/div/div[1]/div[2]/div[10]/div/div[2]/div[3]/a[1]',
        ];
        let UrlCatalogue = await this.getAttribute(UrlCatalogueXPath, page, "href", "UrlCatalogueXPath");
        console.log('UrlCatalogue : '+UrlCatalogue);

        // Site-relative hrefs must be made absolute before they can be parsed.
        if (UrlCatalogue.startsWith('/')) {
            UrlCatalogue = DROUOT_ORIGIN + UrlCatalogue;
        }

        const UrlInfo = await this.getUrlInfo(UrlCatalogue);
        return {id_sale: UrlInfo.saleID, urlSale: UrlInfo.urlSale};
    }

    // ## Lot List

    /**
     * Builds one lot record per list element.
     *
     * An element that cannot be parsed is logged and contributes an empty object,
     * so the result always has one entry per element.
     *
     * @param {object} page Puppeteer page.
     * @param {object[]} Elements Element handles of the lot cards.
     * @returns {Promise<object[]>} Records shaped {title, idPlatform, platform, lotNumber}.
     */
    _getLotInfoList = async (page, Elements) => {
        const LotnameXPath = [
            './/a/div/div/div[5]/div/div[1]',
            './/a/div/div/div[4]/div/div[1]',
        ];
        const LotUrlXPath = [
            './/a'
        ];

        const LotList = [];
        for (const element of Elements) {
            let Lot = {};
            try {
                const Lotname = await this.getTextContentElement(LotnameXPath, page, element, 'LotnameXPath');
                const urlLot = await this.getAttributeElement(LotUrlXPath, page, element, 'href', 'UrlListLot');

                // idPlatform from the url. The "lot-<id>.html" pattern is kept for
                // backward compatibility; Drouot lot URLs look like "/fr/l/<id>-<slug>",
                // so fall back to the regular URL parser when it does not match.
                let idPlatform;
                const legacyMatch = urlLot.match(/lot-(.*).html/);
                if (legacyMatch) {
                    idPlatform = legacyMatch[1];
                } else {
                    const absoluteUrl = new urlModule.URL(urlLot, DROUOT_ORIGIN).href;
                    const { lotID } = await this.getUrlInfo(absoluteUrl);
                    if (!lotID) {
                        throw new Error('Unable to extract a lot id from URL: '+urlLot);
                    }
                    idPlatform = lotID;
                }

                Lot = {
                    title: Lotname,
                    idPlatform: idPlatform,
                    platform: this._Name,
                    lotNumber: Lotname.split('Lot ')[1]
                };
            } catch (e) {
                console.error(e);
            }
            LotList.push(Lot);
        }

        return LotList;
    }

    /**
     * Walks every page of a sale's lot list and collects the lots.
     *
     * @param {object} page Puppeteer page showing a sale.
     * @returns {Promise<object[]>} Concatenation of _getLotInfoList results for each page.
     */
    getLotList = async (page) => {
        const LotListXPath = '//div[contains(@class, "sale-item-wrapper")]';
        // Only matches the "next page" button while it is enabled.
        const NextPageButtonXPath = "//button[contains(@aria-label, 'Page suivante') and not(contains(@class, 'v-pagination__navigation--disabled'))]";

        let LotList = [];
        let hasNextPage = false;
        do {
            // extract Lot List
            const Elements = await page.$x(LotListXPath);
            if (Elements.length > 0) {
                LotList = LotList.concat(await this._getLotInfoList(page, Elements));
            }

            const [NextPageButton] = await page.$x(NextPageButtonXPath);
            hasNextPage = Boolean(NextPageButton);
            if (hasNextPage) {
                await NextPageButton.evaluate(b => b.click());
                await page.waitForTimeout(1000);
                console.log('Next Page');
            }
        } while (hasNextPage);

        return LotList;
    }

    // ## Sale

    /**
     * Reads the sale title.
     *
     * @param {object} page Puppeteer page showing a sale.
     * @returns {Promise<string>}
     */
    getSaleTitle = async (page) => {
        const SaleTitleXPath = [
            '/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/h1',
        ];
        return this.getTextContent(SaleTitleXPath, page, 'SaleTitleXPath');
    }

    /**
     * Reads the sale date and returns it formatted by moment in the Europe/Paris zone.
     *
     * @param {object} page Puppeteer page showing a sale.
     * @returns {Promise<string>} Output of moment's format(); text that does not
     *   fit the expected French pattern yields whatever moment returns for an invalid date.
     */
    getSaleDate = async (page) => {
        // Live-sale detection (looking for the "#streaming-subscriber" element) is
        // currently disabled, so every sale is handled as an upcoming one.
        const BoolLive = false;

        let SaleDate;
        if (!BoolLive) {
            // Upcoming sale: parse the date printed on the page.
            await page.waitForTimeout(400);

            const SaleDateXPath = [
                '/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/div[1]/div',
            ];
            const SaleDateString = (await this.getTextContent(SaleDateXPath, page, 'SaleDateXPath')).trim();
            // The first replace targets literal "\s" / "\n" character pairs; real
            // whitespace runs are collapsed by the second one.
            const cleanStr = SaleDateString.replace(/\\s|\\n/g, ' ').replace(/\s+/g, ' ');
            SaleDate = moment.tz(cleanStr, 'dddd D MMMM à HH:mm (z)', 'fr', 'Europe/Paris').format();
        } else {
            // Live sale: it is happening now.
            SaleDate = moment.tz('Europe/Paris').format();
        }

        console.log('SaleDate : '+SaleDate);
        return SaleDate;
    }

    /**
     * Reads the sale location, trimmed.
     *
     * @param {object} page Puppeteer page showing a sale.
     * @returns {Promise<string>}
     */
    getSaleLocation = async (page) => {
        const SaleLocationXPath = [
            '/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/div[4]',
        ];
        const SaleLocation = await this.getTextContent(SaleLocationXPath, page, 'SaleLocationXPath');
        return SaleLocation.trim();
    }

    /**
     * Reads the auction house name with line breaks removed and whitespace collapsed.
     *
     * @param {object} page Puppeteer page showing a sale.
     * @returns {Promise<string>}
     */
    getSaleHouseName = async (page) => {
        const SaleHouseNameXPath = [
            '/html/body/div/div[4]/div[2]/div/div/div/div/div[3]/div/div[2]/div[1]/div/h4/a[1]/span',
        ];
        const SaleHouseName = await this.getTextContent(SaleHouseNameXPath, page, 'SaleHouseNameXPath');
        return SaleHouseName.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
    }

    // ## Live Data

    /**
     * Stores the live payload later read by getLiveDataLot().
     *
     * @param {object} Data Payload; getLiveDataLot() reads its `lots` array.
     */
    setLiveData = (Data) => {
        this._LiveData = Data;
    }

    /**
     * Looks up a lot in the stored live payload.
     *
     * @param {*} lotId Id compared with strict equality against each lot's `id`.
     * @returns {Promise<object|undefined>} The first matching lot, or undefined when
     *   there is no match or no live data has been set yet.
     */
    getLiveDataLot = async (lotId) => {
        const lots = this._LiveData?.lots ?? [];
        for (const lot of lots) {
            if (lot.id === lotId) {
                return lot;
            }
        }
        return undefined;
    }

}

module.exports = DrouotData