// interencheresData.js
const { ScraperTools } = require('../Scraper');
const urlModule = require('url');
const moment = require('moment-timezone');
// NOTE(review): `Console` and `title` are not referenced anywhere in this file;
// kept so the module's dependency list is unchanged.
const { Console } = require('console');
const { title } = require('process');

/** Substring identifying picture responses that getPictures collects. */
const PICTURE_HOST = 'thumbor-indbupload.interencheres.com';

/** Picture count assumed when the gallery counter is missing or unreadable. */
const DEFAULT_PICTURE_COUNT = 100;

/** Hard cap on the number of "next picture" clicks performed by getPictures. */
const MAX_GALLERY_CLICKS = 20;

/** Time zone used to build/format sale dates. */
const SALE_TIMEZONE = 'Europe/Paris';

/** French month name (lower case) -> two-digit month number. */
const FRENCH_MONTHS = Object.freeze({
  'janvier': '01',
  'février': '02',
  'mars': '03',
  'avril': '04',
  'mai': '05',
  'juin': '06',
  'juillet': '07',
  'août': '08',
  'septembre': '09',
  'octobre': '10',
  'novembre': '11',
  'décembre': '12',
});

/**
 * Converts a whitespace-separated sale date string into an ISO-8601 string
 * in the Europe/Paris time zone.
 *
 * Tokens are read by position: [0] day, [1] French month name, [2] year,
 * [4] time written as `<hour>h<minute>`; token [3] is ignored.
 * NOTE(review): presumably the text looks like "12 janvier 2024 à 14h30" —
 * confirm against the live page.
 *
 * @param {string} text - Trimmed date text scraped from the sale page.
 * @returns {string} `moment().format()` output; "Invalid date" when a
 *   component (e.g. an unknown month name) cannot be parsed.
 * @throws {TypeError} When the text has fewer than five tokens (no time part).
 */
function parseSaleDateString(text) {
  // Split on any whitespace run so non-breaking or doubled spaces do not shift the tokens.
  const tokens = text.split(/\s+/);
  if (tokens.length < 5) {
    throw new TypeError(`Unexpected sale date format: "${text}"`);
  }

  const [dayToken, monthToken, yearToken, , timeToken] = tokens;
  const [hourToken, minuteToken = ''] = timeToken.split(/[hH:]/);

  const day = Number.parseInt(dayToken, 10);
  const month = Number.parseInt(FRENCH_MONTHS[monthToken.toLowerCase()], 10);
  const year = Number.parseInt(yearToken, 10);
  const hour = Number.parseInt(hourToken, 10);
  // "14h" (no minutes) means 14:00; previously this produced NaN and an invalid date.
  const minute = minuteToken === '' ? 0 : Number.parseInt(minuteToken, 10);

  // moment months are 0-indexed.
  return moment.tz([year, month - 1, day, hour, minute], SALE_TIMEZONE).format();
}

/**
 * Scraper for interencheres sale and lot pages.
 *
 * Relies on helpers called on `this` (`getTextContent`, `getTextContentElement`,
 * `getAttributeElement`, `clickLink`, `ElementExists`), presumably inherited
 * from ScraperTools. NOTE(review): the code below assumes `getTextContent`
 * yields '' when nothing is found and that `clickLink` resolves to a truthy
 * value on success — confirm in ScraperTools.
 *
 * Methods are arrow-function instance fields, so they stay bound to the
 * instance when passed around as callbacks.
 */
class InterencheresData extends ScraperTools {
  _Name = 'interencheres';

  /**
   * Splits a sale or lot URL into its identifying parts.
   *
   * @param {string} url - Absolute URL; path is `/<typeSale>/<slug>-<saleID>[/<x>-<lotID>.<ext>]`.
   * @returns {Promise<{typeSale: string, saleID: string, lotID: (string|number)}>}
   *   `lotID` is 0 when the URL has no third path segment.
   * @throws {TypeError} When the URL is invalid or has no sale segment.
   */
  getUrlInfo = async (url) => {
    const { pathname } = new urlModule.URL(url);
    const pathParts = pathname.split('/').filter(Boolean);
    if (pathParts.length < 2) {
      throw new TypeError(`Unexpected URL (no sale segment): ${url}`);
    }

    const typeSale = pathParts[0];
    // The sale id is the last dash-separated piece of the second segment.
    const saleSlugParts = pathParts[1].split('-');
    const saleID = saleSlugParts[saleSlugParts.length - 1];

    // Only lot URLs carry a third segment.
    let lotID = 0;
    if (pathParts.length > 2) {
      lotID = pathParts[2].split('-')[1].split('.')[0];
    }

    return { typeSale, saleID, lotID };
  };

  // ## Lot

  /**
   * Opens a lot page, steps through its picture gallery and collects the URLs
   * of the picture responses observed on the way.
   *
   * @param {object} page - Browser page (uses `on`/`off`, `goto`, `waitForTimeout`).
   * @param {string} Url - Lot page URL to open.
   * @returns {Promise<string[]>} Unique picture URLs in first-seen order.
   */
  getPictures = async (page, Url) => {
    const collected = [];

    const onResponse = async (response) => {
      const responseUrl = response.url();
      if (!responseUrl.includes(PICTURE_HOST)) {
        return;
      }
      try {
        // Only record the picture once its body has actually been received.
        await response.buffer();
        console.log(`push ${responseUrl}`);
        collected.push(responseUrl);
      } catch (err) {
        // A body can be unavailable (e.g. redirect, page navigated away); skip that picture.
        console.error(`getPictures: could not read ${responseUrl}`, err);
      }
    };

    page.on('response', onResponse);
    try {
      console.log(`go to : ${Url}`);
      await page.goto(Url);

      const picturesNumberXPath = [
        "//div[contains(@class, 'pswp__counter')]",
      ];
      const counterText = await this.getTextContent(picturesNumberXPath, page, 'picturesNumberXPath');

      let picturesNumber = DEFAULT_PICTURE_COUNT;
      if (counterText !== '') {
        // The total is read from the part after " / " in the counter text.
        const parsedCount = Number.parseInt(counterText.split(' / ')[1], 10);
        if (!Number.isNaN(parsedCount)) {
          picturesNumber = parsedCount;
        }
        console.log(`picturesNumber : ${picturesNumber}`);
      }

      const ButtonNextXPath = [
        "//button[contains(@class, 'pswp__button--arrow--right')]",
      ];
      let clicks = 0;
      do {
        await this.clickLink(ButtonNextXPath, page, 'ButtonNextXPath');
        await page.waitForTimeout(300);
        clicks++;
        // Stop once every picture has been visited or after MAX_GALLERY_CLICKS clicks.
      } while (clicks + 1 < picturesNumber && clicks < MAX_GALLERY_CLICKS);
    } finally {
      // Detach the listener so repeated calls on the same page do not pile up handlers.
      page.off('response', onResponse);
    }

    // The same picture can be requested more than once; keep the first occurrence only.
    return [...new Set(collected)];
  };

  /**
   * Reads the lot number shown on a lot page.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<string>} Lot label with the first "Lot " removed, or '' when not found.
   */
  getLotNumber = async (page) => {
    const lotNumberXPath = [
      '/html/body/div[1]/div/div/div/main/div/div/div[2]/div/div[1]/div[2]/div[2]/div[1]',
      '/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[1]/div[1]',
    ];
    const lotNumberString = await this.getTextContent(lotNumberXPath, page, 'lotNumberXPath');
    if (lotNumberString === '') {
      return '';
    }
    return lotNumberString.replace('Lot ', '');
  };

  /**
   * Reads the low/high estimate of a lot.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<{EstimateLow: number, EstimateHigh: number}>}
   *   Both 0 when no number is found; `EstimateHigh` is 0 when only one number is present.
   */
  getEstimate = async (page) => {
    const EstimateXPath = [
      '/html/body/div[1]/div/div/div/main/div/div/div[2]/div/div[1]/div[2]/div[3]/div[2]/span',
      '/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[2]/div[2]/span',
    ];
    const EstimateString = await this.getTextContent(EstimateXPath, page, 'EstimateXPath');
    console.log(`EstimateString : ${EstimateString}`);

    let EstimateLow = 0;
    let EstimateHigh = 0;
    if (EstimateString !== '') {
      // Either a number with whitespace-grouped thousands ("1 200") or a plain
      // digit run ("1500"); the plain form used to be cut after three digits.
      const matches = EstimateString.match(/\d{1,3}(?:\s\d{3})+(?!\d)|\d+/g);
      const toInteger = (text) => Number.parseInt(text.replace(/\s/g, ''), 10);

      if (matches === null) {
        console.log('Could not extract numbers.');
      } else if (matches.length >= 2) {
        EstimateLow = toInteger(matches[0]);
        EstimateHigh = toInteger(matches[1]);
        console.log('Low:', EstimateLow);
        console.log('High:', EstimateHigh);
      } else {
        EstimateLow = toInteger(matches[0]);
      }
    }

    return { EstimateLow, EstimateHigh };
  };

  /**
   * Reads the lot description text.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<string>} Whatever `getTextContent` yields for the description XPaths.
   */
  getDescription = async (page) => {
    const DescriptionXPath = [
      '//div[contains(@class, "description")]',
      '/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[4]/div',
      '/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[4]/div',
      '/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[3]/div',
    ];
    return this.getTextContent(DescriptionXPath, page, 'DescriptionXPath');
  };

  /**
   * Clicks the "Frais de vente" link and reads the sale fees.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<{feesText: string, fees: (string|number)}>}
   *   `feesText` is the whitespace-normalised fees text ('' when unavailable).
   *   `fees` is the first number found in it as a string with a dot decimal
   *   separator (e.g. "14.28"), or the number 0 when none is found.
   */
  getFees = async (page) => {
    let feesText = '';
    let fees = 0;

    const ButtonFeesXPath = [
      './/a[contains(text(),"Frais de vente")]',
      '/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[3]/div[2]/a',
      '/html/body/div[1]/div/div/div[1]/main/div/div/div[2]/div/div[1]/div[2]/div[3]/div[2]/a',
      '/html/body/div[1]/div/div/div[2]/div/div[1]/div[2]/div/div[1]/div[2]/div[2]/div[2]/a',
    ];

    if (await this.clickLink(ButtonFeesXPath, page, 'ButtonFeesXPath')) {
      const FeesXPath = [
        "//strong[contains(text(), 'Frais de vente :')]/following-sibling::span",
        '/html/body/div[1]/div/div/div[3]/div/div/div[2]/div/p[1]/p[1]/span',
      ];
      feesText = await this.getTextContent(FeesXPath, page, 'FeesXPath');

      if (feesText !== '') {
        feesText = feesText.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
        // Accept both "14.28" and "14,28"; a decimal comma used to truncate the value to "14".
        const matches = feesText.match(/\d+(?:[.,]\d+)?/);
        if (matches) {
          fees = matches[0].replace(',', '.');
        }
      }
    }

    return { feesText, fees };
  };

  /**
   * Extracts the lot id from a lot URL.
   *
   * @param {string} url - Lot URL.
   * @returns {Promise<(string|number)>} `lotID` as produced by getUrlInfo (0 for non-lot URLs).
   */
  getLotID = async (url) => {
    const { lotID: id_lot } = await this.getUrlInfo(url);
    console.log(`id_lot : ${id_lot}`);
    return id_lot;
  };

  /**
   * Extracts the sale id and the bare sale URL from a sale or lot URL.
   *
   * @param {string} url - Sale or lot URL.
   * @returns {Promise<{id_sale: string, urlSale: string}>}
   *   `urlSale` is `url` cut before "/lot-" and before any "?" query string.
   */
  getSaleIdUrl = async (url) => {
    const { saleID: id_sale } = await this.getUrlInfo(url);

    // Drop the lot part first, then any query parameters.
    const urlSale = url.split('/lot-')[0].split('?')[0];

    console.log(`getSaleIdUrl urlSale : ${urlSale}`);
    return { id_sale, urlSale };
  };

  // ## Lot List

  /**
   * Builds one lot descriptor per list element.
   *
   * @param {object} page - Browser page.
   * @param {Array<object>} Elements - Element handles of the lot cards.
   * @returns {Promise<Array<object>>} One entry per element, in order:
   *   `{title, idPlatform, platform, lotNumber}`, or an empty object `{}` when
   *   extraction failed for that element (the error is logged).
   */
  _getLotInfoList = async (page, Elements) => {
    const LotList = [];

    for (const element of Elements) {
      let Lot = {};
      try {
        const LotnameXPath = [
          './/a/div/div/div[5]/div/div[1]',
          './/a/div/div/div[4]/div/div[1]',
        ];
        const Lotname = await this.getTextContentElement(LotnameXPath, page, element, 'LotnameXPath');

        // idPlatform comes from the lot URL.
        const LotUrlXPath = [
          './/a',
        ];
        const urlLot = await this.getAttributeElement(LotUrlXPath, page, element, 'href', 'UrlListLot');

        const match = urlLot.match(/lot-(.*)\.html/);
        if (match === null) {
          throw new Error(`Lot URL does not contain a lot id: ${urlLot}`);
        }

        Lot = {
          title: Lotname,
          idPlatform: match[1],
          platform: this._Name,
          lotNumber: Lotname.split('Lot ')[1],
        };
      } catch (e) {
        // Best effort: log and keep a placeholder so the list keeps one entry per element.
        console.error(e);
      }
      LotList.push(Lot);
    }

    return LotList;
  };

  /**
   * Collects the lots of a sale across all result pages.
   *
   * @param {object} page - Browser page already showing the sale's lot list.
   * @returns {Promise<Array<object>>} Concatenated `_getLotInfoList` results of every page.
   */
  getLotList = async (page) => {
    const LotListXPath = [
      '//div[contains(@class, "sale-item-wrapper")]',
    ];
    // Matches the "next page" button only while it is enabled.
    const NextPageButtonXPath = "//button[contains(@aria-label, 'Page suivante') and not(contains(@class, 'v-pagination__navigation--disabled'))]";

    const LotList = [];
    let hasNextPage = false;
    do {
      const Elements = await page.$x(LotListXPath[0]);
      if (Elements.length > 0) {
        LotList.push(...(await this._getLotInfoList(page, Elements)));
      }

      const NextPageButton = await page.$x(NextPageButtonXPath);
      hasNextPage = NextPageButton.length > 0;
      if (hasNextPage) {
        await NextPageButton[0].evaluate((b) => b.click());
        await page.waitForTimeout(1000);
        console.log('Next Page');
      }
    } while (hasNextPage);

    return LotList;
  };

  // ## Sale

  /**
   * Reads the sale title.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<string>} Whatever `getTextContent` yields for the title XPaths.
   */
  getSaleTitle = async (page) => {
    const SaleTitleXPath = [
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div[1]/div[2]/h1/div/div/div/div/div',
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div/div/h1/div/div/div/div/div',
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div/div/div/h1/div/div/div/div/div',
      '/html/body/div[1]/div/div/div[1]/main/div/div/div/div/div/div[1]/div/div/div/div/h1/div/div/div/div/div',
    ];
    return this.getTextContent(SaleTitleXPath, page, 'SaleTitleXPath');
  };

  /**
   * Determines the sale date.
   *
   * When the streaming element is present the sale is treated as live and the
   * current Europe/Paris time is returned; otherwise the date shown on the
   * page is parsed.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<string>} ISO-8601 date string ("Invalid date" when a date
   *   component cannot be parsed).
   * @throws {TypeError} When the scraped date text has no time part.
   */
  getSaleDate = async (page) => {
    // Test if live sale.
    let BoolLive = false;
    try {
      const VideoXPath = [
        '//*[@id="streaming-subscriber"]',
      ];
      const VideoExists = await this.ElementExists(VideoXPath, page, 'VideoXPath');
      console.log(`VideoExists : ${VideoExists}`);
      BoolLive = VideoExists;
    } catch (error) {
      // Detection is best effort: fall back to "not live", but leave a trace.
      console.error(error);
    }

    let SaleDate;
    if (!BoolLive) {
      // Future sale: parse the date shown on the page.
      await page.waitForTimeout(400);
      const SaleDateXPath = [
        '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div[1]/div[2]/h2/div[2]/div/div[1]/span',
      ];
      const rawSaleDate = await this.getTextContent(SaleDateXPath, page, 'SaleDateXPath');
      const SaleDateString = rawSaleDate.trim();
      console.log(`SaleDateString : ${SaleDateString}`);

      SaleDate = parseSaleDateString(SaleDateString);
    } else {
      // Live sale: it is happening now.
      SaleDate = moment.tz(SALE_TIMEZONE).format();
    }

    console.log(`SaleDate : ${SaleDate}`);
    return SaleDate;
  };

  /**
   * Reads the sale location.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<string>} Trimmed location text.
   */
  getSaleLocation = async (page) => {
    const SaleLocationXPath = [
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div[1]/div[2]/h2/div[2]/div/div[2]/span',
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[1]/div/div/div/h2/div[2]',
    ];
    const SaleLocation = await this.getTextContent(SaleLocationXPath, page, 'SaleLocationXPath');
    return SaleLocation.trim();
  };

  /**
   * Reads the auction house name.
   *
   * @param {object} page - Browser page.
   * @returns {Promise<string>} Name with newlines removed, whitespace runs
   *   collapsed to a single space, and trimmed.
   */
  getSaleHouseName = async (page) => {
    const SaleHouseNameXPath = [
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div/div/div[2]/div[2]/div[2]/a',
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[2]/div[3]/div/div/div/div/div[2]/div/div/div[2]/div[2]/div[2]/a',
      '/html/body/div[1]/div/div/div/main/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div/div/div/div[2]/div[2]/a',
      '/html/body/div[1]/div/div/div[1]/main/div/div/div/div/div/div[2]/div[3]/div/div/div/div/div[2]/div/div/div/div[2]/div[2]/a',
    ];
    const SaleHouseName = await this.getTextContent(SaleHouseNameXPath, page, 'SaleHouseNameXPath');
    return SaleHouseName.replace(/[\n]/g, '').replace(/\s+/g, ' ').trim();
  };
}

module.exports = InterencheresData;