Untitled
unknown
javascript
2 years ago
18 kB
19
Indexable
const puppeteer = require("puppeteer");
const fs = require("fs");
// NOTE(review): axios is not used anywhere in this script; the require is kept
// so the file's dependency list is unchanged.
const axios = require("axios");

// Amazon.ae deals scraper: walks the category carousel on the deals page,
// opens every deal of every category and stores the scraped product details
// in amazon.json. Recoverable failures are appended to amazon-error.log.

const SITE_URL = "https://www.amazon.ae";
const DEALS_URL = "https://www.amazon.ae/deals?ref_=nav_cs_gb";
const OUTPUT_FILE = "amazon.json";
const ERROR_LOG_FILE = "amazon-error.log";
const NAV_TIMEOUT_MS = 900000;
const NAV_OPTIONS = { waitUntil: "load", timeout: NAV_TIMEOUT_MS };
const VIEWPORT = { width: 2300, height: 6000 };
const DEAL_GRID_ITEM_SELECTOR =
  "#grid-main-container > div.a-row.Grid-module__gridSection_1SEJTeTsU88s6aVeuuekAp > div > div";

/**
 * Appends an error to the scraper's error log.
 * @param {unknown} err - the value that was thrown
 */
function logError(err) {
  fs.appendFileSync(ERROR_LOG_FILE, `${err}\n`);
}

/**
 * Opens a tab configured the way the scraper expects (cache on, large viewport).
 * @param {object} browser - Puppeteer browser instance
 * @returns {Promise<object>} the new Puppeteer page
 */
async function openPage(browser) {
  const page = await browser.newPage();
  await page.setCacheEnabled(true);
  await page.setViewport(VIEWPORT);
  return page;
}

/**
 * Reads the category carousel of the deals page.
 * @param {object} page - tab currently showing the deals page
 * @returns {Promise<Array<object>>} one record per carousel entry (first entry dropped)
 */
async function scrapeCategories(page) {
  const categories = await page.$$eval("#anonCarousel1 > ol > li > a", (anchors) =>
    anchors.map((anchor) => {
      const label = anchor.querySelector("span:last-of-type");
      const img = anchor.querySelector(
        "span.GridPresets-module__gridPresetImageSection_2p68sRHExZZwCJorBe2_N3 > img"
      );
      return {
        name: label ? label.innerText : "",
        link: anchor.href,
        dataTestId: anchor.getAttribute("data-testid"),
        image_url: img ? img.src : "",
        products: [],
      };
    })
  );
  // The first carousel entry is skipped, as in the original script
  // (presumably it is not a real category - TODO confirm).
  categories.shift();
  return categories;
}

/**
 * Scrapes the deal cards shown in the listing grid.
 * @param {object} page - tab showing a deals grid
 * @returns {Promise<Array<{name: string, image: object|string, offer: string, link: string, price: string}>>}
 */
async function scrapeDealGrid(page) {
  // The callback runs inside the browser, so everything it needs is passed in
  // as an argument or defined inside it.
  return page.evaluate((gridItemSelector) => {
    const textOf = (node) => (node ? node.textContent : "");
    return Array.from(document.querySelectorAll(gridItemSelector)).map((card) => {
      const nameEl = card.querySelector(
        "div.DealGridItem-module__dealItemContent_1vFddcq1F8pUxM8dd9FW32 > div > div > a:nth-child(3) > div"
      );
      const offerEl = card.querySelector(
        "div > div > div > a.a-size-mini.a-link-normal.DealLink-module__dealLink_3v4tPYOP4qJj9bdiy0xAT.a-color-base.a-text-normal > div:nth-child(1) > div"
      );
      const linkEl = card.querySelector(
        "div.DealGridItem-module__dealItemContent_1vFddcq1F8pUxM8dd9FW32 > div > div > a:nth-child(3)"
      );
      const priceEl = card.querySelector(
        "div > div > div > span > span > span:nth-child(2) > span.a-price-whole"
      );
      const imageEl = card.querySelector("div > div > a > div > div > img");
      return {
        name: textOf(nameEl),
        image: imageEl ? { image_url: imageEl.src } : "",
        offer: textOf(offerEl),
        link: linkEl ? linkEl.href : "",
        price: textOf(priceEl),
      };
    });
  }, DEAL_GRID_ITEM_SELECTOR);
}

/**
 * Scrapes the product detail page currently loaded in `page`.
 * Every field falls back to "" when its element is missing.
 * @param {object} page - tab showing a single-product page
 * @returns {Promise<object>} title, rating, offer, prices, currency, details, color, summary, images
 */
async function getSingleProductDetails(page) {
  return page.evaluate(() => {
    const textOf = (node) => (node ? node.textContent : "");
    const priceBox =
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-none.aok-align-center";
    const priceToPay = `${priceBox} > span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay > span:nth-child(2)`;

    const titleEl = document.querySelector("#productTitle");
    const offerEl = document.querySelector(
      `${priceBox} > span.a-size-large.a-color-price.savingPriceOverride.aok-align-center.reinventPriceSavingsPercentageMargin.savingsPercentage`
    );
    const wholeEl = document.querySelector(`${priceToPay} > span.a-price-whole`);
    const fractionEl = document.querySelector(`${priceToPay} > span.a-price-fraction`);
    const currencyEl = document.querySelector(`${priceToPay} > span.a-price-symbol`);
    const oldPriceEl = document.querySelector(
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-small.aok-align-center > span > span.a-size-small.a-color-secondary.aok-align-center.basisPrice > span > span.a-offscreen"
    );
    const colorEl = document.querySelector("#variation_color_name > div > span");
    const imageEl = document.querySelector("#landingImage");
    const ratingEl = document.querySelector(
      "#acrPopover > span.a-declarative > a > i.a-icon.a-icon-star > span"
    );

    // Plain (non-async) callbacks: Promises would not survive the trip back
    // to Node and would arrive as empty objects.
    const details = Array.from(
      document.querySelectorAll("#productOverview_feature_div > div > table > tbody > tr")
    ).map((row) => {
      const label = row.querySelector("td.a-span3 > span");
      const value = row.querySelector("td.a-span9 > span");
      return {
        title: label ? label.innerText : "",
        description: value ? value.innerText : "",
      };
    });

    const summary = Array.from(
      document.querySelectorAll("#feature-bullets > ul > li")
    ).map((bullet) => ({ description: textOf(bullet.querySelector("span")) }));

    const rating = { rating: "", rated_out_of: "" };
    if (ratingEl) {
      // The code takes word 0 as the score and word 3 as the scale
      // (presumably text like "4.5 out of 5 stars" - TODO confirm).
      const words = ratingEl.textContent.split(" ");
      rating.rating = parseFloat(words[0]);
      rating.rated_out_of = parseFloat(words[3]);
    }

    const currency = textOf(currencyEl);
    // Strip the currency label and thousands separators before parsing;
    // parseFloat("1,299.00") would otherwise stop at the comma and yield 1.
    const oldPrice = oldPriceEl
      ? parseFloat(oldPriceEl.textContent.replace(currency, "").replace(/,/g, ""))
      : "";

    return {
      title: textOf(titleEl),
      rating,
      offer: textOf(offerEl),
      currentPrice: wholeEl ? wholeEl.textContent + textOf(fractionEl) : "",
      oldPrice,
      currency,
      details,
      color: textOf(colorEl),
      summary,
      images: imageEl ? { image_url: imageEl.src } : "",
    };
  });
}

/**
 * Scrapes a deal page that lists several products, then visits each listed
 * product to attach its full details under `subProducts`.
 * Not called by run() at the moment (the original call site was disabled).
 * @param {object} page - tab showing a multi-product deal page
 * @returns {Promise<Array<object>>} one record per listed product
 */
async function getSubProductDetails(page) {
  const subProducts = await page.evaluate(() => {
    const textOf = (node) => (node ? node.textContent : "");
    const info = "span > div > div.a-section.octopus-dlp-asin-info-section";
    const badge = `${info} > div.a-row.a-spacing-mini.a-grid-vertical-align.a-grid-center > div > div.a-size-mini.oct-deal-badge-element.oct-deal-badge-label`;
    const priceRow = `${info} > div.a-row.octopus-dlp-price`;
    const price = `${priceRow} > span.a-price.octopus-widget-price > span:nth-child(2)`;

    // map() with a synchronous callback: the original forEach(async ...) could
    // return the array before the callbacks had pushed their records.
    return Array.from(
      document.querySelectorAll("#octopus-dlp-asin-stream > ul > li")
    ).map((card) => {
      const nameEl = card.querySelector(`${info} > div.a-section.octopus-dlp-asin-title > a`);
      const imageEl = card.querySelector(
        "span > div > div.a-section.a-spacing-base.a-text-center.octopus-dlp-image-section > a > img"
      );
      const offerLeft = card.querySelector(`${badge} > span:nth-child(1)`);
      const offerRight = card.querySelector(`${badge} > span:nth-child(2)`);
      const currency = textOf(card.querySelector(`${price} > span.a-price-symbol`));
      const whole = textOf(card.querySelector(`${price} > span.a-price-whole`));
      const fraction = textOf(card.querySelector(`${price} > span.a-price-fraction`));
      const oldPriceEl = card.querySelector(
        `${priceRow} > span.octopus-widget-price-saving-info > span.a-size-mini.a-color-tertiary.octopus-widget-strike-through-price.a-text-strike`
      );
      const ratingEl = card.querySelector(`${info} > div:nth-child(2) > i > span`);

      const rating = { rating: "", rated_out_of: "" };
      if (ratingEl) {
        const words = ratingEl.textContent.split(" ");
        rating.rating = parseFloat(words[0]);
        rating.rated_out_of = parseFloat(words[3]);
      }

      return {
        link: nameEl ? nameEl.href : "",
        name: nameEl ? nameEl.innerText : "",
        images: { image_url: imageEl ? imageEl.src : "" },
        offer: offerLeft && offerRight ? offerLeft.textContent + offerRight.textContent : "",
        currency,
        currentPrice: whole + fraction,
        oldPrice: oldPriceEl
          ? parseFloat(oldPriceEl.textContent.replace(currency, "").replace(/,/g, ""))
          : "",
        rating,
      };
    });
  });

  for (const subProduct of subProducts) {
    try {
      await page.goto(subProduct.link, NAV_OPTIONS);
    } catch (err) {
      logError(err);
      continue;
    }
    try {
      const item = await getSingleProductDetails(page);
      subProduct.subProducts = item ? [item] : [];
    } catch (err) {
      logError(err);
    }
    // Always step back so the tab's history stays aligned with the list page,
    // including when scraping the product failed.
    await page.goBack();
  }
  return subProducts;
}

/**
 * Loads one deal in the detail tab and scrapes it when it is a single-product page.
 * @param {object} detailPage - tab reserved for product pages
 * @param {string} link - absolute URL of the deal
 * @returns {Promise<object|null>} product details, or null when skipped or failed
 */
async function scrapeDeal(detailPage, link) {
  try {
    await detailPage.goto(link, NAV_OPTIONS);
    // Pages without #productTitle (multi-product deal pages) are skipped;
    // getSubProductDetails() exists for them but is intentionally not wired in.
    if (!(await detailPage.$("#productTitle"))) {
      return null;
    }
    return await getSingleProductDetails(detailPage);
  } catch (err) {
    logError(err);
    return null;
  }
}

/**
 * Walks every result page of the category shown in `listingPage` and appends
 * the scraped products to `products`. Products are opened in `detailPage` so
 * the listing tab never leaves the category and "next" keeps paging it.
 * @param {object} listingPage - tab showing the category's deal grid
 * @param {object} detailPage - tab reserved for product pages
 * @param {Array<object>} products - output array, filled in place so partial
 *   results survive an error thrown part-way through
 * @returns {Promise<void>}
 */
async function collectCategoryProducts(listingPage, detailPage, products) {
  const seenLinks = new Set();
  for (;;) {
    const deals = await scrapeDealGrid(listingPage);
    let newDeals = 0;
    for (const { link } of deals) {
      if (!link || seenLinks.has(link)) {
        continue;
      }
      seenLinks.add(link);
      newDeals += 1;
      const item = await scrapeDeal(detailPage, link);
      if (item) {
        products.push(item);
      }
    }
    // A page with nothing new means "next" did not advance; stop instead of looping forever.
    if (newDeals === 0) {
      return;
    }

    await listingPage.waitForSelector("li.a-last", {
      visible: true,
      timeout: NAV_TIMEOUT_MS,
    });
    if ((await listingPage.$("li.a-disabled.a-last")) !== null) {
      return;
    }
    const nextBtn = await listingPage.$("li.a-last");
    if (!nextBtn) {
      return;
    }
    // Register the navigation wait before clicking so a fast navigation is not missed.
    await Promise.all([listingPage.waitForNavigation(NAV_OPTIONS), nextBtn.click()]);
    await listingPage.waitForSelector(DEAL_GRID_ITEM_SELECTOR, {
      visible: true,
      timeout: NAV_TIMEOUT_MS,
    });
  }
}

/**
 * Scrapes the Amazon.ae deals page category by category and writes the result
 * to amazon.json. Per-category failures are logged and skipped.
 * @param {object} [options]
 * @param {number} [options.startCategoryIndex=18] - index of the first category
 *   to scrape. NOTE(review): 18 is the value the script was hard-coded with; it
 *   looks like a leftover from resuming an interrupted run - confirm whether 0 is intended.
 * @returns {Promise<void>} resolves once amazon.json has been written
 * @throws rethrows any unrecoverable error after logging it
 */
async function run({ startCategoryIndex = 18 } = {}) {
  let browser;
  try {
    browser = await puppeteer.launch({
      // headless: false,
      defaultViewport: null,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--single-process",
        "--disable-gpu",
        "--ignore-certificate-errors",
      ],
    });
    const page = await openPage(browser);
    const detailPage = await openPage(browser);

    await page.goto(DEALS_URL, {
      waitUntil: "networkidle2",
      timeout: NAV_TIMEOUT_MS,
    });
    const categories = await scrapeCategories(page);

    let isOnMainPage = true;
    for (let i = startCategoryIndex; i < categories.length; i++) {
      const category = categories[i];
      console.log("category in: ", i, " -- isOnMainPage: ", isOnMainPage);

      if (!isOnMainPage) {
        try {
          // goto() already waits for the load event; no extra waitForNavigation().
          await page.goto(DEALS_URL, NAV_OPTIONS);
          isOnMainPage = true;
        } catch (err) {
          logError(err);
          continue;
        }
      }
      console.log("isOnMainPage finally: ", isOnMainPage);

      // From here on the listing tab may have left the deals page, whatever happens.
      isOnMainPage = false;
      try {
        await Promise.all([
          page.waitForNavigation(NAV_OPTIONS),
          page.click(`[data-testid="${category.dataTestId}"]`),
        ]);
        await page.waitForSelector(DEAL_GRID_ITEM_SELECTOR, {
          visible: true,
          timeout: NAV_TIMEOUT_MS,
        });
      } catch (err) {
        logError(err);
        continue;
      }

      // The original treats a div[role="note"] on the page as "category has no products".
      if (await page.$('div[role="note"]')) {
        continue;
      }

      const products = [];
      try {
        await collectCategoryProducts(page, detailPage, products);
      } catch (err) {
        // Keep whatever was collected; one broken category must not discard the run.
        logError(err);
      }
      category.products = products;
      console.log(`cat ${i} done`);
    }

    fs.writeFileSync(OUTPUT_FILE, JSON.stringify({ url: SITE_URL, categories }));
  } catch (err) {
    console.log("errror --------- ", err);
    logError(err);
    throw err;
  } finally {
    // Close the browser on failure too, so no Chromium process is left behind.
    if (browser) {
      await browser.close();
    }
  }
}

run().catch(() => {
  // run() has already logged the failure; only surface it in the exit status.
  process.exitCode = 1;
});
Editor is loading...