Untitled
unknown
javascript
3 years ago
18 kB
23
Indexable
const puppeteer = require("puppeteer");
const fs = require("fs");
const axios = require("axios");
/** Deals landing page that hosts the category carousel. */
const DEALS_URL = "https://www.amazon.ae/deals?ref_=nav_cs_gb";
/** File that receives one line per recoverable scraping error. */
const ERROR_LOG_FILE = "amazon-error.log";
/** File that receives the final scrape result as JSON. */
const OUTPUT_FILE = "amazon.json";
/** Upper bound (ms) applied to every navigation and selector wait. */
const NAV_TIMEOUT_MS = 900000;
/** Selector matching each product card on a deals listing page. */
const PRODUCT_GRID_SELECTOR =
  "#grid-main-container > div.a-row.Grid-module__gridSection_1SEJTeTsU88s6aVeuuekAp > div > div";

/**
 * Appends one line describing `error` to the error log file.
 * @param {unknown} error - value to record (stringified).
 */
function logError(error) {
  fs.appendFileSync(ERROR_LOG_FILE, `${error}\n`);
}

/**
 * Navigates `page` to `url` and resolves once the `load` event has fired.
 * `page.goto` already waits for the navigation, so no extra
 * `waitForNavigation` call is needed (it would only time out).
 */
function gotoAndWait(page, url) {
  return page.goto(url, { waitUntil: "load", timeout: NAV_TIMEOUT_MS });
}

/**
 * Runs `triggerClick` and waits for the navigation it causes.
 * The wait is registered before the click so a fast navigation cannot be missed.
 * @param {object} page - Puppeteer page.
 * @param {() => Promise<void>} triggerClick - performs the click.
 */
async function navigateByClick(page, triggerClick) {
  await Promise.all([
    page.waitForNavigation({ waitUntil: "load", timeout: NAV_TIMEOUT_MS }),
    triggerClick(),
  ]);
}

/**
 * Reads every entry of the category carousel on the deals page.
 * @returns {Promise<Array<{name: string, link: string, dataTestId: (string|null), image_url: string, products: Array}>>}
 */
function scrapeCategories(page) {
  return page.$$eval("#anonCarousel1 > ol > li > a", (anchors) =>
    anchors.map((anchor) => {
      // The label is the last <span> inside the carousel link.
      const label = anchor.querySelector("span:last-of-type");
      const image = anchor.querySelector(
        "span.GridPresets-module__gridPresetImageSection_2p68sRHExZZwCJorBe2_N3 > img"
      );
      return {
        name: label ? label.innerText : "",
        link: anchor.href,
        dataTestId: anchor.getAttribute("data-testid"),
        image_url: image ? image.src : "",
        products: [],
      };
    })
  );
}

/**
 * Reads the product cards of the listing page currently shown.
 * Only `link` is consumed by the caller today; the remaining fields are kept
 * so the card summary stays available.
 * @returns {Promise<Array<{name: string, image: (object|string), offer: string, link: string, price: string}>>}
 */
function scrapeListingCards(page) {
  return page.$$eval(PRODUCT_GRID_SELECTOR, (cards) =>
    cards.map((card) => {
      const textOf = (node) => (node ? node.textContent : "");
      const anchor = card.querySelector(
        "div.DealGridItem-module__dealItemContent_1vFddcq1F8pUxM8dd9FW32 > div > div > a:nth-child(3)"
      );
      const image = card.querySelector("div > div > a > div > div > img");
      return {
        name: textOf(
          card.querySelector(
            "div.DealGridItem-module__dealItemContent_1vFddcq1F8pUxM8dd9FW32 > div > div > a:nth-child(3) > div"
          )
        ),
        image: image ? { image_url: image.src } : "",
        offer: textOf(
          card.querySelector(
            "div > div > div > a.a-size-mini.a-link-normal.DealLink-module__dealLink_3v4tPYOP4qJj9bdiy0xAT.a-color-base.a-text-normal > div:nth-child(1) > div"
          )
        ),
        link: anchor ? anchor.href : "",
        price: textOf(
          card.querySelector(
            "div > div > div > span > span > span:nth-child(2) > span.a-price-whole"
          )
        ),
      };
    })
  );
}

/**
 * Scrapes the product detail page currently shown in `page`.
 * Missing elements yield empty strings instead of throwing.
 * @returns {Promise<object>} title, rating, offer, prices, currency, details,
 *   color, summary and main image of the product.
 */
function scrapeSingleProduct(page) {
  // Everything inside the callback runs in the browser and must be synchronous
  // plain data: promises returned from here would serialise to `{}`.
  return page.evaluate(() => {
    const find = (selector) => document.querySelector(selector);
    const textOf = (node) => (node ? node.textContent : "");
    // Strips the currency symbol and thousands separators before parsing.
    const toNumber = (rawText, currencySymbol) => {
      const parsed = parseFloat(
        rawText.replace(currencySymbol, "").replace(/,/g, "")
      );
      return Number.isFinite(parsed) ? parsed : "";
    };

    const titleNode = find("#productTitle");
    const offerNode = find(
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-none.aok-align-center > span.a-size-large.a-color-price.savingPriceOverride.aok-align-center.reinventPriceSavingsPercentageMargin.savingsPercentage"
    );
    const priceWholeNode = find(
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-none.aok-align-center > span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay > span:nth-child(2) > span.a-price-whole"
    );
    const priceFractionNode = find(
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-none.aok-align-center > span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay > span:nth-child(2) > span.a-price-fraction"
    );
    const currencyNode = find(
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-none.aok-align-center > span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay > span:nth-child(2) > span.a-price-symbol"
    );
    const oldPriceNode = find(
      "#corePriceDisplay_desktop_feature_div > div.a-section.a-spacing-small.aok-align-center > span > span.a-size-small.a-color-secondary.aok-align-center.basisPrice > span > span.a-offscreen"
    );
    const colorNode = find("#variation_color_name > div > span");
    const imageNode = find("#landingImage");
    const ratingNode = find(
      "#acrPopover > span.a-declarative > a > i.a-icon.a-icon-star > span"
    );

    const details = Array.from(
      document.querySelectorAll(
        "#productOverview_feature_div > div > table > tbody > tr"
      ),
      (row) => {
        const label = row.querySelector("td.a-span3 > span");
        const value = row.querySelector("td.a-span9 > span");
        return {
          title: label ? label.innerText : "",
          description: value ? value.innerText : "",
        };
      }
    );

    const summary = Array.from(
      document.querySelectorAll("#feature-bullets > ul > li"),
      (bullet) => ({ description: textOf(bullet.querySelector("span")) })
    );

    const rating = { rating: "", rated_out_of: "" };
    if (ratingNode) {
      // The code reads word 0 as the score and word 3 as the scale.
      const words = ratingNode.textContent.trim().split(/\s+/);
      rating.rating = parseFloat(words[0]);
      rating.rated_out_of = parseFloat(words[3]);
    }

    const currencySymbol = textOf(currencyNode);
    return {
      title: textOf(titleNode),
      rating,
      offer: textOf(offerNode),
      currentPrice: priceWholeNode
        ? priceWholeNode.textContent + textOf(priceFractionNode)
        : "",
      oldPrice: oldPriceNode
        ? toNumber(oldPriceNode.textContent, currencySymbol)
        : "",
      currency: currencySymbol,
      details,
      color: textOf(colorNode),
      summary,
      images: imageNode ? { image_url: imageNode.src } : "",
    };
  });
}

/**
 * Scrapes a deal page that lists several products, then visits each of them
 * and stores its details under `subProducts`.
 * Leaves `page` on the last visited product page.
 * @returns {Promise<Array<object>>} one entry per listed product.
 */
async function scrapeSubProducts(page) {
  const subProducts = await page.$$eval(
    "#octopus-dlp-asin-stream > ul > li",
    (items) =>
      items.map((item) => {
        const textOf = (node) => (node ? node.textContent : "");
        const toNumber = (rawText, currencySymbol) => {
          const parsed = parseFloat(
            rawText.replace(currencySymbol, "").replace(/,/g, "")
          );
          return Number.isFinite(parsed) ? parsed : "";
        };

        const titleAnchor = item.querySelector(
          "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-section.octopus-dlp-asin-title > a"
        );
        const image = item.querySelector(
          "span > div > div.a-section.a-spacing-base.a-text-center.octopus-dlp-image-section > a > img"
        );
        const offerLeft = item.querySelector(
          "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-row.a-spacing-mini.a-grid-vertical-align.a-grid-center > div > div.a-size-mini.oct-deal-badge-element.oct-deal-badge-label > span:nth-child(1)"
        );
        const offerRight = item.querySelector(
          "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-row.a-spacing-mini.a-grid-vertical-align.a-grid-center > div > div.a-size-mini.oct-deal-badge-element.oct-deal-badge-label > span:nth-child(2)"
        );
        const currencySymbol = textOf(
          item.querySelector(
            "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-row.octopus-dlp-price > span.a-price.octopus-widget-price > span:nth-child(2) > span.a-price-symbol"
          )
        );
        const priceWhole = textOf(
          item.querySelector(
            "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-row.octopus-dlp-price > span.a-price.octopus-widget-price > span:nth-child(2) > span.a-price-whole"
          )
        );
        const priceFraction = textOf(
          item.querySelector(
            "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-row.octopus-dlp-price > span.a-price.octopus-widget-price > span:nth-child(2) > span.a-price-fraction"
          )
        );
        const oldPriceNode = item.querySelector(
          "span > div > div.a-section.octopus-dlp-asin-info-section > div.a-row.octopus-dlp-price > span.octopus-widget-price-saving-info > span.a-size-mini.a-color-tertiary.octopus-widget-strike-through-price.a-text-strike"
        );
        const ratingNode = item.querySelector(
          " span > div > div.a-section.octopus-dlp-asin-info-section > div:nth-child(2) > i > span"
        );

        const rating = { rating: "", rated_out_of: "" };
        if (ratingNode) {
          const words = ratingNode.textContent.trim().split(/\s+/);
          rating.rating = parseFloat(words[0]);
          rating.rated_out_of = parseFloat(words[3]);
        }

        return {
          link: titleAnchor ? titleAnchor.href : "",
          name: titleAnchor ? titleAnchor.innerText : "",
          images: { image_url: image ? image.src : "" },
          // The badge is only reported when both halves are present.
          offer:
            offerLeft && offerRight
              ? offerLeft.textContent + offerRight.textContent
              : "",
          currency: currencySymbol,
          currentPrice: priceWhole + priceFraction,
          oldPrice: oldPriceNode
            ? toNumber(oldPriceNode.textContent, currencySymbol)
            : "",
          rating,
        };
      })
  );

  for (const subProduct of subProducts) {
    subProduct.subProducts = [];
    if (!subProduct.link) {
      continue;
    }
    try {
      await gotoAndWait(page, subProduct.link);
      const details = await scrapeSingleProduct(page);
      if (details) {
        subProduct.subProducts.push(details);
      }
    } catch (error) {
      logError(error);
    }
  }
  return subProducts;
}

/**
 * Scrapes every product of the category whose first listing page is currently
 * shown in `page`, following the "next" pagination button until it is disabled.
 * Recoverable errors are logged and end the pagination early; products
 * collected so far are still returned.
 * @param {object} page - Puppeteer page positioned on a category listing.
 * @param {boolean} includeSubProducts - also scrape multi-product deal pages.
 * @returns {Promise<Array<object>>} product detail records.
 */
async function scrapeCategoryProducts(page, includeSubProducts) {
  const products = [];
  const visitedListings = new Set();

  for (;;) {
    // Remember where this listing lives: product pages are opened in the same
    // tab, so we have to come back here before paginating.
    const listingUrl = page.url();
    if (visitedListings.has(listingUrl)) {
      break; // pagination did not advance; avoid looping forever
    }
    visitedListings.add(listingUrl);

    try {
      const cards = await scrapeListingCards(page);
      for (const card of cards) {
        if (!card.link) {
          continue; // card without a product link, nothing to visit
        }
        try {
          await gotoAndWait(page, card.link);
          // A #productTitle element marks a single-product detail page.
          if (await page.$("#productTitle")) {
            const details = await scrapeSingleProduct(page);
            if (details) {
              products.push(details);
            }
          } else if (includeSubProducts) {
            products.push(...(await scrapeSubProducts(page)));
          }
        } catch (error) {
          logError(error);
        }
      }

      await gotoAndWait(page, listingUrl);
      await page.waitForSelector("li.a-last", {
        visible: true,
        timeout: NAV_TIMEOUT_MS,
      });
      if (await page.$("li.a-disabled.a-last")) {
        break; // "next" is disabled: this was the last page
      }
      const nextButton = await page.$("li.a-last");
      if (!nextButton) {
        break;
      }
      await navigateByClick(page, () => nextButton.click());
    } catch (error) {
      logError(error);
      break;
    }
  }
  return products;
}

/**
 * Scrapes the Amazon.ae deals page category by category and writes the result
 * to `amazon.json`. Recoverable errors are appended to `amazon-error.log`.
 *
 * @param {object} [options]
 * @param {number} [options.startCategoryIndex=18] - index (after the first
 *   carousel entry has been dropped) of the first category to scrape. The
 *   default keeps the script's existing behaviour of starting at index 18.
 * @param {boolean} [options.includeSubProducts=false] - also scrape deal pages
 *   that list several products; off by default, matching existing behaviour.
 * @returns {Promise<void>} resolves once the JSON file is written; rejects
 *   with the original error on any unrecoverable failure.
 */
async function run({ startCategoryIndex = 18, includeSubProducts = false } = {}) {
  let browser;
  try {
    browser = await puppeteer.launch({
      defaultViewport: null,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--single-process",
        "--disable-gpu",
        "--ignore-certificate-errors",
      ],
    });
    const page = await browser.newPage();
    await page.setCacheEnabled(true);
    await page.setViewport({ width: 2300, height: 6000 });
    await page.goto(DEALS_URL, {
      waitUntil: "networkidle2",
      timeout: NAV_TIMEOUT_MS,
    });

    const categories = await scrapeCategories(page);
    categories.shift(); // the first carousel entry is not scraped

    let isOnDealsPage = true;
    for (let index = startCategoryIndex; index < categories.length; index++) {
      const category = categories[index];
      console.log("category in: ", index, " -- isOnMainPage: ", isOnDealsPage);
      try {
        if (!isOnDealsPage) {
          await gotoAndWait(page, DEALS_URL);
        }
        // From here on the tab may have left the deals page, whatever happens.
        isOnDealsPage = false;
        await navigateByClick(page, () =>
          page.click(`[data-testid="${category.dataTestId}"]`)
        );
        await page.waitForSelector(PRODUCT_GRID_SELECTOR, {
          visible: true,
          timeout: NAV_TIMEOUT_MS,
        });
      } catch (error) {
        logError(error);
        continue;
      }

      // A div[role="note"] element is treated as "category has no products".
      if (await page.$('div[role="note"]')) {
        continue;
      }

      category.products = await scrapeCategoryProducts(page, includeSubProducts);
      console.log(`cat ${index} done`);
    }

    const result = {
      url: "https://www.amazon.ae",
      categories: [...categories],
    };
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(result));
  } catch (error) {
    console.log("errror --------- ", error);
    logError(error);
    throw error;
  } finally {
    // Always release the browser process, including on failure.
    if (browser) {
      await browser.close();
    }
  }
}
// Start the scrape. run() logs its own failure before rejecting, so the
// handler only has to mark the process as failed instead of leaving the
// rejection unhandled.
run().catch(() => {
  process.exitCode = 1;
});
Editor is loading...