Untitled
unknown
javascript
2 years ago
2.5 kB
3
Indexable
Never
const {chromium} = require('@playwright/test');

// Selectors for elements that must not be present on an indexable page:
// robots/googlebot meta tags whose content is "noindex".
// NOTE: `[content="..."]` is an exact match, so spelling variants such as
// "noindex,nofollow" (no space) are not covered by this list.
const forbiddenElements = [
    'meta[name="robots"][content="noindex"]',
    'meta[name="robots"][content="noindex, nofollow"]',
    'meta[name="googlebot"][content="noindex"]',
    'meta[name="googlebot"][content="noindex, nofollow"]',
    // TODO: the two selectors below were added only to verify that the
    // approach works; they match ordinary pages and should be removed before
    // the report is relied upon.
    'meta[charSet="utf-8"]',
    'meta',
];

// Placeholder for URLs that have already been checked; nothing in this file
// reads or writes it yet.
const testedUrls = [];

/**
 * Loads a page in a Chromium browser launched through Playwright and reports
 * which of the given CSS selectors match at least one element on that page.
 *
 * Only the single page at `url` is inspected; links on the page are not
 * collected or followed.
 *
 * @param {string} url - Address of the page to load.
 * @param {string[]} [elementsSelectors=[]] - CSS selectors to look for.
 * @param {string[]} [skipUrls=[]] - Accepted for future use; currently unused.
 * @returns {Promise<{url: string, forbiddenElements: string[]}>} The page URL
 *   as seen by the document, plus every selector that matched.
 */
async function checkSiteForNoIndex(url, elementsSelectors = [], skipUrls = []) {
    console.log(`Crawling ${url}`);

    const browser = await chromium.launch();
    try {
        const page = await browser.newPage();
        await page.goto(url);
        console.log(`page loaded: ${url}`);

        // The callback is executed inside the page, so its console.log calls
        // go to the page's console rather than to this process's stdout.
        const faulty = await page.evaluate((lookFor) => {
            const pageUrl = document.URL;
            const faultyLink = {
                url: pageUrl,
                forbiddenElements: [],
            };

            for (const selector of lookFor) {
                console.log(`looking for ${selector}`);
                if (document.querySelector(selector) !== null) {
                    console.log(`Page ${pageUrl} has a ${selector} element`);
                    faultyLink.forbiddenElements.push(selector);
                } else {
                    console.log(`Page ${pageUrl} does not have a ${selector} element`);
                }
            }

            return faultyLink;
        }, elementsSelectors);

        console.log(faulty);
        return faulty;
    } finally {
        // Always release the browser, even when navigation or evaluation fails.
        await browser.close();
    }
}

checkSiteForNoIndex('https://www.24mx.ie/', forbiddenElements).catch((error) => {
    // Surface the failure and signal it through the exit code instead of
    // leaving an unhandled promise rejection.
    console.error(error);
    process.exitCode = 1;
});