// Untitled
// unknown
// javascript
// 3 years ago
// 2.5 kB
// 14
// Indexable
const {chromium} = require('@playwright/test');
// Selectors for <meta> tags that tell crawlers not to index the page.
// `*=` together with the `i` flag matches "noindex" anywhere inside the
// attribute value and regardless of letter case, so variants such as
// "noindex,nofollow", "NOINDEX" or "nofollow, noindex" are caught as well as
// the plain "noindex" and "noindex, nofollow" forms.
// Catch-all debugging selectors (`meta`, `meta[charSet="utf-8"]`) are
// deliberately not listed: they match on practically every page and would
// flag all of them.
const forbiddenElements = [
  'meta[name="robots" i][content*="noindex" i]',
  'meta[name="googlebot" i][content*="noindex" i]',
];
// Not referenced by any code visible in this file; presumably intended to
// record URLs that were already checked.
const testedUrls = [];
/**
 * Loads a single page in Chromium and reports which of the given CSS
 * selectors match at least one element in it.
 *
 * Only the page at `url` is inspected; links on the page are not followed.
 *
 * @param {string} url - Address of the page to load.
 * @param {string[]} [elementsSelectors=[]] - CSS selectors to probe for.
 * @param {string[]} [skipUrls=[]] - Currently unused; kept so existing call
 *   sites keep working.
 * @returns {Promise<{url: string, forbiddenElements: string[]}>} The page's
 *   `document.URL` and the selectors that matched, in input order.
 * @throws Propagates any error raised while launching the browser, navigating
 *   or evaluating; the browser is closed before the error reaches the caller.
 */
async function checkSiteForNoIndex(url, elementsSelectors = [], skipUrls = []) {
  console.log(`Crawling ${url}`);
  const browser = await chromium.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    console.log(`page loaded: ${url}`);

    // The callback runs inside the page, so it may only use its argument and
    // page globals such as `document`. No per-selector logging is done there:
    // console output from the page does not show up on Node's stdout.
    const faulty = await page.evaluate(
      (lookFor) => ({
        url: document.URL,
        forbiddenElements: lookFor.filter(
          (selector) => document.querySelectorAll(selector).length > 0,
        ),
      }),
      elementsSelectors,
    );

    console.log(faulty);
    return faulty;
  } finally {
    // Always release the browser process, even when navigation or
    // evaluation fails.
    await browser.close();
  }
}
// Entry point: check the site's home page. A failure is reported on stderr
// and reflected in the exit code instead of being left as an unhandled
// promise rejection.
checkSiteForNoIndex('https://www.24mx.ie/', forbiddenElements).catch((error) => {
  console.error(error);
  process.exitCode = 1;
});