// Untitled
//
// mail@pastecode.io avatar
// unknown
// javascript
// 2 years ago
// 2.5 kB
// 3
// Indexable
// Never
const {chromium} = require('@playwright/test');

// Selectors for meta tags that tell crawlers not to index the page.
// `*=` is a substring match and the trailing `i` makes the comparison
// case-insensitive, so combined or reordered directives such as
// "NOINDEX,nofollow" or "nofollow, noindex" are caught as well as the plain
// "noindex" value.
const forbiddenElements = [
    'meta[name="robots" i][content*="noindex" i]',
    'meta[name="googlebot" i][content*="noindex" i]',
];

// NOTE(review): nothing in the visible code reads or writes this array;
// presumably reserved for tracking already-visited URLs once crawling is
// implemented — confirm or remove.
const testedUrls = [];

/**
 * Opens `url` in a Chromium browser launched through Playwright and reports
 * which of the given CSS selectors match at least one element on that page.
 *
 * Only the single page at `url` is inspected; links on the page are not
 * followed.
 *
 * @param {string} url - Address of the page to load.
 * @param {string[]} [elementsSelectors=[]] - CSS selectors to look for.
 * @param {string[]} [skipUrls=[]] - Currently unused; kept so existing
 *   callers that pass it keep working.
 * @returns {Promise<{url: string, forbiddenElements: string[]}>} The page's
 *   final URL and the subset of `elementsSelectors` that matched, in the
 *   order they were given.
 * @throws Whatever Playwright throws when launching, navigating or
 *   evaluating fails; the browser is closed before the error propagates.
 */
async function checkSiteForNoIndex(url, elementsSelectors = [], skipUrls = []) {
    console.log(`Crawling ${url}`);
    const browser = await chromium.launch();
    try {
        const page = await browser.newPage();
        await page.goto(url);
        console.log(`page loaded: ${url}`);

        // The callback runs inside the page, so it can only use the
        // serialised `lookFor` argument and the page's own DOM globals.
        const faulty = await page.evaluate(
            (lookFor) => ({
                url: document.URL,
                forbiddenElements: lookFor.filter(
                    (selector) => document.querySelector(selector) !== null
                ),
            }),
            elementsSelectors
        );

        console.log(faulty);
        return faulty;
    } finally {
        // Always release the browser process, even when navigation or
        // evaluation fails.
        await browser.close();
    }
}

// Script entry point: check the site's home page. A failure is logged and
// turned into a non-zero exit code instead of being left as a floating,
// unhandled promise rejection.
checkSiteForNoIndex('https://www.24mx.ie/', forbiddenElements).catch((error) => {
    console.error(error);
    process.exitCode = 1;
});