/*
 * scrape.js
 *
 * Web scraping program to scrape tweets from an account then scroll down and repeat...
 * mail@pastecode.io avatar
 * unknown
 * javascript
 * a year ago
 * 1.1 kB
 * 19
 * Indexable
 * Never
 */
const puppeteer = require('puppeteer-extra');

/**
 * Script entry point.
 *
 * Launches a visible (non-headless) browser from /usr/bin/brave, opens the
 * CNN Twitter profile, scrolls the page with autoScroll(), then prints the
 * text content of every element matching `article div[lang]`.
 *
 * The browser is always closed, even when navigation or scraping throws, and
 * any failure is logged and reflected in the process exit code instead of
 * surfacing as an unhandled promise rejection.
 */
(async () => {
  const browser = await puppeteer.launch({
    executablePath: '/usr/bin/brave',
    headless: false,
    // null viewport: the page uses the real window size rather than a fixed one.
    defaultViewport: null,
    // Drop Puppeteer's default "--disable-extensions" flag from the launch args.
    ignoreDefaultArgs: ["--disable-extensions"],
    args: ["--start-maximized", "--no-sandbox", "--disable-setuid-sandbox"],
  });

  try {
    const page = await browser.newPage();

    await page.goto('https://twitter.com/CNN', { waitUntil: 'networkidle2' });

    // Scroll first so that more content is present before scraping.
    await autoScroll(page);

    const results = await page.$$eval('article div[lang]', (tweets) =>
      tweets.map((tweet) => tweet.textContent),
    );
    console.log(results);
  } finally {
    // Await the shutdown so the browser process is not left behind on errors.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

/**
 * Scrolls the page down in fixed steps until the accumulated scroll distance
 * reaches the bottom of the document.
 *
 * The loop runs inside the page context via `page.evaluate`. On every tick it
 * reads the current `document.body.scrollHeight`, scrolls by `distance`
 * pixels, and stops once the total scrolled distance is at least
 * `scrollHeight - window.innerHeight`. Because the height is re-read on each
 * tick, content appended while scrolling extends the loop.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)` (a Puppeteer page).
 * @param {object} [options]
 * @param {number} [options.distance=100] - Pixels to scroll per tick.
 * @param {number} [options.delay=400] - Milliseconds between ticks.
 * @returns {Promise<void>} Resolves when the stop condition is met.
 */
async function autoScroll(page, { distance = 100, delay = 400 } = {}) {
  // The callback executes in the page, so it cannot close over local
  // variables; step and delay are handed over as evaluate() arguments.
  await page.evaluate(
    (step, interval) =>
      new Promise((resolve) => {
        let totalHeight = 0;
        const timer = setInterval(() => {
          // Read the height before scrolling, on every tick.
          const scrollHeight = document.body.scrollHeight;
          window.scrollBy(0, step);
          totalHeight += step;

          if (totalHeight >= scrollHeight - window.innerHeight) {
            clearInterval(timer);
            resolve();
          }
        }, interval);
      }),
    distance,
    delay,
  );
}