scrape.js

stuff.url is undefined, but why?
mail@pastecode.io avatar
unknown
javascript
a year ago
2.6 kB
10
Indexable
Never
async function fetch(page) {
  while (true) {
    // const content = await page.$$eval('article div[lang]', (tweets) => tweets.map((tweet) => tweet.textContent));
    const stuff = await page.$$eval('article', (tweets) =>
      tweets.map((tweet) => {
        try {
          if (!tweet || tweet == null) return 0;
          if (
            !tweet.querySelectorAll("time") ||
            tweet.querySelectorAll("time") == null ||
            !tweet.outerHTML
          )
            return 0;

          //fallback mechanism to enter null if selector cannot retreive

          let _content = null;
          let _published = null;
          let _replies = null;
          let _retweets = null;
          let _likes = null;
          let _error = null;
          let _url = [...tweet.querySelectorAll("a")]
            .map((e) => e.getAttribute("href"))
            .filter((e) => e.includes("status") && !e.includes("photo"))[0];

          try {
            //find tweetmap data

            _content = tweet.textContent;
            _published = tweet
              .querySelectorAll("time")[0]
              .getAttribute("datetime");
            if (tweet.outerHTML.match("[0-9]+ .etweets"))
              _retweets = tweet.outerHTML
                .match("[0-9]+ .etweets")[0]
                .split(" ")[0];
            else
              _retweets = tweet.outerHTML
                .match("[0-9]+ .etweet")[0]
                .split(" ")[0];
            if (tweet.outerHTML.match("[0-9]+ .ikes"))
              _likes = tweet.outerHTML.match("[0-9]+ .ikes")[0].split(" ")[0];
            else
              _likes = tweet.outerHTML.match("[0-9]+ .ike")[0].split(" ")[0];
            if (tweet.outerHTML.match("[0-9]+ .eplies"))
              _replies = tweet.outerHTML
                .match("[0-9]+ .eplies")[0]
                .split(" ")[0];
            else
              _replies = tweet.outerHTML
                .match("[0-9]+ .eply")[0]
                .split(" ")[0];
          } catch (ex) {
            _error = ex.toString();
          }
          //populate tweetmap

          return {
            url: _url,
            content: _content,
            published: _published,
            replies: _replies,
            retweets: _retweets,
            likes: _likes,
            error: _error,
          };
        } catch (e) {
          console.log("puppeteer error");
          console.log(e);
          return 0;
        }
      }));

    console.log(typeof stuff); // object
    console.log(stuff.url); // undefined
  }
}