scrape.js
stuff.url is undefined, but why? unknown
javascript
a year ago
2.6 kB
10
Indexable
Never
/**
 * Repeatedly scrapes every `<article>` element on the page and logs the
 * status URL of each tweet that could be extracted.
 *
 * `page.$$eval` resolves to an ARRAY (one entry per article), so the URLs
 * are read from the individual entries rather than from the array itself.
 *
 * Each successfully processed article yields an object of the form
 * `{ url, content, published, replies, retweets, likes, error }`; any value
 * that cannot be found is `null`. An article that cannot be processed at all
 * yields `0`.
 *
 * NOTE(review): the loop has no exit condition and no delay between passes;
 * it only stops when `page.$$eval` rejects. Confirm this is intended.
 *
 * @param {object} page - Object exposing a Puppeteer-style
 *   `$$eval(selector, pageFunction)` method.
 * @returns {Promise<never>} Never fulfils; rejects when `page.$$eval` rejects.
 */
async function fetch(page) {
  while (true) {
    const stuff = await page.$$eval("article", (tweets) => {
      // Everything in this callback is kept self-contained (no references to
      // outer scope) because it is handed to `$$eval` — presumably serialized
      // and executed in the browser context; see the Puppeteer docs.

      // Returns the leading number of the first "<number> <label>" match,
      // trying the plural label first and then the singular one, or null
      // when neither label is present.
      const firstCount = (html, plural, singular) => {
        const match = html.match(plural) || html.match(singular);
        return match ? match[0].split(" ")[0] : null;
      };

      return tweets.map((tweet) => {
        try {
          if (!tweet || !tweet.outerHTML) {
            return 0;
          }

          // Anchors without an href yield null from getAttribute; skip them
          // instead of letting `.includes` throw and discard the whole tweet.
          const url =
            [...tweet.querySelectorAll("a")]
              .map((anchor) => anchor.getAttribute("href"))
              .find(
                (href) =>
                  href && href.includes("status") && !href.includes("photo")
              ) || null;

          // Fallback values: stay null when a selector cannot retrieve them.
          let content = null;
          let published = null;
          let replies = null;
          let retweets = null;
          let likes = null;
          let error = null;

          try {
            const html = tweet.outerHTML;
            const timeElement = tweet.querySelectorAll("time")[0];

            content = tweet.textContent;
            published = timeElement
              ? timeElement.getAttribute("datetime")
              : null;

            // Each metric is looked up independently so that one missing
            // counter does not prevent the others from being extracted.
            retweets = firstCount(html, "[0-9]+ .etweets", "[0-9]+ .etweet");
            likes = firstCount(html, "[0-9]+ .ikes", "[0-9]+ .ike");
            replies = firstCount(html, "[0-9]+ .eplies", "[0-9]+ .eply");
          } catch (ex) {
            error = ex.toString();
          }

          return { url, content, published, replies, retweets, likes, error };
        } catch (e) {
          console.log("puppeteer error");
          console.log(e);
          return 0;
        }
      });
    });

    for (const tweet of stuff) {
      // Articles that failed extraction come back as 0; only real records
      // carry a url.
      if (tweet && typeof tweet === "object") {
        console.log(tweet.url);
      }
    }
  }
}