scraper.js
unknown
javascript
4 years ago
1.7 kB
3
Indexable
const cheerio = require("cheerio");
const superagent = require("superagent");

// One superagent agent reused for every request made by this module.
const agent = superagent.agent();

// Origin against which every scraped (usually root-relative) href is resolved.
const siteUrl = "https://www.cermati.com";

/**
 * Resolve a scraped href against the site origin.
 *
 * Root-relative paths ("/artikel/foo") yield the same string that plain
 * concatenation with `siteUrl` would; absolute URLs and paths without a
 * leading slash are resolved correctly instead of being glued onto the origin.
 *
 * @param {string} href - Path or URL as found in the page (or given by a caller).
 * @returns {string} Absolute URL.
 */
const resolveUrl = (href) => new URL(href, siteUrl).href;

/**
 * Download a page of the site and parse it with cheerio.
 *
 * @param {string} link - Path (or URL) of the page to fetch.
 * @returns {Promise<cheerio.CheerioAPI>} Cheerio handle loaded with the response body.
 */
const fetchData = async (link) => {
  const result = await agent.get(resolveUrl(link));
  return cheerio.load(result.text);
};

/**
 * Collect the article links listed on the "/artikel" index page.
 *
 * The set of links is built fresh on every call, so repeated calls never
 * return links left over from an earlier crawl.
 *
 * @returns {Promise<string[]>} Unique hrefs, sorted in default string order.
 */
const getResults = async () => {
  const $ = await fetchData("/artikel");
  const links = new Set();

  $("div.list-of-articles")
    .find("div.article-list-item")
    .each((_, item) => {
      const href = $(item).find("a").attr("href");
      // Items without an anchor would otherwise put `undefined` in the result.
      if (href) {
        links.add(href);
      }
    });

  return [...links].sort();
};

/**
 * Extract the related-article entries from one sidebar widget.
 *
 * @param {cheerio.CheerioAPI} $ - Cheerio handle of the article page.
 * @param {*} sidebar - Cheerio selection of the sidebar widget.
 * @returns {{url: string, title: string}[]} One entry per list item that has a link.
 */
const extractRelatedArticles = ($, sidebar) => {
  const relatedArticles = [];

  sidebar.find("ul.panel-items-list li").each((_, related) => {
    const item = $(related);
    const href = item.find("li>a").attr("href");
    // Without an href there is nothing to link to; skip the entry rather
    // than emit a URL ending in "undefined".
    if (!href) {
      return;
    }
    relatedArticles.push({
      url: resolveUrl(href),
      title: item.find("li a").children("h5.item-title").text(),
    });
  });

  return relatedArticles;
};

/**
 * Scrape the details of a single article page.
 *
 * @param {string} link - Article path, as returned by `getResults`.
 * @returns {Promise<Object[]>} One `{ link, title, author, postingDate }` object
 *   per `section.post-content` on the page, followed by a single
 *   `{ relatedArticles: [{ url, title }] }` object when the page has a second
 *   `div.col-lg-3 .margin-bottom-30` sidebar widget.
 */
const getResultDetails = async (link) => {
  const $ = await fetchData(link);
  const detailContent = [];

  $("section.post-content").each((_, detail) => {
    const section = $(detail);
    detailContent.push({
      link: resolveUrl(link),
      title: section.find("h1.post-title").text().trim(),
      author: section.find("span.author-name").text().trim(),
      postingDate: section.find("span.post-date").children("span").text().trim(),
    });
  });

  // Only the second matching sidebar widget (index 1) is read for related articles.
  const sidebar = $("div.col-lg-3 .margin-bottom-30").eq(1);
  if (sidebar.length > 0) {
    detailContent.push({ relatedArticles: extractRelatedArticles($, sidebar) });
  }

  return detailContent;
};

module.exports = { getResults, getResultDetails };
Editor is loading...