// scraper.js
// unknown
// javascript
// 5 years ago
// 1.7 kB
// 7
// Indexable
const cheerio = require("cheerio");
const superagent = require("superagent");
// HTTP client used by fetchData. Despite its name this is a superagent agent,
// not the axios library.
const axios = superagent.agent();
// Origin prepended to the relative paths this module requests and reports.
const siteUrl = "https://www.cermati.com";
// Set of article href strings; a Set so that duplicate links collapse.
const link = new Set();
/**
 * Download one page of the site and hand its body to cheerio.
 *
 * @param {string} link - Path appended to siteUrl to form the request URL.
 * @returns {Promise<*>} Whatever cheerio.load returns for the response text.
 */
const fetchData = async (link) => {
  const pageUrl = siteUrl + link;
  const { text } = await axios.get(pageUrl);
  return cheerio.load(text);
};
/**
 * Collect the article links listed on the `/artikel` page.
 *
 * Every `div.article-list-item` inside a `div.list-of-articles` contributes the
 * href of its anchor. Items without an href are ignored.
 *
 * @returns {Promise<string[]>} Unique href values in default (string) sort order.
 */
const getResults = async () => {
  const $ = await fetchData('/artikel');
  // Local to this call so a run never returns links left over from an earlier one.
  const articleLinks = new Set();

  $('div.list-of-articles').each((_listIndex, list) => {
    $(list).find('div.article-list-item').each((_itemIndex, item) => {
      const href = $(item).find('a').attr('href');
      // attr() yields undefined when there is no anchor/href; storing it would
      // later be concatenated into a bogus "<siteUrl>undefined" request URL.
      if (href !== undefined) {
        articleLinks.add(href);
      }
    });
  });

  return [...articleLinks].sort();
};
/**
 * Scrape one article page and return its metadata plus its related articles.
 *
 * @param {string} link - Path appended to siteUrl to form the page URL.
 * @returns {Promise<Array<Object>>} One `{link, title, author, postingDate}`
 *   entry per `section.post-content` on the page, followed by a single
 *   `{relatedArticles: [{url, title}, ...]}` entry when the page contains a
 *   second element matching `div.col-lg-3 .margin-bottom-30`. A related item
 *   without an href gets `url: null`.
 */
const getResultDetails = async (link) => {
  const $ = await fetchData(link);
  const detailContent = [];

  $('section.post-content').each((_index, detail) => {
    const section = $(detail);
    detailContent.push({
      link: siteUrl + link,
      title: section.find('h1.post-title').text().trim(),
      author: section.find('span.author-name').text().trim(),
      postingDate: section.find('span.post-date').children('span').text().trim(),
    });
  });

  // Only the second matching sidebar panel (index 1) is read as the
  // related-articles list; any other matches are ignored.
  const relatedPanel = $('div.col-lg-3 .margin-bottom-30').eq(1);
  if (relatedPanel.length > 0) {
    const relatedArticles = [];
    relatedPanel.find('ul.panel-items-list li').each((_index, related) => {
      const item = $(related);
      const href = item.find('li>a').attr('href');
      relatedArticles.push({
        // A missing href used to be concatenated into "<siteUrl>undefined";
        // report null instead of a URL that cannot exist.
        url: href === undefined ? null : siteUrl + href,
        title: item.find('li a').children('h5.item-title').text(),
      });
    });
    detailContent.push({ relatedArticles });
  }

  return detailContent;
};
module.exports = {
getResults,
getResultDetails
};Editor is loading...