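# Paste-site page metadata captured with the snippet (not part of the program;
# the meaning of the bare values below is not stated by the page text):
#   Untitled
#   unknown
#   python
#   3 years ago
#   1.3 kB
#   9
#   Indexable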
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree
# Site root; scraped hrefs are combined with this to build the stored URLs.
base_url = "https://www.reclameaqui.com.br/"
# Headers sent with every request. The User-Agent comment group was missing
# its closing parenthesis, which produced a malformed header value.
header = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'}
def _fetch_soup(url, timeout=5):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page as a listing.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def iosearch():
    """Append the complaint URLs from every Santander listing page to a file.

    The total page count is read from the pagination element on page 1;
    pages 1..N are then downloaded in order and one absolute URL per line is
    appended to ``data/urls.txt`` (the directory is created if missing).

    Returns:
        None.

    Raises:
        requests.RequestException: if any page cannot be downloaded.
        IndexError: if the pagination element is not found on page 1.
        ValueError: if the pagination element's text is not an integer.
    """
    list_url = "https://www.reclameaqui.com.br/empresa/santander/lista-reclamacoes/?pagina={page}"

    first_page = _fetch_soup(list_url.format(page=1))
    dom = etree.HTML(str(first_page))
    # Purely positional XPath: it breaks if the page layout changes, so the
    # empty-result case is reported explicitly instead of a bare [0] failure.
    last_page_nodes = dom.xpath('//*[@id="__next"]/div[1]/div[1]/div[3]/main/section[2]/div[2]/div[2]/div[11]/ul/li[8]')
    if not last_page_nodes:
        raise IndexError("pagination element not found on the first listing page")
    number_of_pages = int(last_page_nodes[0].text)

    # open(..., 'a') creates the file but not its parent directory.
    os.makedirs('data', exist_ok=True)
    with open('data/urls.txt', 'a', encoding='latin-1') as file:
        for page in range(1, number_of_pages + 1):
            print(page)
            soup = _fetch_soup(list_url.format(page=page))
            # Query the document once per page and iterate the matches directly.
            for card in soup.find_all("div", {"class": "bJdtis"}):
                link = card.find('a')
                href = link.get('href') if link is not None else None
                if not href:
                    # A card without a usable link should not abort the whole run.
                    continue
                # urljoin avoids a doubled slash when href is root-relative
                # and leaves already-absolute hrefs untouched.
                file.write(urljoin(base_url, href) + '\n')
    # NOTE: the output written above is a plain-text URL list.
    print("Created Json File!")
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    iosearch()
# Editor is loading...  (paste-site page residue, not part of the program)