# Paste-site export metadata (pastecode.io), kept as a comment so the file
# parses as Python: "Untitled", author unknown, language python,
# posted 7 months ago, 966 B.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape the VnExpress front page: for every element with class "title-news",
# print the headline text, then fetch the linked page and print the text of
# its "fck_detail" element with newlines removed.
url = 'https://vnexpress.net/'

# Seconds to wait on each request. requests applies no timeout by default, so
# without this a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# A single session reuses the underlying connection for all article requests
# and is closed automatically when the block exits.
with requests.Session() as session:
    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"Có lỗi HTTP: {http_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"Có lỗi trong quá trình gửi yêu cầu: {req_err}")
    else:
        soup = BeautifulSoup(response.text, 'html.parser')
        title_tags = soup.find_all(class_='title-news')
        for title_tag in title_tags:
            # A headline element may lack an <a> child or an href attribute;
            # there is nothing to fetch in that case, so skip it.
            anchor = title_tag.a
            href = anchor.get('href') if anchor is not None else None
            if not href:
                continue

            # Resolve relative hrefs against the front-page URL; absolute
            # URLs pass through urljoin unchanged.
            link = urljoin(url, href)
            title_text = title_tag.get_text().strip()
            print(title_text)

            # A failure on one article must not abort the remaining ones.
            try:
                sub_response = session.get(link, timeout=REQUEST_TIMEOUT)
                sub_response.raise_for_status()
            except requests.exceptions.HTTPError as http_err:
                print(f"Có lỗi HTTP: {http_err}")
                continue
            except requests.exceptions.RequestException as req_err:
                print(f"Có lỗi trong quá trình gửi yêu cầu: {req_err}")
                continue

            sub_soup = BeautifulSoup(sub_response.text, 'html.parser')
            # find() returns None when the page has no "fck_detail" element.
            detail = sub_soup.find(class_='fck_detail')
            if detail is None:
                print(f"Không tìm thấy nội dung bài viết: {link} \n")
                continue

            content = detail.get_text().replace('\n', '')
            print(f'{content} \n')