Untitled
user_3665552
plain_text
2 years ago
787 B
4
Indexable
import random

import q
import requests
from bs4 import BeautifulSoup
from icecream import ic

WIKI_BASE_URL = "https://en.wikipedia.org"


def scrapeWikiArticle(url, max_articles=10):
    """Random-walk Wikipedia starting at *url* and return the visited titles.

    Each visited page's first heading is printed and recorded, then a random
    in-article link is followed. The walk stops after *max_articles* pages,
    when a page has no usable ``/wiki/`` links, or when a page lacks the
    ``firstHeading`` / ``bodyContent`` elements.

    Args:
        url: Absolute URL of the article to start from.
        max_articles: Upper bound on the number of pages fetched. A value
            of zero or less fetches nothing.

    Returns:
        The titles of the visited articles, one per line (each terminated
        by a newline), in visit order. Empty string if nothing was visited.

    Raises:
        requests.RequestException: If a request times out, fails, or
            returns an HTTP error status.
    """
    titles = []
    # Iterate instead of recursing: the walk has no natural end, so a
    # recursive version can only terminate by exhausting the call stack.
    while url is not None and len(titles) < max_articles:
        response = requests.get(url=url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        heading = soup.find(id="firstHeading")
        body = soup.find(id="bodyContent")
        if heading is None or body is None:
            # Not laid out like an article page; nothing to record or follow.
            break

        print(heading.text)
        titles.append(heading.text)

        # href=True skips anchors without an href attribute; startswith keeps
        # only site-relative links, so the base URL can be safely prepended.
        candidates = [
            anchor["href"]
            for anchor in body.find_all("a", href=True)
            if anchor["href"].startswith("/wiki/")
        ]
        if candidates:
            url = WIKI_BASE_URL + random.choice(candidates)
        else:
            url = None

    return "".join(f"{title}\n" for title in titles)


def main():
    """Run a walk from the "Web scraping" article and save the titles."""
    content = scrapeWikiArticle("https://en.wikipedia.org/wiki/Web_scraping")
    # Explicit encoding: article titles may contain non-ASCII characters.
    with open("output.txt", "w", encoding="utf-8") as file:
        file.write(content)


if __name__ == "__main__":
    main()
Editor is loading...