Untitled

 avatar
user_3665552
plain_text
2 years ago
787 B
4
Indexable
import requests
from bs4 import BeautifulSoup
import random
from icecream import ic
import q

def scrapeWikiArticle(url, max_articles=10):
    """Follow a random chain of Wikipedia articles and collect their titles.

    Starting at ``url``, each visited page's heading is printed and recorded,
    then one randomly chosen ``/wiki/`` link from the page body is followed.
    The walk ends after ``max_articles`` pages, or earlier when a page has no
    ``firstHeading`` element, no ``bodyContent`` element, or no usable link.

    Args:
        url: Absolute URL of the first article to fetch.
        max_articles: Upper bound on the number of pages fetched, so the
            walk always terminates.

    Returns:
        The visited article titles as a single string, one title per line
        (empty if the first page has no heading).

    Raises:
        requests.RequestException: If a page cannot be fetched, including
            when the request times out.
    """
    titles = []
    for _ in range(max_articles):
        # A timeout keeps a stalled connection from hanging the whole walk.
        response = requests.get(url=url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.find(id="firstHeading")
        if heading is None:
            break
        print(heading.text)
        titles.append(heading.text)

        body = soup.find(id="bodyContent")
        if body is None:
            break

        # href=True skips anchors that have no href attribute. Requiring the
        # "/wiki/" prefix keeps only site-relative article paths, so that
        # prepending the host below always yields a well-formed URL.
        candidates = [
            anchor['href']
            for anchor in body.find_all("a", href=True)
            if anchor['href'].startswith("/wiki/")
        ]
        if not candidates:
            break

        url = "https://en.wikipedia.org" + random.choice(candidates)

    return "\n".join(titles)

# Start the crawl at the "Web scraping" article.
content = scrapeWikiArticle("https://en.wikipedia.org/wiki/Web_scraping")

# Write with an explicit UTF-8 encoding: scraped text may contain non-ASCII
# characters, and the platform default encoding is not guaranteed to
# represent them.
with open("output.txt", "w", encoding="utf-8") as file:
    file.write(content)