Untitled
unknown
plain_text
2 years ago
946 B
2
Indexable
# Scrape the e-mail addresses out of a web page's `mailto:` links and save
# them to emails.csv, one address per row.
import csv
from urllib.parse import unquote
from urllib.request import urlopen


def parse_mailto(href):
    """Return the e-mail addresses named by a ``mailto:`` link.

    Args:
        href: The raw ``href`` attribute value of an ``<a>`` tag, or ``None``
            when the tag has no ``href``.

    Returns:
        A list of addresses in link order. The list is empty when ``href`` is
        missing, is not a ``mailto:`` link, or names no recipient.
    """
    if not href:
        return []

    scheme, colon, rest = href.strip().partition(':')
    # URI schemes are case-insensitive, so `MAILTO:` must match as well.
    if not colon or scheme.lower() != 'mailto':
        return []

    # Everything after `?` is header fields (subject, body, cc, ...), not
    # part of the recipient list.
    recipients = rest.split('?', 1)[0]

    # Split on the literal commas that separate recipients *before*
    # percent-decoding, so an encoded `%2C` inside an address is not treated
    # as a separator.
    decoded = (unquote(part).strip() for part in recipients.split(','))
    return [address for address in decoded if address]


def extract_emails(soup):
    """Collect the addresses of every ``mailto:`` link in a parsed page.

    Args:
        soup: A parsed document exposing ``find_all('a')`` whose results
            expose ``get('href')`` (a ``BeautifulSoup`` object in ``main``).

    Returns:
        A list of addresses in document order; duplicates are kept.
    """
    emails = []
    for anchor in soup.find_all('a'):
        emails.extend(parse_mailto(anchor.get('href')))
    return emails


def write_emails(emails, path='emails.csv'):
    """Write ``emails`` to a CSV file at ``path``, one address per row."""
    # An explicit encoding keeps the output independent of the platform
    # default, which may be unable to encode a non-ASCII address.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows([address] for address in emails)


def main():
    """Prompt for a URL, scrape its ``mailto:`` links, write emails.csv."""
    # Imported here, before prompting, so a missing bs4 still fails
    # immediately while the helpers above stay importable without it.
    from bs4 import BeautifulSoup

    # specify the URL of the website you want to scrape,
    # e.g. https://www.example.com
    print('Enter your url:')
    url = input().strip()

    # BeautifulSoup reads the whole response while constructing the tree, so
    # the connection can be closed as soon as parsing is done.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    write_emails(extract_emails(soup))


if __name__ == '__main__':
    main()
Editor is loading...