# Paste-site header (not part of the script): Untitled / unknown / plain_text / 3 years ago / 946 B / 3 / Indexable
import csv
from urllib.parse import unquote
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Scrape the email addresses found in the `mailto:` links of a web page and
# save them, one per row, to a CSV file.

OUTPUT_PATH = 'emails.csv'
MAILTO_PREFIX = 'mailto:'


def parse_mailto(href):
    """Return the email addresses encoded in a ``mailto:`` link target.

    Args:
        href: The raw ``href`` attribute value of an ``<a>`` tag, or ``None``
            when the tag has no ``href``.

    Returns:
        A list of address strings in the order they appear in the link.
        The list is empty when ``href`` is missing, is not a ``mailto:``
        link, or names no recipient.
    """
    if not href:
        return []
    href = href.strip()
    # URI schemes are case-insensitive, so `MAILTO:` must match as well.
    if not href.lower().startswith(MAILTO_PREFIX):
        return []
    # Everything after `?` is header fields (subject, body, cc, ...), not
    # part of the address.
    recipients = href[len(MAILTO_PREFIX):].partition('?')[0]
    # One link may list several comma-separated recipients. Split before
    # percent-decoding so an encoded comma (%2C) inside an address is kept.
    addresses = (unquote(part).strip() for part in recipients.split(','))
    return [address for address in addresses if address]


def extract_emails(soup):
    """Collect the unique ``mailto:`` addresses of every ``<a>`` tag in *soup*.

    Args:
        soup: A parsed document exposing ``find_all('a')`` whose results
            support ``.get('href')`` (e.g. a ``BeautifulSoup`` object).

    Returns:
        A list of addresses in first-seen order, without duplicates.
    """
    # A dict keeps insertion order, which gives an order-preserving de-dup.
    seen = {}
    for anchor in soup.find_all('a'):
        for address in parse_mailto(anchor.get('href')):
            seen.setdefault(address, None)
    return list(seen)


def main():
    """Prompt for a URL, scrape its mailto links and write ``emails.csv``."""
    print('Enter your url:')
    url = input().strip()

    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    emails = extract_emails(soup)

    # newline='' is required by the csv module; an explicit encoding avoids
    # locale-dependent failures on non-ASCII addresses.
    with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows([email] for email in emails)


if __name__ == '__main__':
    main()