Untitled
unknown
plain_text
a year ago
1.2 kB
2
Indexable
Never
#%%
import os
import re

import requests
from bs4 import BeautifulSoup


def _parse_class_name(line):
    """Extract a searchable bird name from one line of the class list.

    Each line is expected to look like ``"<id> <name>"``. The id is
    discarded. Returns the cleaned name, or ``None`` when the line should
    be skipped: it has no ``"<id> <name>"`` structure (e.g. a blank line),
    the name contains a comma or ``" and"``, or nothing is left once a
    parenthesised qualifier has been removed.
    """
    _class_id, sep, name = line.rstrip('\n').partition(' ')
    if not sep:
        # No space at all: blank or malformed line, nothing to extract.
        return None
    if ',' in name or ' and' in name:
        return None
    paren_idx = name.find('(')
    if paren_idx != -1:
        # Drop the "(...)" qualifier and whatever whitespace precedes it,
        # instead of assuming exactly one space before the parenthesis.
        name = name[:paren_idx].rstrip()
    if not name:
        return None
    return name


# Read the class list; the context manager closes the file even on error.
with open("nabird_classes.txt", 'r', encoding='utf-8') as f:
    lines = f.readlines()

new_name = []
for line in lines:
    parsed_name = _parse_class_name(line)
    if parsed_name is not None:
        new_name.append(parsed_name)
print(len(new_name))

# %%
print(new_name)

# %%
search_term = 'Dark-eyed+Junco'  # change this to your desired search term

# build the search URL
url = f'https://www.allaboutbirds.org/news/search/?q={search_term}'

# send a GET request to the search URL; the timeout keeps the cell from
# hanging forever if the server never answers
response = requests.get(url, timeout=30)
# fail loudly on HTTP errors instead of parsing an error page
response.raise_for_status()

# parse the HTML content of the response using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
print(soup)

# find all the search result items on the page
search_results = soup.find_all('div', class_='species-search-results')

# print the titles of the search result items
for result in search_results:
    link = result.find('a', class_='article-item-link')
    if link is None:
        # This result container has no matching link; nothing to print.
        continue
    title = link.text.strip()
    print(title)
    # url = result.find('a', class_='search-result__link')['href']
    # print(f'Title: {title}\nURL: {url}\n')

# %%