# Paste metadata (not part of the script):
#   title: Untitled
#   author: unknown
#   format: plain_text
#   age: 3 years ago
#   size: 1.2 kB
#   12
#   Indexable
#%%
import os
import requests
from bs4 import BeautifulSoup
import re
# Load the class list and build `new_name`, the list of cleaned class names.
# Each non-empty line of the file is "<id> <name>": the id is everything up to
# the first space and the rest of the line is the name.
# The context manager closes the file even if reading fails.
with open("nabird_classes.txt", 'r', encoding="utf-8") as class_file:
    lines = class_file.readlines()

new_name = []
for line in lines:
    # Strip the line terminator only; spaces inside the name are kept.
    line = line.rstrip('\n')
    # partition() never raises, so blank or id-only lines (for example a
    # trailing empty line) are skipped instead of crashing the loop.
    class_id, _, name = line.partition(' ')
    if not name:
        continue
    # Skip names that contain a comma or " and".
    if ',' in name or ' and' in name:
        continue
    # Drop a parenthesised qualifier together with the whitespace before it.
    # Trimming whitespace (rather than always cutting one character) keeps the
    # name intact when '(' is not preceded by a space.
    if '(' in name:
        name = name[:name.index('(')].rstrip()
    if name:
        new_name.append(name)
print(len(new_name))
# %%
print(new_name)
# %%
search_term = 'Dark-eyed+Junco'  # change this to your desired search term
# Build the search URL. The term is inserted verbatim, so it must already be
# query-encoded (spaces written as '+').
url = f'https://www.allaboutbirds.org/news/search/?q={search_term}'
# Send a GET request to the search URL. The timeout stops the cell from
# hanging forever on an unresponsive server; streaming is not requested
# because the whole body is read immediately below.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as search results.
response.raise_for_status()
# Parse the HTML content of the response using BeautifulSoup.
soup = BeautifulSoup(response.content, 'html.parser')
print(soup)
# Find all the search result containers on the page.
search_results = soup.find_all('div', class_='species-search-results')
# Print the title of each search result item.
for result in search_results:
    link = result.find('a', class_='article-item-link')
    # find() returns None when the container has no matching link.
    if link is None:
        continue
    title = link.text.strip()
    print(title)
    # url = result.find('a', class_='search-result__link')['href']
    # print(f'Title: {title}\nURL: {url}\n')
# %%
# Editor is loading...