Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
1.2 kB
2
Indexable
Never
#%%
import os
import requests
from bs4 import BeautifulSoup
import re

# Load the class list. Each line is expected to look like "<id> <name>".
# The context manager guarantees the file is closed, even if reading fails.
with open("nabird_classes.txt", 'r') as f:
    lines = f.readlines()

# Build the list of usable class names:
#   * names containing ',' or ' and' are skipped (presumably entries that
#     cover more than one species -- confirm against the data file);
#   * anything from the first '(' onwards is cut off.
new_name = []
for line in lines:
    line = line.rstrip('\n')
    # partition() never raises, so a blank line or a line without a space
    # is skipped here instead of crashing the tuple unpacking.
    _class_id, sep, name = line.partition(' ')
    if not sep:
        continue
    if ',' in name or ' and' in name:
        continue
    if '(' in name:
        # Strip trailing whitespace rather than blindly dropping one
        # character: "Foo(x)" must give "Foo", not "Fo".
        name = name[:name.index('(')].rstrip()

    new_name.append(name)
print(len(new_name))
# %%
print(new_name)
# %%
search_term = 'Dark-eyed+Junco'  # change this to your desired search term

# build the search URL; the term is inserted verbatim, so it must already be
# in query-string form (the example uses '+' between words)
url = f'https://www.allaboutbirds.org/news/search/?q={search_term}'

# send a GET request to the search URL; the timeout keeps a stalled
# connection from blocking the script forever
response = requests.get(url, timeout=30)
# fail loudly on an HTTP error instead of silently parsing an error page
response.raise_for_status()

# parse the HTML content of the response using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
print(soup)
# find all the search result items on the page
search_results = soup.find_all('div', class_='species-search-results')

# print the title of each search result item
for result in search_results:
    link = result.find('a', class_='article-item-link')
    if link is None:
        # find() returns None when the container has no matching link
        continue
    title = link.text.strip()
    print(title)
# %%