# Paste-site header (extraction residue, not part of the script):
# Untitled · unknown · python · 3 years ago · 1.7 kB · 11 · Indexable
import requests
import pandas as pd
import yfinance as yf
from bs4 import BeautifulSoup
def get_spx_companies():
    """Scrape the S&P 500 ticker symbols from the Wikipedia list page.

    Downloads the "List of S&P 500 companies" article, locates the first
    sortable wikitable in it and collects the stripped text of the first
    ``<td>`` cell of every row after the first.

    Returns:
        list[str]: Ticker symbols in page order, exactly as printed in the
        table.
        NOTE(review): symbols are returned verbatim; confirm that the
        consumer (yfinance) accepts the same spelling for share-class
        tickers that contain a dot.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        RuntimeError: If no matching table is present in the page.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Try the exact class string first (the original lookup), then fall back
    # to a CSS selector that still matches when the table carries additional
    # classes or lists them in a different order.
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        table = soup.select_one('table.wikitable.sortable')
    if table is None:
        raise RuntimeError(
            'Could not find a "wikitable sortable" table at ' + url
        )

    tickers = []
    # The first row is skipped as the header row.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Rows without any <td> cell would raise IndexError; skip them.
        if cells:
            tickers.append(cells[0].text.strip())
    return tickers
tickers_of_interest = get_spx_companies()
# Add additional tickers which tend to be popular (meme stocks)
tickers_of_interest += ['BBBY', 'GME', 'AMC']
# Drop duplicates while keeping first-seen order so no ticker is fetched twice.
tickers_of_interest = list(dict.fromkeys(tickers_of_interest))


def _fetch_company_info(ticker):
    """Return the yfinance ``info`` mapping for *ticker*, or ``{}`` on failure.

    One lookup is made per ticker, so a single failing symbol should not
    abort the whole run. Failures are reported on stdout and replaced by an
    empty dict, which downstream code treats as "no metadata available".
    """
    try:
        # ``or {}`` normalises an empty/None result to an empty dict.
        return yf.Ticker(ticker).info or {}
    except Exception as exc:
        # Broad catch on purpose: this is the script's outer boundary and the
        # exception types raised by the lookup are not visible from here.
        print(f'Warning: could not fetch metadata for {ticker}: {exc}')
        return {}


# Get company metadata
company_metadata = {
    ticker: _fetch_company_info(ticker) for ticker in tickers_of_interest
}
# Convert company metadata into pandas dataframe
def get_company_keywords(company_metadata: dict, max_officers: int = 3) -> pd.DataFrame:
    """Flatten per-ticker metadata into one keyword row per company.

    Args:
        company_metadata: Mapping of ticker symbol to a metadata dict. Only
            the keys ``'shortName'`` and ``'companyOfficers'`` are read, and
            both are optional. ``'companyOfficers'`` is expected to be a list
            of dicts, each optionally carrying a ``'name'`` entry.
        max_officers: Number of officer columns to emit. Only the first
            ``max_officers`` officers of each company are kept. Defaults to 3.

    Returns:
        A DataFrame with the columns ``ticker``, ``company`` and
        ``officer1`` .. ``officer<max_officers>``. Missing company names and
        missing or nameless officers are represented by empty strings.

    Raises:
        ValueError: If ``max_officers`` is negative.
    """
    if max_officers < 0:
        raise ValueError('max_officers must be non-negative')

    officer_columns = [f'officer{i}' for i in range(1, max_officers + 1)]
    rows = []
    for ticker, info in company_metadata.items():
        company_name = info.get('shortName', '')
        # The key may be absent or hold None; both mean "no officers known".
        listed_officers = info.get('companyOfficers') or []
        officers = [
            officer.get('name', '') for officer in listed_officers[:max_officers]
        ]
        # Pad so every row has exactly one value per officer column.
        officers += [''] * (max_officers - len(officers))
        rows.append([ticker, company_name, *officers])
    return pd.DataFrame(rows, columns=['ticker', 'company', *officer_columns])
# Build the keyword table, then show its dimensions and the first rows.
keyword_df = get_company_keywords(company_metadata)
print(keyword_df.shape)
# Printed explicitly so the preview also appears when this runs as a plain
# script; a bare expression is only displayed by an interactive session.
print(keyword_df.head(5))