Untitled
unknown
plain_text
a year ago
1.5 kB
2
Indexable
Never
import requests
import pandas as pd
import yfinance as yf
from bs4 import BeautifulSoup


def get_spx_companies():
    """Return the ticker symbols scraped from Wikipedia's S&P 500 list.

    Downloads the "List of S&P 500 companies" article, locates the table
    whose class attribute is ``wikitable sortable`` and collects the
    stripped text of the first cell of every data row.

    Returns:
        A list of ticker strings exactly as they appear in the table.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if the expected table is not present in the page.
    """
    # Get the Wikipedia page with the S&P 500 companies
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as the article.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the table with the company information and extract the tickers
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError(
            "Could not find a 'wikitable sortable' table at " + url
        )

    tickers = []
    # The first row is the header row, hence the [1:] slice.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows made only of <th> cells carry no ticker; skip them
            # rather than failing with an IndexError.
            continue
        tickers.append(cells[0].text.strip())
    return tickers


spx_tickers = get_spx_companies()

# Get company metadata.
# Wikipedia writes share classes with a dot (e.g. "BRK.B") while Yahoo Finance
# uses a hyphen ("BRK-B"), so the symbol is translated for the lookup only;
# the dict stays keyed by the ticker as scraped.
company_metadata = {
    ticker: yf.Ticker(ticker.replace('.', '-')).info
    for ticker in spx_tickers
}


# Convert company metadata into pandas dataframe
def get_company_keywords(company_metadata, max_officers=3):
    """Build a DataFrame of company names and leading officers per ticker.

    Args:
        company_metadata: Mapping of ticker -> metadata dict. The keys
            ``'shortName'`` and ``'companyOfficers'`` (a list of dicts with
            a ``'name'`` key) are read when present.
        max_officers: Number of officer columns to emit. Only the first
            ``max_officers`` entries of ``'companyOfficers'`` are used.

    Returns:
        A DataFrame with columns ``ticker``, ``company`` and
        ``officer1`` .. ``officer<max_officers>``. Missing company names,
        missing officer names and unused officer slots are empty strings.
    """
    officer_columns = [f'officer{i}' for i in range(1, max_officers + 1)]

    rows = []
    for ticker, info in company_metadata.items():
        company_name = info.get('shortName', '')

        # Treat an absent or null officer list the same as an empty one.
        listed_officers = info.get('companyOfficers') or []
        officers = [
            officer.get('name', '')
            for officer in listed_officers[:max_officers]
        ]
        # Pad so every row has exactly max_officers officer cells.
        officers += [''] * (max_officers - len(officers))

        rows.append([ticker, company_name, *officers])

    return pd.DataFrame(rows, columns=['ticker', 'company', *officer_columns])


keyword_df = get_company_keywords(company_metadata)
# Bare expression: shows the first rows when run in a notebook or REPL.
keyword_df.head(5)