# Untitled

# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 1.7 kB
# 5
# Indexable
# Never
import requests
import pandas as pd
import yfinance as yf
from bs4 import BeautifulSoup

def get_spx_companies():
  """Return the ticker symbols listed on Wikipedia's S&P 500 companies page.

  Downloads the page, locates the first table whose class is
  ``wikitable sortable`` and reads the first cell of each data row.

  Returns:
    list[str]: Ticker symbols in table order, stripped of surrounding
    whitespace and otherwise exactly as written on the page.

  Raises:
    requests.RequestException: If the download fails, times out, or the
      server answers with an HTTP error status.
    ValueError: If the expected table is not present in the page.
  """
  url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

  # Without a timeout a stalled connection would hang the script forever;
  # raise_for_status() keeps us from parsing an error page as if it were data.
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  soup = BeautifulSoup(response.content, 'html.parser')

  table = soup.find('table', {'class': 'wikitable sortable'})
  if table is None:
    raise ValueError(f'No "wikitable sortable" table found at {url}')

  tickers = []
  # The first row is the column header, so it is skipped.
  for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    if not cells:
      # Rows made only of <th> cells carry no ticker.
      continue
    tickers.append(cells[0].text.strip())

  return tickers
  
# Symbol universe: every ticker returned by get_spx_companies() ...
tickers_of_interest = get_spx_companies()

# ... plus a few tickers which tend to be popular (meme stocks).
tickers_of_interest.extend(['BBBY', 'GME', 'AMC'])

# Look up each symbol's `info` record through yfinance, keyed by ticker.
company_metadata = {
    symbol: yf.Ticker(symbol).info
    for symbol in tickers_of_interest
}

# Convert company metadata into pandas dataframe
def get_company_keywords(company_metadata, max_officers=3):
  """Build a table of company and officer names for each ticker.

  Args:
    company_metadata: Mapping of ticker symbol to a metadata dict. Two keys
      are read, both optional: ``shortName`` (the company name) and
      ``companyOfficers`` (a list of dicts, each with a ``name`` key).
      A metadata value of ``None`` is treated as an empty dict.
    max_officers: Number of officer columns to produce. Defaults to 3.

  Returns:
    pandas.DataFrame: One row per ticker, in input order, with columns
    ``ticker``, ``company`` and ``officer1`` .. ``officer<max_officers>``.
    Any missing name is an empty string.
  """
  officer_columns = [f'officer{i}' for i in range(1, max_officers + 1)]

  rows = []
  for ticker, info in company_metadata.items():
    info = info or {}
    company_name = info.get('shortName', '')

    # `or []` also covers a 'companyOfficers' key that is present but None.
    listed = info.get('companyOfficers') or []
    officers = [officer.get('name', '') for officer in listed[:max_officers]]

    # Pad so every row has the same number of officer columns.
    officers += [''] * (max_officers - len(officers))

    rows.append([ticker, company_name, *officers])

  return pd.DataFrame(rows, columns=['ticker', 'company', *officer_columns])

# Build the keyword table and report its (rows, columns) dimensions.
keyword_df = get_company_keywords(company_metadata)
print(keyword_df.shape)
# First five rows (head() defaults to 5). As a bare expression this is only
# displayed by an interactive session or notebook.
keyword_df.head()