Untitled
unknown
python
a year ago
1.4 kB
1
Indexable
Never
import pandas as pd #read and append data df_aapl = pd.read_csv('aapl.csv') df_tsla = pd.read_csv('tsla.csv') df = df_aapl.append(df_tsla) #clear columns df = df.drop(columns=['<PER>', '<HIGH>', '<LOW>', '<CLOSE>', '<OPENINT>'], axis = 1) df = df.rename(columns={'<TICKER>': 'symbol', '<DATE>': 'date', '<TIME>': 'time', '<OPEN>': 'price', '<VOL>': 'vol'}) df['symbol'] = df['symbol'].str.rstrip('.US') #Create datetime based on columns df['datetime'] = pd.to_datetime(df['date'].astype(str) + df['time'].astype(str), format='%Y%m%d%H%M%S') df = df.drop(columns=['date', 'time']) #Forward fill missing values df['price'] = df['price'].fillna(method='ffill') #Remove rows exceeding rule price-avg(price > 3 * sigma) stock_stats = df.groupby('symbol').agg(['mean', 'std'])['price'] for stock in df['symbol'].unique(): avg = stock_stats.loc[stock]['mean'] std = stock_stats.loc[stock]['std'] outlier = df.loc[(df['symbol'] == stock) & (df['price'] - avg > 2 * std)] df = df.drop(outlier.index) #Create multiindex table with requested values ax = df.groupby(['symbol', pd.Grouper(key='datetime', freq='D')])['price'].agg(['min', 'first', 'last', 'max']) ax.rename(columns={'min': 'Low', 'first': 'Open', 'last': 'Close', 'max': 'High'}, inplace=True) print(ax)