Untitled
unknown
python
2 years ago
1.4 kB
8
Indexable
import pandas as pd
#read and append data
df_aapl = pd.read_csv('aapl.csv')
df_tsla = pd.read_csv('tsla.csv')
df = df_aapl.append(df_tsla)
#clear columns
df = df.drop(columns=['<PER>', '<HIGH>', '<LOW>', '<CLOSE>', '<OPENINT>'], axis = 1)
df = df.rename(columns={'<TICKER>': 'symbol',
'<DATE>': 'date',
'<TIME>': 'time',
'<OPEN>': 'price',
'<VOL>': 'vol'})
df['symbol'] = df['symbol'].str.rstrip('.US')
#Create datetime based on columns
df['datetime'] = pd.to_datetime(df['date'].astype(str) + df['time'].astype(str), format='%Y%m%d%H%M%S')
df = df.drop(columns=['date', 'time'])
#Forward fill missing values
df['price'] = df['price'].fillna(method='ffill')
#Remove rows exceeding rule price-avg(price > 3 * sigma)
stock_stats = df.groupby('symbol').agg(['mean', 'std'])['price']
for stock in df['symbol'].unique():
avg = stock_stats.loc[stock]['mean']
std = stock_stats.loc[stock]['std']
outlier = df.loc[(df['symbol'] == stock) & (df['price'] - avg > 2 * std)]
df = df.drop(outlier.index)
#Create multiindex table with requested values
ax = df.groupby(['symbol', pd.Grouper(key='datetime', freq='D')])['price'].agg(['min', 'first', 'last', 'max'])
ax.rename(columns={'min': 'Low',
'first': 'Open',
'last': 'Close',
'max': 'High'}, inplace=True)
print(ax)Editor is loading...