Untitled

mail@pastecode.io avatar
unknown
python
a year ago
1.4 kB
1
Indexable
Never

import pandas as pd

#read and append data
df_aapl = pd.read_csv('aapl.csv')
df_tsla = pd.read_csv('tsla.csv')

df = df_aapl.append(df_tsla)

#clear columns
df = df.drop(columns=['<PER>', '<HIGH>', '<LOW>', '<CLOSE>', '<OPENINT>'], axis = 1)
df = df.rename(columns={'<TICKER>': 'symbol',
                   '<DATE>': 'date',
                   '<TIME>': 'time',
                   '<OPEN>': 'price',
                   '<VOL>': 'vol'})
df['symbol'] = df['symbol'].str.rstrip('.US')

#Create datetime based on columns
df['datetime'] = pd.to_datetime(df['date'].astype(str) + df['time'].astype(str), format='%Y%m%d%H%M%S')
df = df.drop(columns=['date', 'time'])

#Forward fill missing values
df['price'] = df['price'].fillna(method='ffill')

#Remove rows exceeding rule price-avg(price > 3 * sigma)
stock_stats = df.groupby('symbol').agg(['mean', 'std'])['price']
for stock in df['symbol'].unique():
    avg = stock_stats.loc[stock]['mean']
    std = stock_stats.loc[stock]['std']
    outlier = df.loc[(df['symbol'] == stock) & (df['price'] - avg > 2 * std)]  
    df = df.drop(outlier.index)

#Create multiindex table with requested values
ax = df.groupby(['symbol', pd.Grouper(key='datetime', freq='D')])['price'].agg(['min', 'first', 'last', 'max'])
ax.rename(columns={'min': 'Low',
                   'first': 'Open',
                   'last': 'Close',
                   'max': 'High'}, inplace=True)
print(ax)