Untitled
unknown
plain_text
a year ago
6.2 kB
3
Indexable
"""Experiments on predicting the direction of Tesla daily returns.

Steps performed, in order:
1. Load every CSV in ``data/stocks`` into a dict of DataFrames.
2. Score a random-guess baseline against ``functions.binary`` labels.
3. Walk-forward ARIMA forecasts for several orders, then plot the results.
4. Join averaged headline sentiment and compare it with daily returns.
5. Fit an XGBoost classifier and build train/test sequences.
"""
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import warnings
import numpy as np
from numpy import array
from importlib import reload  # to reload modules if we made changes to them without restarting kernel
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier  # for features importance

warnings.filterwarnings('ignore')
plt.rcParams['figure.dpi'] = 227  # native screen dpi for my computer

import statsmodels.api as sm
# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` is the legacy API (removed
# in recent statsmodels releases); the fit/forecast calls below rely on it.
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from sklearn.metrics import mean_squared_error, confusion_matrix, f1_score, accuracy_score
from pandas.plotting import autocorrelation_plot
import tensorflow.keras as keras
from tensorflow.python.keras.optimizer_v2 import rmsprop
from functools import partial
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Input, Flatten, TimeDistributed, LSTM, Dense, Bidirectional, Dropout,
    ConvLSTM2D, Conv1D, GlobalMaxPooling1D, MaxPooling1D, Convolution1D,
    BatchNormalization, LeakyReLU,
)
from bayes_opt import BayesianOptimization
from tensorflow.keras.utils import plot_model
import functions
import plotting

np.random.seed(66)

# --- Load stock data --------------------------------------------------------
files = os.listdir('data/stocks')
stocks = {}
for file in files:
    # splitext tolerates names without a dot and only accepts a real
    # ``.csv`` suffix (``x.csv.bak`` is skipped).
    name, extension = os.path.splitext(file)
    if extension == '.csv':
        stocks[name] = pd.read_csv(os.path.join('data/stocks', file), index_col='Date')
        stocks[name].index = pd.to_datetime(stocks[name].index)


def baseline_model(stock):
    """Return the accuracy of uniformly random 0/1 guesses for *stock*.

    The guesses are compared with ``functions.binary(stock)``
    (presumably up/down labels derived from the returns - confirm in
    ``functions``).
    """
    baseline_predictions = np.random.randint(0, 2, len(stock))
    accuracy = accuracy_score(functions.binary(stock), baseline_predictions)
    return accuracy


baseline_accuracy = baseline_model(stocks['tsla'].Return)
print('Baseline model accuracy: {:.1f}%'.format(baseline_accuracy * 100))

# Repeat the random baseline to see the spread of its accuracy.
base_preds = [baseline_model(stocks['tsla'].Return) for _ in range(1000)]

plt.figure(figsize=(16,6))
plt.style.use('seaborn-whitegrid')
plt.hist(base_preds, bins=50, facecolor='#4ac2fb')
plt.title('Baseline Model Accuracy', fontsize=15)
plt.axvline(np.array(base_preds).mean(), c='k', ls='--', lw=2)
plt.show()

print('Tesla historical data contains {} entries'.format(stocks['tsla'].shape[0]))
stocks['tsla'][['Return']].head()

plt.rcParams['figure.figsize'] = (16, 3)
plot_acf(stocks['tsla'].Return, lags=range(300))
plt.show()

# --- ARIMA walk-forward forecasts -------------------------------------------
orders = [(0,0,0),(1,0,0),(0,1,0),(0,0,1),(1,1,0)]
train = list(stocks['tsla']['Return'][1000:1900].values)
test = list(stocks['tsla']['Return'][1900:2300].values)

all_predictions = {}

for order in orders:
    try:
        history = train.copy()
        order_predictions = []

        for i, actual in enumerate(test):
            model = ARIMA(history, order=order)  # defining ARIMA model
            model_fit = model.fit(disp=0)  # fitting model
            y_hat = model_fit.forecast()  # predicting 'return'
            order_predictions.append(y_hat[0][0])  # first element ([0][0]) is a prediction
            history.append(actual)  # simply adding following day 'return' value to the model
            print('Prediction: {} of {}'.format(i+1,len(test)), end='\r')

        accuracy = accuracy_score(
            functions.binary(test),
            functions.binary(order_predictions)
        )
        print(' ', end='\r')
        print('{} - {:.1f}% accuracy'.format(order, round(accuracy, 3)*100), end='\n')
        all_predictions[order] = order_predictions

    except Exception as exc:
        # Keep going with the remaining orders, but show why this one failed
        # instead of silently swallowing every error (including Ctrl-C).
        print(order, '<== Wrong Order', repr(exc), end='\n')

# --- Plot ARIMA(0,1,0) predictions against the test set ---------------------
fig = plt.figure(figsize=(16,4))
plt.plot(test, label='Test', color='#4ac2fb')
plt.plot(all_predictions[(0,1,0)], label='Predictions', color='#ff4e97')
plt.legend(frameon=True, loc=1, ncol=1, fontsize=10, borderpad=.6)
plt.title('Arima Predictions', fontsize=15)
plt.xlabel('Days', fontsize=13)
plt.ylabel('Returns', fontsize=13)

plt.annotate('',
             xy=(15, 0.05),
             xytext=(150, .2),
             fontsize=10,
             arrowprops={'width':0.4,'headwidth':7,'color':'#333333'}
            )

# Reuse the axes that already holds the lines; add_subplot(1, 1, 1) would
# create a fresh axes on top of them in current matplotlib.
ax = plt.gca()
rect = patches.Rectangle((0,-.05), 30, .1, ls='--', lw=2, facecolor='y', edgecolor='k', alpha=.5)
ax.add_patch(rect)

# Inset axes showing the first 30 days (the highlighted rectangle).
plt.axes([.25, 1, .2, .5])
plt.plot(test[:30], color='#4ac2fb')
plt.plot(all_predictions[(0,1,0)][:30], color='#ff4e97')
plt.tick_params(axis='both', labelbottom=False, labelleft=False)
plt.title('Lag')
plt.show()

plt.figure(figsize=(16,5))
plt.hist(stocks['tsla'][1900:2300].reset_index().Return, bins=20, label='True', facecolor='#4ac2fb')
plt.hist(all_predictions[(0,1,0)], bins=20, label='Predicted', facecolor='#ff4e97', alpha=.7)
plt.axvline(0, c='k', ls='--')
plt.title('ARIMA True vs Predicted Values Distribution', fontsize=15)
plt.legend(frameon=True, loc=1, ncol=1, fontsize=10, borderpad=.6)
plt.show()

# --- News sentiment vs. return ----------------------------------------------
tesla_headlines = pd.read_csv('data/tesla_headlines.csv', index_col='Date')
# Average the sentiment per date, then align it with the price data.
tesla = stocks['tsla'].join(tesla_headlines.groupby('Date').mean().Sentiment)
tesla.fillna(0, inplace=True)

plt.style.use('seaborn-whitegrid')
plt.figure(figsize=(16,6))
# Sentiment is shifted by one row so each return is paired with the previous
# row's sentiment.
plt.plot(tesla.loc['2019-01-10':'2019-09-05'].Sentiment.shift(1), c='#3588cf', label='News Sentiment')
plt.plot(tesla.loc['2019-01-10':'2019-09-05'].Return, c='#ff4e97', label='Return')
plt.legend(frameon=True, fancybox=True, framealpha=.9, loc=1)
plt.title('Tesla News Sentiment and Daily Return', fontsize=15)
plt.show()

pd.DataFrame({
    'Sentiment': tesla.loc['2019-01-10':'2019-09-05'].Sentiment.shift(1),
    'Return': tesla.loc['2019-01-10':'2019-09-05'].Return}).corr()

# --- XGBoost fit -------------------------------------------------------------
scaled_tsla = functions.scale(stocks['tsla'], scale=(0,1))

# Features are every row but the last; the target is the next row's return.
X = scaled_tsla[:-1]
y = stocks['tsla'].Return.shift(-1)[:-1]

# NOTE(review): `y` looks like a continuous return series being passed to a
# classifier - confirm whether it should go through functions.binary first.
xgb = XGBClassifier()
xgb.fit(X[1500:], y[1500:])

# --- Sequence split ----------------------------------------------------------
n_steps = 21
scaled_tsla = functions.scale(stocks['tsla'], scale=(0,1))

# NOTE(review): the source text was cut off right after `ratio=0.8`; the
# closing parenthesis is restored here - confirm no further arguments followed.
X_train, y_train, X_test, y_test = functions.split_sequences(
    scaled_tsla.to_numpy()[:-1],
    stocks['tsla'].Return.shift(-1).to_numpy()[:-1],
    n_steps,
    split=True,
    ratio=0.8
)
Editor is loading...
Leave a Comment