Untitled
user_9363972
python
a year ago
7.5 kB
11
Indexable
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from keras import metrics
from plotly.offline import iplot

# --- Load data ------------------------------------------------------------
train = pd.read_csv('train.csv', parse_dates=['date'])
test = pd.read_csv('test.csv', parse_dates=['date'])

# Forecast horizon in days: gap between the last train date and last test date.
lag_size = (test['date'].max().date() - train['date'].max().date()).days

# Aggregated views of daily sales (kept for exploratory use).
dates = train.groupby('date', as_index=False)['sales'].sum()
stores = train.groupby(['store', 'date'], as_index=False)['sales'].sum()
items = train.groupby(['item', 'date'], as_index=False)['sales'].sum()


def _running_day_index(df):
    """Return a 0..n_dates-1 day counter, repeated once per (store, item) pair.

    Replaces the original quadratic ``np.append``-in-a-loop construction
    (which also contained dead ``i=+1`` statements) with a single ``np.tile``
    producing the same sequence of values.
    """
    n_dates = df.date.nunique()
    n_pairs = df.store.nunique() * df.item.nunique()
    return np.tile(np.arange(n_dates, dtype=int), n_pairs)


# Per-series running day index, used to encode yearly seasonality below.
# NOTE(review): assumes rows are ordered as contiguous per-(store, item)
# date runs, as in the Kaggle demand-forecasting CSVs — confirm for new data.
train["indexes"] = _running_day_index(train)
test["indexes"] = _running_day_index(test)

data = pd.concat([train, test], sort=False)

# Cyclical (sin/cos) encoding of the yearly period, computed in seconds.
hour = 60 * 60
day = 24 * hour
week = 7 * day
year = 365.2425 * day

df1 = data.copy()
df1["index_second"] = df1["indexes"] * day
df1["YEAR_index_norm"] = 2 * np.pi * df1["index_second"] / year
df1["YEAR_cos_index"] = np.cos(df1["YEAR_index_norm"])
df1["YEAR_sin_index"] = np.sin(df1["YEAR_index_norm"])
# Drop the intermediates; only the cos/sin features remain.
df2 = df1.drop(["indexes", "index_second", "YEAR_index_norm"], axis=1)


def create_date_time_features(df):
    """Return a copy of `df` with calendar features derived from its `date` column."""
    df = df.copy()
    df['dayofweek'] = df.date.dt.dayofweek
    df['quarter'] = df.date.dt.quarter
    df['month'] = df.date.dt.month
    df['year'] = df.date.dt.year
    df['dayofyear'] = df.date.dt.dayofyear
    df['dayofmonth'] = df.date.dt.day
    df['weekofyear'] = df.date.dt.isocalendar().week.astype("int64")
    # Season code: 0 = Dec-Feb, 1 = Mar-May (default), 2 = Jun-Aug, 3 = Sep-Nov.
    df["season"] = np.where(df.month.isin([12, 1, 2]), 0, 1)
    df["season"] = np.where(df.month.isin([6, 7, 8]), 2, df["season"])
    df["season"] = np.where(df.month.isin([9, 10, 11]), 3, df["season"])
    return df


def lag_features(df, lags):
    """Add per-(store, item) lagged `sales` columns in place; shift NaNs become 0."""
    for lag in lags:
        df['sales_lag_' + str(lag)] = df.groupby(["store", "item"])['sales'].transform(
            lambda x: x.shift(lag))
    df.fillna(0, inplace=True)
    return df


def roll_mean_features(df, windows):
    """Add per-(store, item) triangular-window rolling means of `sales` (shifted by 1)."""
    for window in windows:
        df['sales_roll_mean_' + str(window)] = df.groupby(["store", "item"])['sales'].transform(
            lambda x: x.shift(1).rolling(window=window, min_periods=10,
                                         win_type="triang").mean())
    df.fillna(0, inplace=True)
    return df


def ewm_features(dataframe, alphas, lags):
    """Add exponentially weighted means of lagged `sales` for every (alpha, lag) pair."""
    for alpha in alphas:
        for lag in lags:
            col = 'sales_ewm_alpha_' + str(alpha).replace(".", "") + "_lag_" + str(lag)
            dataframe[col] = dataframe.groupby(["store", "item"])['sales'].transform(
                lambda x: x.shift(lag).ewm(alpha=alpha).mean())
    dataframe.fillna(0, inplace=True)
    return dataframe


# --- Feature pipeline -----------------------------------------------------
df3 = create_date_time_features(df2)

df4 = df3.copy()
lag_features(df4, [91, 98, 105, 112, 119, 126, 182, 364, 546, 728])

df5 = roll_mean_features(df4.copy(), [365, 546])

df6 = df5.copy()
alphas = [0.95, 0.9, 0.8, 0.7, 0.5]
lags = [91, 98, 105, 112, 180, 270, 365, 546, 728]
df6 = ewm_features(df6, alphas, lags)

# One-hot encode categoricals; log-transform the target (inverted with expm1 later).
df7 = pd.get_dummies(df6, columns=['store', 'item', 'dayofweek', "quarter",
                                   'month', "year", "season"])
df7["sales"] = np.log1p(df7["sales"].values)

# --- Time-based train / validation split ----------------------------------
df_model = df7.copy()
df_model = df_model.sort_values("date").reset_index(drop=True)
train = df_model.loc[df_model["date"] < "2017-01-01", :]
val = df_model.loc[(df_model["date"] >= "2017-01-01") &
                   (df_model["date"] < "2017-04-01"), :]

cols = [col for col in train.columns if col not in ['date', 'id', "sales", "year"]]
X_train = train[cols]
Y_train = train['sales']
X_val = val[cols]
Y_val = val['sales']

# --- Model: simple fully connected regressor on the engineered features ----
NN_model = Sequential()
NN_model.add(Dense(128, kernel_initializer='normal',
                   input_dim=X_train.shape[1], activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal', activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal', activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal', activation='relu'))
NN_model.add(Dropout(0.2))
# Linear output: the network regresses log1p(sales) directly.
NN_model.add(Dense(1, kernel_initializer='normal', activation='linear'))

NN_model.compile(loss=tf.keras.losses.mae, optimizer="adam", metrics=['mae'])
NN_model.summary()

X_train_tensor = tf.convert_to_tensor(X_train.values, dtype=tf.float32)
Y_train_tensor = tf.convert_to_tensor(Y_train.values, dtype=tf.float32)
# --- Train, validate, predict, export -------------------------------------
NN_model.fit(X_train_tensor, Y_train_tensor, epochs=2, batch_size=32,
             validation_split=0.2)

# Evaluate on the held-out 2017 Q1 window; expm1 inverts the log1p target transform.
X_val_tensor = tf.convert_to_tensor(X_val.values, dtype=tf.float32)
val_predictions = NN_model.predict(X_val_tensor).flatten()
train_results = pd.DataFrame(data={'Train Predictions': np.expm1(val_predictions),
                                   'Actuals': np.expm1(Y_val)})
sns.lineplot(data=train_results.head(100))
plt.title("Sales vs Predicted Sales")
plt.show()

# BUG FIX: `df_final_model` was only assigned inside a commented-out line, so the
# slice below raised NameError. Define it explicitly from the full feature frame.
df_final_model = df7.copy()
test = df_final_model.loc[df_final_model["date"] >= "2018-01-01", :]
X_test = test[cols].astype(np.float32)
X_test_tensor = tf.convert_to_tensor(X_test)
test_predictions = NN_model.predict(X_test_tensor).flatten()

new_test = pd.read_csv('test.csv', parse_dates=['date'])

# Join predictions back to the raw test rows on `id` rather than by positional
# concat(axis=1): same result here, but immune to index/order mismatches.
pred_df = test.loc[:, ['id']].copy()
pred_df['sales'] = np.expm1(test_predictions)
result_df = new_test.merge(pred_df, on='id', how='left').drop('id', axis=1)
result_df.to_csv('result_prediction.csv', index=False)

# Visual sanity check: historical daily totals followed by the predicted totals.
train = pd.read_csv('train.csv', parse_dates=['date'])
plot_test = pd.concat([train, result_df], axis=0).reset_index()
daily_result = plot_test.groupby('date', as_index=False)['sales'].sum()
sc = go.Scatter(x=daily_result['date'], y=daily_result['sales'])
layout = go.Layout(title='Daily sales', xaxis=dict(title='Date'),
                   yaxis=dict(title='Sales'))
fig = go.Figure(data=[sc], layout=layout)
iplot(fig)
Editor is loading...
Leave a Comment