# Untitled
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio  # Import plotly.io instead of plotly.offline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from keras import metrics
from plotly.offline import iplot

# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
train = pd.read_csv('train.csv', parse_dates=['date'])
test = pd.read_csv('test.csv', parse_dates=['date'])

# Number of days between the last training date and the last test date.
lag_size = (test['date'].max().date() - train['date'].max().date()).days

# Total sales per day, per (store, day) and per (item, day).
dates = train.groupby('date', as_index=False)['sales'].sum()
stores = train.groupby(['store', 'date'], as_index=False)['sales'].sum()
items = train.groupby(['item', 'date'], as_index=False)['sales'].sum()

# ---------------------------------------------------------------------------
# Exploratory plots
# NOTE(review): `layout` and `fig` are rebound for every plot and only the
# last figure (item daily sales) is passed to pio.show(); the first two
# figures are built but never displayed. Confirm whether that is intended.
# ---------------------------------------------------------------------------
# Date
plot_dates = go.Scatter(x=dates['date'], y=dates['sales'])
layout = go.Layout(title='Daily sales',
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Sales'))
fig = go.Figure(data=[plot_dates], layout=layout)

# Store
plot_stores = []
for store in stores['store'].unique():
    c_store = stores[stores['store'] == store]
    plot_stores.append(go.Scatter(x=c_store['date'],
                                  y=c_store['sales'],
                                  name=f'Store {store}'))
layout = go.Layout(title='Store daily sales',
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Sales'))
fig = go.Figure(data=plot_stores, layout=layout)

# Item
plot_items = []
for item in items['item'].unique():
    c_items = items[items['item'] == item]
    plot_items.append(go.Scatter(x=c_items['date'],
                                 y=c_items['sales'],
                                 name=f'Item {item}'))
layout = go.Layout(title='Item daily sales',
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Sales'))
fig = go.Figure(data=plot_items, layout=layout)
pio.show(fig)

# ---------------------------------------------------------------------------
# Per-series day index: 0 .. n_dates-1 repeated once per (store, item) pair.
# Built in one shot with np.tile instead of growing an array with np.append
# inside a loop (which re-copies the whole array on every iteration). The
# float dtype matches what the np.append-based construction produced.
# Assumes every store/item pair has exactly one row per date and that rows
# are grouped per series in date order -- TODO confirm; otherwise the column
# assignment below raises on a length mismatch or mislabels rows.
# NOTE(review): the test index restarts at 0 rather than continuing from the
# end of train -- confirm this is the intended phase for the yearly features.
# ---------------------------------------------------------------------------
indexes_train = np.tile(
    np.arange(train.date.nunique(), dtype=float),
    train.store.nunique() * train.item.nunique(),
)
indexes_test = np.tile(
    np.arange(test.date.nunique(), dtype=float),
    test.store.nunique() * test.item.nunique(),
)

train["indexes"] = indexes_train
test["indexes"] = indexes_test
data = pd.concat([train, test], sort=False)

# Durations expressed in seconds; a year is taken as 365.2425 days.
hour = 60*60
day = 24*hour
week = 7*day
year = 365.2425*day

# Encode the day index as a point on a yearly cycle (cos/sin pair), then drop
# the intermediate columns so only the two cyclic features remain.
df1 = data.copy()
df1["index_second"] = df1["indexes"]*day
df1["YEAR_index_norm"] = 2 * np.pi * df1["index_second"] / year
df1["YEAR_cos_index"] = np.cos(df1["YEAR_index_norm"])
df1["YEAR_sin_index"] = np.sin(df1["YEAR_index_norm"])
df2 = df1.drop(["indexes", "index_second", "YEAR_index_norm"], axis=1)


def create_date_time_features(df):
    """Return a copy of *df* with calendar features derived from ``date``.

    Args:
        df: DataFrame with a datetime-typed ``date`` column.

    Returns:
        A new DataFrame (the input is not modified) with the added columns
        ``dayofweek``, ``quarter``, ``month``, ``year``, ``dayofyear``,
        ``dayofmonth``, ``weekofyear`` (ISO week number as int64) and
        ``season`` (0 = Dec-Feb, 1 = Mar-May, 2 = Jun-Aug, 3 = Sep-Nov).
    """
    df = df.copy()
    df['dayofweek'] = df['date'].dt.dayofweek
    df['quarter'] = df['date'].dt.quarter
    df['month'] = df['date'].dt.month
    df['year'] = df['date'].dt.year
    df['dayofyear'] = df['date'].dt.dayofyear
    df['dayofmonth'] = df['date'].dt.day
    df['weekofyear'] = df['date'].dt.isocalendar().week.astype("int64")

    # Start with 0 for Dec-Feb and 1 for everything else, then overwrite the
    # Jun-Aug and Sep-Nov months; Mar-May keeps the default of 1.
    df["season"] = np.where(df['month'].isin([12, 1, 2]), 0, 1)
    df["season"] = np.where(df['month'].isin([6, 7, 8]), 2, df["season"])
    df["season"] = np.where(df['month'].isin([9, 10, 11]), 3, df["season"])
    return df


def lag_features(df, lags):
    """Add lagged ``sales`` columns per (store, item) series, in place.

    For every ``lag`` in *lags* a column ``sales_lag_<lag>`` is added holding
    ``sales`` shifted by ``lag`` rows within each ``(store, item)`` group.

    Args:
        df: DataFrame with ``store``, ``item`` and ``sales`` columns. It is
            modified in place.
        lags: Iterable of integer shift sizes.

    Returns:
        The same DataFrame object that was passed in. Every NaN in the
        frame -- not only those introduced by the shift -- is replaced
        with 0.
    """
    for lag in lags:
        # GroupBy.shift is the built-in equivalent of
        # transform(lambda x: x.shift(lag)) without a per-group Python call.
        df['sales_lag_' + str(lag)] = (
            df.groupby(["store", "item"])['sales'].shift(lag)
        )
    df.fillna(0, inplace=True)
    return df
# Leave a Comment