Untitled
user_9363972
python
a year ago
3.6 kB
14
Indexable
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio # Import plotly.io instead of plotly.offline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from keras import metrics
from plotly.offline import iplot
# Load both splits, parsing the 'date' column as datetimes.
train, test = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in ('train.csv', 'test.csv')
)

# Number of calendar days between the last training date and the last test date.
last_train_day = train['date'].max().date()
last_test_day = test['date'].max().date()
lag_size = (last_test_day - last_train_day).days

# Total sales per day, per (store, day) and per (item, day).
dates = train.groupby('date')['sales'].sum().reset_index()
stores = train.groupby(['store', 'date'])['sales'].sum().reset_index()
items = train.groupby(['item', 'date'])['sales'].sum().reset_index()
# Three exploratory charts of summed sales over time. Each figure is shown as
# soon as it is built, because `layout` and `fig` are rebound for the next
# chart; previously only the last (item) figure was ever rendered.

# Date: a single trace with the total sales of each day.
plot_dates = go.Scatter(x=dates['date'], y=dates['sales'])
layout = go.Layout(title='Daily sales', xaxis=dict(title='Date'), yaxis=dict(title='Sales'))
fig = go.Figure(data=[plot_dates], layout=layout)
pio.show(fig)

# Store: one trace per distinct store.
plot_stores = []
for store in stores['store'].unique():
    c_store = stores[stores['store'] == store]
    plot_stores.append(
        go.Scatter(x=c_store['date'], y=c_store['sales'], name=f'Store {store}'))
layout = go.Layout(title='Store daily sales', xaxis=dict(title='Date'), yaxis=dict(title='Sales'))
fig = go.Figure(data=plot_stores, layout=layout)
pio.show(fig)

# Item: one trace per distinct item.
plot_items = []
for item in items['item'].unique():
    c_items = items[items['item'] == item]
    plot_items.append(
        go.Scatter(x=c_items['date'], y=c_items['sales'], name=f'Item {item}'))
layout = go.Layout(title='Item daily sales', xaxis=dict(title='Date'), yaxis=dict(title='Sales'))
fig = go.Figure(data=plot_items, layout=layout)
pio.show(fig)
# Per-row day counter: the sequence 0 .. (number of distinct dates - 1),
# repeated once for every store x item combination.
#
# np.tile builds the whole column in one allocation. The previous loop called
# np.append on a growing array (re-copying it on every pass) and carried a
# no-op `i=+1` statement (a typo for `i += 1` that only rebound the loop
# variable).
#
# float64 is kept on purpose: appending to an empty list used to promote the
# integer counters to float64, so the resulting column dtype is unchanged.
#
# NOTE(review): this presumes each frame is laid out as equally long,
# complete date runs per (store, item). If the row count differs from
# dates * stores * items, the column assignment raises ValueError.
indexes_train = np.tile(
    np.arange(train.date.nunique(), dtype=np.float64),
    train.store.nunique() * train.item.nunique(),
)
indexes_test = np.tile(
    np.arange(test.date.nunique(), dtype=np.float64),
    test.store.nunique() * test.item.nunique(),
)
train["indexes"] = indexes_train
test["indexes"] = indexes_test
# Stack the training rows on top of the test rows (column order is not sorted).
data = pd.concat([train, test], sort=False)

# Durations expressed in seconds.
hour = 60 * 60
day = 24 * hour
week = 7 * day
year = 365.2425 * day

# Encode the day counter as a position on a yearly cycle: convert it to
# seconds, scale it to an angle (one full turn per `year`), then take the
# cosine and sine of that angle. `assign` works on a copy, so `data` itself
# is left untouched.
df1 = data.assign(
    index_second=lambda frame: frame["indexes"] * day,
    YEAR_index_norm=lambda frame: 2 * np.pi * frame["index_second"] / year,
    YEAR_cos_index=lambda frame: np.cos(frame["YEAR_index_norm"]),
    YEAR_sin_index=lambda frame: np.sin(frame["YEAR_index_norm"]),
)

# Keep only the cos/sin pair; the intermediate columns are dropped.
df2 = df1.drop(columns=["indexes", "index_second", "YEAR_index_norm"])
def create_date_time_features(df):
    """Return a copy of *df* extended with calendar features of its ``date`` column.

    Added columns, in order: ``dayofweek``, ``quarter``, ``month``, ``year``,
    ``dayofyear``, ``dayofmonth``, ``weekofyear`` (ISO week number as int64)
    and ``season``. The input frame is not modified.

    ``season`` is coded from the month: 0 for December-February, 2 for
    June-August, 3 for September-November and 1 for everything else.
    """
    out = df.copy()
    stamp = out["date"].dt

    # (new column, datetime accessor attribute), in the order the columns are added.
    simple_parts = (
        ("dayofweek", "dayofweek"),
        ("quarter", "quarter"),
        ("month", "month"),
        ("year", "year"),
        ("dayofyear", "dayofyear"),
        ("dayofmonth", "day"),
    )
    for column, attribute in simple_parts:
        out[column] = getattr(stamp, attribute)

    out["weekofyear"] = stamp.isocalendar().week.astype("int64")

    # The three month groups are disjoint, so their order does not matter;
    # months outside all of them fall through to the default code 1.
    month = out["month"]
    out["season"] = np.select(
        [month.isin([12, 1, 2]), month.isin([6, 7, 8]), month.isin([9, 10, 11])],
        [0, 2, 3],
        default=1,
    )
    return out
def lag_features(df, lags):
    """Add lagged ``sales`` columns per (store, item) series to *df*, in place.

    For every ``lag`` in *lags* a column ``sales_lag_<lag>`` is added that
    holds the ``sales`` value found ``lag`` rows earlier within the same
    ``(store, item)`` group. The shift follows row order, so the frame is
    expected to already be in the desired order within each group.

    After the lag columns are added, every NaN in the whole frame is replaced
    with 0. This covers the leading gaps of the lag columns and also missing
    values in any other column, ``sales`` included.

    Args:
        df: DataFrame with ``store``, ``item`` and ``sales`` columns. It is
            modified in place.
        lags: Iterable of integer row offsets.

    Returns:
        The same ``df`` object, for convenient chaining.
    """
    # Build the grouping once and use the built-in group-wise shift rather
    # than running a Python lambda through `transform` for every group and lag.
    sales_by_series = df.groupby(["store", "item"])["sales"]
    for lag in lags:
        df[f"sales_lag_{lag}"] = sales_by_series.shift(lag)
    df.fillna(0, inplace=True)
    return df
Editor is loading...
Leave a Comment