Untitled

 avatar
user_9363972
python
a month ago
3.6 kB
5
Indexable
Never
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio  # Import plotly.io instead of plotly.offline
import tensorflow as tf

from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import * 
from tensorflow.keras.optimizers import Adam 
from keras import metrics 
from plotly.offline import iplot

# Load both splits, parsing the `date` column into datetimes on read.
train = pd.read_csv('train.csv', parse_dates=['date'])
test = pd.read_csv('test.csv', parse_dates=['date'])

# Whole days between the last date in `train` and the last date in `test`.
lag_size = (
    test['date'].max().date() - train['date'].max().date()
).days


def _total_sales_by(keys):
    """Sum `sales` in `train` per distinct value of `keys`, keeping the keys as columns."""
    return train.groupby(keys, as_index=False)['sales'].sum()


# Sales totals per day, per (store, day) and per (item, day).
dates = _total_sales_by('date')
stores = _total_sales_by(['store', 'date'])
items = _total_sales_by(['item', 'date'])


# Three line charts with identical axis titles: overall, per-store and
# per-item daily sales.  `layout` and `fig` are rebound for every chart, so
# each figure is rendered right after it is built; showing only once at the
# end would display the last chart and silently discard the first two.

# Date: a single trace built from the `dates` frame.
plot_dates = go.Scatter(x=dates['date'], y=dates['sales'])
layout = go.Layout(title='Daily sales', xaxis=dict(title='Date'), yaxis=dict(title='Sales'))
fig = go.Figure(data=[plot_dates], layout=layout)
pio.show(fig)

# Store: one trace per distinct value of stores['store'].
plot_stores = []
for store in stores['store'].unique():
    c_store = stores[stores['store'] == store]
    plot_stores.append(
        go.Scatter(x=c_store['date'], y=c_store['sales'], name=f'Store {store}'))
layout = go.Layout(title='Store daily sales', xaxis=dict(title='Date'), yaxis=dict(title='Sales'))
fig = go.Figure(data=plot_stores, layout=layout)
pio.show(fig)

# Item: one trace per distinct value of items['item'].
plot_items = []
for item in items['item'].unique():
    c_items = items[items['item'] == item]
    plot_items.append(
        go.Scatter(x=c_items['date'], y=c_items['sales'], name=f'Item {item}'))
layout = go.Layout(title='Item daily sales', xaxis=dict(title='Date'), yaxis=dict(title='Sales'))
fig = go.Figure(data=plot_items, layout=layout)
pio.show(fig)

def _day_indexes(frame):
    """Return the day counter 0..n_dates-1 repeated once per (store, item) pair.

    The number of repetitions is ``nunique(store) * nunique(item)`` and the
    counter length is ``nunique(date)``.  The result only lines up with the
    rows of `frame` when every store/item pair has one row per date and the
    rows are ordered series by series -- NOTE(review): this is assumed, not
    checked here; a length mismatch surfaces as an error on column assignment.

    The values are float64, matching what the previous ``np.append``-onto-a-
    list implementation produced.
    """
    n_series = frame['store'].nunique() * frame['item'].nunique()
    n_dates = frame['date'].nunique()
    # One vectorised tile instead of growing an array with np.append in a
    # loop, which recopied the whole array on every iteration.
    return np.tile(np.arange(n_dates, dtype=float), n_series)


indexes_train = _day_indexes(train)
indexes_test = _day_indexes(test)

train["indexes"] = indexes_train
test["indexes"] = indexes_test

# Stack train on top of test; columns keep their first-seen order.
data = pd.concat([train, test], sort=False)

# Durations expressed in seconds.
hour = 60 * 60
day = 24 * hour
week = 7 * day
year = 365.2425 * day

# Encode the day counter as a point on a circle with a one-year period:
# convert days to seconds, scale to an angle in radians, then take cos/sin.
df1 = data.assign(
    index_second=lambda frame: frame["indexes"] * day,
    YEAR_index_norm=lambda frame: 2 * np.pi * frame["index_second"] / year,
    YEAR_cos_index=lambda frame: np.cos(frame["YEAR_index_norm"]),
    YEAR_sin_index=lambda frame: np.sin(frame["YEAR_index_norm"]),
)


# Keep only the cos/sin pair; the intermediate columns are dropped.
df2 = df1.drop(columns=["indexes", "index_second", "YEAR_index_norm"])

def create_date_time_features(df):
    """Return a copy of `df` with calendar features derived from its `date` column.

    Added columns, in order: dayofweek, quarter, month, year, dayofyear,
    dayofmonth, weekofyear (ISO week as int64) and season.  `season` is
    0 for months 12/1/2, 2 for 6/7/8, 3 for 9/10/11 and 1 for every other
    month.  The input frame is not modified.
    """
    out = df.copy()
    when = out["date"].dt

    # New column name -> datetime accessor attribute that supplies it.
    accessor_fields = {
        "dayofweek": "dayofweek",
        "quarter": "quarter",
        "month": "month",
        "year": "year",
        "dayofyear": "dayofyear",
        "dayofmonth": "day",
    }
    for column, attribute in accessor_fields.items():
        out[column] = getattr(when, attribute)

    out["weekofyear"] = when.isocalendar().week.astype("int64")

    month = out["month"]
    # The month sets are disjoint, so the order of the conditions is irrelevant.
    out["season"] = np.select(
        [month.isin([12, 1, 2]), month.isin([6, 7, 8]), month.isin([9, 10, 11])],
        [0, 2, 3],
        default=1,
    )
    return out

def lag_features(df, lags):
    """Add one `sales_lag_<n>` column per entry of `lags`, modifying `df` in place.

    Each new column holds `sales` shifted by `n` rows within its
    (store, item) group.  After every column is added, all missing values in
    the whole frame -- not just in the new column -- are replaced with 0.
    Returns the same `df` object that was passed in.
    """
    series_keys = ["store", "item"]
    for periods in lags:
        shifted = df.groupby(series_keys)["sales"].transform(
            lambda sales: sales.shift(periods)
        )
        df[f"sales_lag_{periods}"] = shifted
        # Frame-wide fill: this also zeroes NaNs in pre-existing columns.
        df.fillna(0, inplace=True)
    return df
Leave a Comment