# Paste metadata: Untitled · unknown · python · a year ago · 7.5 kB · 3 · Indexable
import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy import stats
import statsmodels.api as sm
from scipy.stats import f_oneway
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acf
class SeasonalityAnalyzer:
    """
    Quantify seasonality in per-item price time series.

    Three independent measures are offered (classical decomposition,
    one-way ANOVA across calendar months, and autocorrelation), plus a
    composite score and a diagnostic plot.
    """

    def __init__(self, data, date_column, price_column, item_code_column):
        """
        Initialize the analyzer with your dataset

        Parameters:
        -----------
        data : pandas DataFrame
            Source observations; it is copied, so the caller's frame is
            never modified.
        date_column : str
            Column holding the observation date (parsed with
            ``pd.to_datetime``).
        price_column : str
            Column holding the price.
        item_code_column : str
            Column identifying the item each row belongs to.

        Raises:
        -------
        KeyError
            If any of the three named columns is absent from ``data``.
        """
        # Fail at construction time rather than deep inside an analysis
        # method when a column name is misspelled.
        required = (date_column, price_column, item_code_column)
        missing = [column for column in required if column not in data.columns]
        if missing:
            raise KeyError(f"Columns not found in data: {missing}")

        self.data = data.copy()
        self.date_column = date_column
        self.price_column = price_column
        self.item_code_column = item_code_column

        # Ensure date is datetime
        self.data[date_column] = pd.to_datetime(self.data[date_column])

        # Add time components
        dates = self.data[date_column].dt
        self.data['month'] = dates.month
        self.data['quarter'] = dates.quarter
        self.data['year'] = dates.year

    def _item_frame(self, item_code):
        """Return the rows of one item, ordered chronologically."""
        mask = self.data[self.item_code_column] == item_code
        return self.data.loc[mask].sort_values(by=self.date_column)

    def decompose_time_series(self, item_code, period=12):
        """
        Decompose time series into trend, seasonal, and residual components

        Parameters:
        -----------
        item_code : value compared against the item code column
        period : int
            Number of observations per seasonal cycle, forwarded to
            ``seasonal_decompose``.

        Returns:
        --------
        dict with keys ``decomposition`` (the statsmodels result),
        ``seasonality_strength`` (float) and ``interpretation`` (str).
        """
        price_series = self._item_frame(item_code)[self.price_column]

        # Decompose
        decomposition = seasonal_decompose(price_series, period=period)

        # Strength = Var(seasonal) / (Var(seasonal) + Var(residual)).
        # The residual carries NaN at both ends of the series, so the
        # variances are computed NaN-aware and explicitly.
        seasonal_var = float(np.nanvar(decomposition.seasonal))
        residual_var = float(np.nanvar(decomposition.resid))
        total_var = seasonal_var + residual_var

        # A series with no seasonal and no residual variance (e.g. constant
        # prices) has no seasonality; avoid returning 0/0 = NaN.
        seasonality_strength = seasonal_var / total_var if total_var > 0 else 0.0

        return {
            'decomposition': decomposition,
            'seasonality_strength': seasonality_strength,
            'interpretation': self._interpret_seasonality_strength(seasonality_strength)
        }

    def monthly_price_variation(self, item_code):
        """
        Perform ANOVA test to check if mean prices differ significantly across months

        Returns:
        --------
        dict with keys ``f_statistic``, ``p_value``, ``monthly_cv`` (Series
        of std/mean per calendar month) and ``is_seasonal`` (plain ``bool``,
        True when ``p_value < 0.05``).

        When the item has observations in fewer than two calendar months the
        ANOVA is undefined: ``f_statistic`` and ``p_value`` are NaN and
        ``is_seasonal`` is False.
        """
        item_data = self._item_frame(item_code)
        prices_by_month = item_data.groupby('month')[self.price_column]

        # Group prices by month
        monthly_groups = [prices.values for _, prices in prices_by_month]

        # Perform one-way ANOVA (needs at least two groups to compare)
        if len(monthly_groups) >= 2:
            f_statistic, p_value = f_oneway(*monthly_groups)
        else:
            f_statistic, p_value = float('nan'), float('nan')

        # Coefficient of variation for each month (population std / mean)
        monthly_cv = prices_by_month.std(ddof=0) / prices_by_month.mean()

        return {
            'f_statistic': f_statistic,
            'p_value': p_value,
            'monthly_cv': monthly_cv,
            # bool() keeps the result a plain Python bool; NaN compares False.
            'is_seasonal': bool(p_value < 0.05)
        }

    def autocorrelation_analysis(self, item_code, nlags=24, seasonal_lags=(3, 6, 12)):
        """
        Compute autocorrelation to identify seasonal patterns

        Parameters:
        -----------
        item_code : value compared against the item code column
        nlags : int
            Number of lags forwarded to ``acf``.
        seasonal_lags : iterable of int
            Lags whose significance counts as a seasonal pattern.

        Returns:
        --------
        dict with keys ``acf_values``, ``significant_lags``,
        ``confidence_interval`` (half-width of the 95% band) and
        ``has_seasonal_pattern``.
        """
        item_data = self._item_frame(item_code)

        # Calculate autocorrelation
        acf_values = acf(item_data[self.price_column], nlags=nlags)

        # Find significant lags (95% confidence interval); lag 0 is skipped
        confidence_interval = 1.96 / np.sqrt(len(item_data))
        significant_lags = [
            lag for lag, value in enumerate(acf_values)
            if lag > 0 and abs(value) > confidence_interval
        ]

        seasonal = set(seasonal_lags)
        return {
            'acf_values': acf_values,
            'significant_lags': significant_lags,
            'confidence_interval': confidence_interval,
            'has_seasonal_pattern': any(lag in seasonal for lag in significant_lags)
        }

    def get_comprehensive_seasonality_score(self, item_code, period=12, nlags=24):
        """
        Combine multiple seasonality measures into a single score

        The score is the mean of the decomposition strength, ``1 - p_value``
        of the monthly ANOVA, and the share of the ``nlags`` requested lags
        that are significant.

        Parameters:
        -----------
        item_code : value compared against the item code column
        period : int
            Seasonal period forwarded to ``decompose_time_series``.
        nlags : int
            Lag count forwarded to ``autocorrelation_analysis`` and used to
            normalize the number of significant lags.
        """
        # Get results from all methods
        decomp_results = self.decompose_time_series(item_code, period=period)
        monthly_var_results = self.monthly_price_variation(item_code)
        acf_results = self.autocorrelation_analysis(item_code, nlags=nlags)

        significant_lags = acf_results['significant_lags']
        # Normalize by the lag count actually requested, not a fixed 24.
        lag_share = len(significant_lags) / nlags if nlags else 0.0

        # Calculate composite score (0-1 scale)
        score_components = [
            decomp_results['seasonality_strength'],
            1 - monthly_var_results['p_value'],  # Convert p-value to confidence
            lag_share
        ]
        composite_score = np.mean(score_components)

        return {
            'composite_score': composite_score,
            'interpretation': self._interpret_seasonality_strength(composite_score),
            'decomposition_strength': decomp_results['seasonality_strength'],
            'monthly_variation_p_value': monthly_var_results['p_value'],
            'significant_acf_lags': significant_lags
        }

    def _interpret_seasonality_strength(self, strength):
        """
        Interpret the seasonality strength score

        Below 0.3 is weak, below 0.6 is moderate, otherwise strong. A NaN
        score means no seasonality could be established and is reported as
        weak/none (NaN fails every ``<`` comparison, so without this check
        it would be labelled strong).
        """
        if np.isnan(strength) or strength < 0.3:
            return "Weak or no seasonality"
        if strength < 0.6:
            return "Moderate seasonality"
        return "Strong seasonality"

    def plot_seasonality_analysis(self, item_code):
        """
        Create comprehensive seasonality visualization

        Draws three stacked panels on one figure: the price series, box
        plots of price per calendar month, and the autocorrelation function
        with its 95% band.

        Returns:
        --------
        The matplotlib figure holding the three panels.
        """
        item_data = self._item_frame(item_code)
        fig, (series_ax, box_ax, acf_ax) = plt.subplots(3, 1, figsize=(15, 10))

        # Plot original time series
        series_ax.plot(item_data[self.date_column], item_data[self.price_column])
        series_ax.set_title(f'Price Time Series for Item {item_code}')

        # Plot monthly box plots. `ax` must be passed: with `by=` and no
        # axes, pandas draws on a brand-new figure and leaves this panel empty.
        item_data.boxplot(column=self.price_column, by='month', ax=box_ax)
        box_ax.set_title('Monthly Price Distribution')
        # pandas adds a "Boxplot grouped by ..." figure-level title; drop it
        # so it does not sit above the unrelated top panel.
        fig.suptitle('')

        # Plot autocorrelation
        acf_results = self.autocorrelation_analysis(item_code)
        band = acf_results['confidence_interval']
        acf_ax.plot(acf_results['acf_values'])
        acf_ax.axhline(y=0, linestyle='--', color='gray')
        acf_ax.axhline(y=band, linestyle='--', color='red')
        acf_ax.axhline(y=-band, linestyle='--', color='red')
        acf_ax.set_title('Autocorrelation Function')

        fig.tight_layout()
        return fig
# Example usage:
"""
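# Sample data structure: month-end observations, Jan 2022 - Aug 2024.
# The series length is derived from the dates so the columns cannot drift apart.
dates = pd.date_range(start='2022-01-01', end='2024-08-31', freq='M')
data = {
    'date': dates,
    'item_code': ['0101010101001'] * len(dates),
    'price': [100 + 10 * np.sin(2 * np.pi * i / 12) + np.random.normal(0, 2)
              for i in range(len(dates))]  # Synthetic seasonal data
}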
df = pd.DataFrame(data)
# Initialize analyzer
analyzer = SeasonalityAnalyzer(df, 'date', 'price', 'item_code')
# Get comprehensive analysis for an item
item_code = '0101010101001'
seasonality_results = analyzer.get_comprehensive_seasonality_score(item_code)
print(f"Seasonality Analysis for item {item_code}:")
print(f"Composite Score: {seasonality_results['composite_score']:.2f}")
print(f"Interpretation: {seasonality_results['interpretation']}")
# Plot analysis
analyzer.plot_seasonality_analysis(item_code)
plt.show()
"""
# Paste-site residue: "Editor is loading..." / "Leave a Comment"