Untitled
unknown
python
5 months ago
7.5 kB
2
Indexable
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.stats import f_oneway
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf


class SeasonalityAnalyzer:
    """Measure how seasonal the price history of individual items is.

    Three independent indicators are offered (classical decomposition,
    one-way ANOVA across calendar months, and autocorrelation) plus a
    composite score that averages them.
    """

    # Two-sided 95% normal quantile used for the white-noise ACF bound.
    _Z_95 = 1.96

    def __init__(self, data, date_column, price_column, item_code_column):
        """
        Initialize the analyzer with your dataset

        Parameters:
        -----------
        data : pandas DataFrame
            Input observations. A copy is taken, so the caller's frame is
            never modified.
        date_column : str
            Name of the column holding the observation date (anything
            ``pd.to_datetime`` can parse).
        price_column : str
            Name of the column holding the price.
        item_code_column : str
            Name of the column identifying the item.
        """
        self.data = data.copy()
        self.date_column = date_column
        self.price_column = price_column
        self.item_code_column = item_code_column

        # Ensure date is datetime
        self.data[date_column] = pd.to_datetime(self.data[date_column])

        # Add time components (written to the private copy only)
        dates = self.data[date_column].dt
        self.data['month'] = dates.month
        self.data['quarter'] = dates.quarter
        self.data['year'] = dates.year

    def _get_item_data(self, item_code):
        """Return the rows for ``item_code`` ordered by date."""
        mask = self.data[self.item_code_column] == item_code
        return self.data[mask].sort_values(by=self.date_column)

    def _confidence_bound(self, n_obs):
        """Return the 95% white-noise bound ``1.96 / sqrt(n_obs)``."""
        return self._Z_95 / np.sqrt(n_obs)

    def decompose_time_series(self, item_code, period=12):
        """
        Decompose time series into trend, seasonal, and residual components

        Parameters:
        -----------
        item_code : scalar
            Item to analyse.
        period : int
            Number of observations per seasonal cycle (default 12).

        Returns:
        --------
        dict
            ``decomposition`` (the statsmodels result),
            ``seasonality_strength`` and ``interpretation``.
        """
        item_data = self._get_item_data(item_code)
        price_series = item_data[self.price_column]

        decomposition = seasonal_decompose(price_series, period=period)

        # Strength = Var(seasonal) / (Var(seasonal) + Var(residual)).
        # The residual is NaN at both ends (the moving-average trend is
        # undefined there), so NaN-aware variances are used explicitly.
        seasonal_var = np.nanvar(np.asarray(decomposition.seasonal, dtype=float))
        resid_var = np.nanvar(np.asarray(decomposition.resid, dtype=float))
        total_var = seasonal_var + resid_var

        if total_var > 0:
            seasonality_strength = seasonal_var / total_var
        elif total_var == 0:
            # Perfectly flat series: nothing seasonal to explain.
            seasonality_strength = 0.0
        else:
            # total_var is NaN: no usable residuals.
            seasonality_strength = float('nan')

        return {
            'decomposition': decomposition,
            'seasonality_strength': seasonality_strength,
            'interpretation': self._interpret_seasonality_strength(seasonality_strength),
        }

    def monthly_price_variation(self, item_code):
        """
        Perform ANOVA test to check if mean prices differ significantly across months

        Returns:
        --------
        dict
            ``f_statistic``, ``p_value``, ``monthly_cv`` (coefficient of
            variation per calendar month) and ``is_seasonal`` (p < 0.05).
            When the item covers fewer than two calendar months the test
            is undefined: the statistics are NaN and ``is_seasonal`` is
            False.
        """
        item_data = self._get_item_data(item_code)
        prices_by_month = item_data.groupby('month')[self.price_column]

        monthly_groups = [prices.to_numpy() for _, prices in prices_by_month]

        if len(monthly_groups) < 2:
            # One-way ANOVA needs at least two groups to compare.
            f_statistic = p_value = float('nan')
        else:
            f_statistic, p_value = f_oneway(*monthly_groups)

        # Coefficient of variation (population std / mean) for each month
        monthly_cv = prices_by_month.std(ddof=0) / prices_by_month.mean()

        return {
            'f_statistic': f_statistic,
            'p_value': p_value,
            'monthly_cv': monthly_cv,
            # NaN compares False, so an undefined test is "not seasonal".
            'is_seasonal': bool(p_value < 0.05),
        }

    def autocorrelation_analysis(self, item_code, nlags=24):
        """
        Compute autocorrelation to identify seasonal patterns

        Parameters:
        -----------
        item_code : scalar
            Item to analyse.
        nlags : int
            Highest lag requested from the ACF (default 24).

        Returns:
        --------
        dict
            ``acf_values`` (lag 0 first), ``significant_lags`` (lags > 0
            whose |ACF| exceeds the 95% bound) and
            ``has_seasonal_pattern`` (True when lag 3, 6 or 12 is
            significant).
        """
        item_data = self._get_item_data(item_code)

        acf_values = acf(item_data[self.price_column], nlags=nlags)

        # Lag 0 is always 1, so it is excluded from the significance test.
        bound = self._confidence_bound(len(item_data))
        exceeds = np.abs(np.asarray(acf_values)[1:]) > bound
        significant_lags = (np.flatnonzero(exceeds) + 1).tolist()

        return {
            'acf_values': acf_values,
            'significant_lags': significant_lags,
            'has_seasonal_pattern': not {3, 6, 12}.isdisjoint(significant_lags),
        }

    def get_comprehensive_seasonality_score(self, item_code, period=12, nlags=24):
        """
        Combine multiple seasonality measures into a single score

        Parameters:
        -----------
        item_code : scalar
            Item to analyse.
        period : int
            Seasonal period forwarded to ``decompose_time_series``.
        nlags : int
            Maximum lag forwarded to ``autocorrelation_analysis``.

        Returns:
        --------
        dict
            ``composite_score`` (mean of the three components, 0-1 scale),
            ``interpretation``, ``decomposition_strength``,
            ``monthly_variation_p_value`` and ``significant_acf_lags``.
        """
        decomp_results = self.decompose_time_series(item_code, period=period)
        monthly_var_results = self.monthly_price_variation(item_code)
        acf_results = self.autocorrelation_analysis(item_code, nlags=nlags)

        # Normalise by the lags actually computed (lag 0 excluded) rather
        # than a fixed 24, so short series and custom nlags stay in [0, 1].
        n_lags_tested = len(acf_results['acf_values']) - 1
        if n_lags_tested > 0:
            lag_share = len(acf_results['significant_lags']) / n_lags_tested
        else:
            lag_share = 0.0

        score_components = [
            decomp_results['seasonality_strength'],
            1 - monthly_var_results['p_value'],  # Convert p-value to confidence
            lag_share,
        ]
        composite_score = np.mean(score_components)

        return {
            'composite_score': composite_score,
            'interpretation': self._interpret_seasonality_strength(composite_score),
            'decomposition_strength': decomp_results['seasonality_strength'],
            'monthly_variation_p_value': monthly_var_results['p_value'],
            'significant_acf_lags': acf_results['significant_lags'],
        }

    def _interpret_seasonality_strength(self, strength):
        """
        Interpret the seasonality strength score

        Scores below 0.3 are weak, below 0.6 moderate, otherwise strong.
        A missing/NaN score is reported as undetermined instead of falling
        through the comparisons into the "strong" branch.
        """
        if pd.isna(strength):
            return "Undetermined (insufficient data)"
        if strength < 0.3:
            return "Weak or no seasonality"
        if strength < 0.6:
            return "Moderate seasonality"
        return "Strong seasonality"

    def plot_seasonality_analysis(self, item_code):
        """
        Create comprehensive seasonality visualization

        Draws three stacked panels on one figure: the raw price series,
        per-month box plots, and the autocorrelation function with its
        95% bounds.

        Returns:
        --------
        matplotlib.figure.Figure
            The figure holding all three panels.
        """
        item_data = self._get_item_data(item_code)
        acf_results = self.autocorrelation_analysis(item_code)

        fig, (ax_series, ax_box, ax_acf) = plt.subplots(3, 1, figsize=(15, 10))

        # Original time series
        ax_series.plot(item_data[self.date_column], item_data[self.price_column])
        ax_series.set_title(f'Price Time Series for Item {item_code}')

        # Monthly box plots. ``ax`` must be passed explicitly: with ``by=``
        # pandas otherwise opens a brand-new figure, leaving this panel empty.
        item_data.boxplot(column=self.price_column, by='month', ax=ax_box)
        fig.suptitle('')  # drop the automatic "Boxplot grouped by ..." title
        ax_box.set_title('Monthly Price Distribution')

        # Autocorrelation with zero line and 95% confidence bounds
        bound = self._confidence_bound(len(item_data))
        ax_acf.plot(acf_results['acf_values'])
        ax_acf.axhline(y=0, linestyle='--', color='gray')
        ax_acf.axhline(y=bound, linestyle='--', color='red')
        ax_acf.axhline(y=-bound, linestyle='--', color='red')
        ax_acf.set_title('Autocorrelation Function')

        fig.tight_layout()
        return fig


# Example usage:
"""
# Sample data structure
data = {
    # NOTE: pandas >= 2.2 prefers freq='ME' for month-end dates.
    'date': pd.date_range(start='2022-01-01', end='2024-08-31', freq='M'),
    'item_code': ['0101010101001'] * 32,  # 32 months
    'price': [100 + 10 * np.sin(2 * np.pi * i / 12) + np.random.normal(0, 2)
              for i in range(32)]  # Synthetic seasonal data
}
df = pd.DataFrame(data)

# Initialize analyzer
analyzer = SeasonalityAnalyzer(df, 'date', 'price', 'item_code')

# Get comprehensive analysis for an item
item_code = '0101010101001'
seasonality_results = analyzer.get_comprehensive_seasonality_score(item_code)
print(f"Seasonality Analysis for item {item_code}:")
print(f"Composite Score: {seasonality_results['composite_score']:.2f}")
print(f"Interpretation: {seasonality_results['interpretation']}")

# Plot analysis
analyzer.plot_seasonality_analysis(item_code)
plt.show()
"""
Editor is loading...
Leave a Comment