Untitled

 avatar
unknown
python
5 months ago
7.5 kB
2
Indexable
import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy import stats
import statsmodels.api as sm
from scipy.stats import f_oneway
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acf

class SeasonalityAnalyzer:
    """
    Measure seasonality in per-item price histories.

    Three independent measures are provided (classical decomposition,
    one-way ANOVA across calendar months, and autocorrelation), plus a
    composite score that averages them and a diagnostic plot.
    """

    def __init__(self, data, date_column, price_column, item_code_column):
        """
        Initialize the analyzer with your dataset

        Parameters:
        -----------
        data : pandas DataFrame
            Input rows. The frame is copied, so the caller's object is
            never modified.
        date_column : str
            Name of the column holding dates (anything ``pd.to_datetime``
            accepts).
        price_column : str
            Name of the column holding prices.
        item_code_column : str
            Name of the column identifying the item each row belongs to.

        Notes:
        ------
        The internal copy gains (or has overwritten) the columns
        ``'month'``, ``'quarter'`` and ``'year'``.
        """
        self.data = data.copy()
        self.date_column = date_column
        self.price_column = price_column
        self.item_code_column = item_code_column

        # Ensure date is datetime
        self.data[date_column] = pd.to_datetime(self.data[date_column])

        # Add time components
        dates = self.data[date_column].dt
        self.data['month'] = dates.month
        self.data['quarter'] = dates.quarter
        self.data['year'] = dates.year

    def _get_item_data(self, item_code):
        """
        Return the rows of one item, sorted chronologically.

        Raises:
        -------
        ValueError
            If no row matches ``item_code``. Failing here gives a clear
            message instead of an opaque error from a downstream library.
        """
        item_data = self.data[self.data[self.item_code_column] == item_code]
        if item_data.empty:
            raise ValueError(f"No rows found for item code {item_code!r}")
        return item_data.sort_values(by=self.date_column)

    def decompose_time_series(self, item_code, period=12):
        """
        Decompose time series into trend, seasonal, and residual components
        Returns strength of seasonality

        Parameters:
        -----------
        item_code : value compared against the item code column
        period : int
            Number of observations per seasonal cycle, passed to
            ``seasonal_decompose``.

        Returns:
        --------
        dict with keys ``'decomposition'`` (the ``seasonal_decompose``
        result), ``'seasonality_strength'`` (float in [0, 1]) and
        ``'interpretation'`` (str).

        Raises:
        -------
        ValueError
            If the item has no rows. ``seasonal_decompose`` may raise as
            well, e.g. when the series is too short for ``period``.
        """
        item_data = self._get_item_data(item_code)
        price_series = item_data[self.price_column]

        # Decompose
        decomposition = seasonal_decompose(price_series, period=period)

        # Calculate strength of seasonality
        # Variance of seasonality / (Variance of seasonality + Variance of residual)
        # The residual can contain NaN at the edges of the series (where the
        # moving-average trend is undefined), so use NaN-aware variances.
        seasonal_var = np.nanvar(decomposition.seasonal)
        resid_var = np.nanvar(decomposition.resid)
        total_var = seasonal_var + resid_var

        # A series with no seasonal and no residual variance (e.g. constant
        # prices) would give 0/0; report it as "no seasonality" instead.
        if total_var > 0:
            seasonality_strength = float(seasonal_var / total_var)
        else:
            seasonality_strength = 0.0

        return {
            'decomposition': decomposition,
            'seasonality_strength': seasonality_strength,
            'interpretation': self._interpret_seasonality_strength(seasonality_strength)
        }

    def monthly_price_variation(self, item_code, alpha=0.05):
        """
        Perform ANOVA test to check if mean prices differ significantly across months

        Parameters:
        -----------
        item_code : value compared against the item code column
        alpha : float
            Significance level used for the ``'is_seasonal'`` flag.

        Returns:
        --------
        dict with keys ``'f_statistic'``, ``'p_value'``, ``'monthly_cv'``
        (Series indexed by month: population std / mean of the prices) and
        ``'is_seasonal'`` (bool, ``p_value < alpha``).

        Raises:
        -------
        ValueError
            If the item has no rows.
        """
        item_data = self._get_item_data(item_code)

        # Group prices by month
        prices_by_month = item_data.groupby('month')[self.price_column]
        monthly_groups = [prices.values for _, prices in prices_by_month]

        # Perform one-way ANOVA. It needs at least two groups; with fewer
        # the test is undefined, so report NaN rather than crashing.
        if len(monthly_groups) < 2:
            f_statistic, p_value = np.nan, np.nan
        else:
            f_statistic, p_value = f_oneway(*monthly_groups)

        # Calculate coefficient of variation for each month
        monthly_cv = prices_by_month.agg(lambda x: np.std(x) / np.mean(x))

        return {
            'f_statistic': f_statistic,
            'p_value': p_value,
            'monthly_cv': monthly_cv,
            # A NaN p-value compares False, i.e. "not shown to be seasonal".
            'is_seasonal': bool(p_value < alpha)
        }

    def autocorrelation_analysis(self, item_code, nlags=24, seasonal_lags=(3, 6, 12)):
        """
        Compute autocorrelation to identify seasonal patterns

        Parameters:
        -----------
        item_code : value compared against the item code column
        nlags : int
            Number of lags requested from ``acf``.
        seasonal_lags : iterable of int
            Lags whose significance counts as evidence of a seasonal
            pattern.

        Returns:
        --------
        dict with keys ``'acf_values'``, ``'significant_lags'`` (lags > 0
        whose absolute autocorrelation exceeds the bound),
        ``'confidence_interval'`` (the bound, ``1.96 / sqrt(n)``) and
        ``'has_seasonal_pattern'`` (bool).

        Raises:
        -------
        ValueError
            If the item has no rows.
        """
        item_data = self._get_item_data(item_code)

        # Calculate autocorrelation
        acf_values = acf(item_data[self.price_column], nlags=nlags)

        # Find significant lags (95% confidence interval)
        confidence_interval = 1.96 / np.sqrt(len(item_data))
        # Lag 0 is always 1.0, so it is excluded.
        significant_lags = [lag for lag, value in enumerate(acf_values)
                            if lag > 0 and abs(value) > confidence_interval]

        return {
            'acf_values': acf_values,
            'significant_lags': significant_lags,
            'confidence_interval': confidence_interval,
            'has_seasonal_pattern': any(lag in seasonal_lags for lag in significant_lags)
        }

    def get_comprehensive_seasonality_score(self, item_code, period=12, nlags=24):
        """
        Combine multiple seasonality measures into a single score

        The score is the plain mean of three components, each in [0, 1]:
        the decomposition strength, ``1 - p_value`` of the monthly ANOVA,
        and the share of computed autocorrelation lags that are significant.

        Parameters:
        -----------
        item_code : value compared against the item code column
        period : int
            Seasonal period forwarded to ``decompose_time_series``.
        nlags : int
            Lag count forwarded to ``autocorrelation_analysis``.

        Returns:
        --------
        dict with keys ``'composite_score'``, ``'interpretation'``,
        ``'decomposition_strength'``, ``'monthly_variation_p_value'`` and
        ``'significant_acf_lags'``.
        """
        # Get results from all methods
        decomp_results = self.decompose_time_series(item_code, period=period)
        monthly_var_results = self.monthly_price_variation(item_code)
        acf_results = self.autocorrelation_analysis(item_code, nlags=nlags)

        # An undefined ANOVA (NaN p-value) is no evidence of seasonality;
        # count it as zero confidence so it cannot turn the mean into NaN.
        p_value = monthly_var_results['p_value']
        anova_confidence = 0.0 if np.isnan(p_value) else 1 - p_value

        # Normalize by the number of lags actually computed (lag 0 excluded)
        # rather than a fixed constant, so the share stays in [0, 1].
        significant_lags = acf_results['significant_lags']
        computed_lags = len(acf_results['acf_values']) - 1
        acf_share = len(significant_lags) / computed_lags if computed_lags > 0 else 0.0

        # Calculate composite score (0-1 scale)
        score_components = [
            decomp_results['seasonality_strength'],
            anova_confidence,
            acf_share,
        ]

        composite_score = np.mean(score_components)

        return {
            'composite_score': composite_score,
            'interpretation': self._interpret_seasonality_strength(composite_score),
            'decomposition_strength': decomp_results['seasonality_strength'],
            'monthly_variation_p_value': p_value,
            'significant_acf_lags': significant_lags
        }

    def _interpret_seasonality_strength(self, strength):
        """
        Interpret the seasonality strength score

        Scores below 0.3 are weak, below 0.6 moderate, otherwise strong.
        A NaN score is reported as weak: NaN fails every ``<`` comparison
        and would otherwise fall through to "Strong seasonality".
        """
        if np.isnan(strength) or strength < 0.3:
            return "Weak or no seasonality"
        if strength < 0.6:
            return "Moderate seasonality"
        return "Strong seasonality"

    def plot_seasonality_analysis(self, item_code):
        """
        Create comprehensive seasonality visualization

        Draws three stacked panels on one figure: the price time series,
        box plots of price per calendar month, and the autocorrelation
        function with its 95% bounds.

        Returns:
        --------
        The matplotlib figure holding all three panels.

        Raises:
        -------
        ValueError
            If the item has no rows.
        """
        # Get item data
        item_data = self._get_item_data(item_code)

        # Use explicit axes: DataFrame.boxplot(by=...) opens a brand-new
        # figure unless it is handed an ``ax``, which would scatter the
        # panels across two figures.
        fig, (series_ax, box_ax, acf_ax) = plt.subplots(3, 1, figsize=(15, 10))

        # Plot original time series
        series_ax.plot(item_data[self.date_column], item_data[self.price_column])
        series_ax.set_title(f'Price Time Series for Item {item_code}')

        # Plot monthly box plots
        item_data.boxplot(column=self.price_column, by='month', ax=box_ax)
        box_ax.set_title('Monthly Price Distribution')
        # boxplot(by=...) adds its own figure-level title; clear it so it
        # does not overlap the first panel's title.
        fig.suptitle('')

        # Plot autocorrelation
        acf_results = self.autocorrelation_analysis(item_code)
        bound = acf_results['confidence_interval']
        acf_ax.plot(acf_results['acf_values'])
        acf_ax.axhline(y=0, linestyle='--', color='gray')
        acf_ax.axhline(y=bound, linestyle='--', color='red')
        acf_ax.axhline(y=-bound, linestyle='--', color='red')
        acf_ax.set_title('Autocorrelation Function')

        fig.tight_layout()
        return fig

# Example usage:
# The demo below is wrapped in a bare string literal, so it is never executed
# when this module is imported or run; copy it into a script or REPL to try it.
# NOTE(review): recent pandas releases appear to deprecate the 'M' frequency
# alias used below in favour of 'ME' — confirm against the installed version.
"""
# Sample data structure
data = {
    'date': pd.date_range(start='2022-01-01', end='2024-08-31', freq='M'),
    'item_code': ['0101010101001'] * 32,  # 32 months
    'price': [100 + 10 * np.sin(2 * np.pi * i / 12) + np.random.normal(0, 2) 
              for i in range(32)]  # Synthetic seasonal data
}
df = pd.DataFrame(data)

# Initialize analyzer
analyzer = SeasonalityAnalyzer(df, 'date', 'price', 'item_code')

# Get comprehensive analysis for an item
item_code = '0101010101001'
seasonality_results = analyzer.get_comprehensive_seasonality_score(item_code)
print(f"Seasonality Analysis for item {item_code}:")
print(f"Composite Score: {seasonality_results['composite_score']:.2f}")
print(f"Interpretation: {seasonality_results['interpretation']}")

# Plot analysis
analyzer.plot_seasonality_analysis(item_code)
plt.show()
"""
Editor is loading...
Leave a Comment