# Untitled
#
#  avatar
# unknown
# plain_text
# a month ago
# 3.2 kB
# 3
# Indexable
import pandas as pd
import matplotlib.pyplot as plt


def load_and_clean_data(file_path):
    """Read the AQI CSV at *file_path* into a DataFrame.

    When the file has a 'Date' column, it is converted with
    ``pd.to_datetime``; files without that column are returned as read.
    """
    frame = pd.read_csv(file_path)
    # Nothing to convert when the file carries no 'Date' column.
    if 'Date' not in frame.columns:
        return frame
    frame['Date'] = pd.to_datetime(frame['Date'])
    return frame

def _save_current_figure(filename):
    """Tighten the layout of the current figure, save it to *filename*, close it."""
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()


def _plot_aqi_trends(df):
    """Draw one AQI-over-time line per city and save 'aqi_trends.png'."""
    plt.figure(figsize=(10, 6))
    # sort=False keeps the cities in first-appearance order for the legend.
    for city, city_data in df.groupby('City', sort=False):
        # Plot chronologically so the line does not double back on itself
        # when the CSV rows are not already ordered by date.
        city_data = city_data.sort_values('Date')
        plt.plot(city_data['Date'], city_data['AQI'], label=city, marker='o')
    plt.title('AQI Trends by City')
    plt.xlabel('Date')
    plt.ylabel('AQI Level')
    plt.legend()
    plt.xticks(rotation=45)
    _save_current_figure('aqi_trends.png')


def _plot_average_aqi(df):
    """Draw a bar chart of mean AQI per city and save 'avg_aqi_by_city.png'."""
    plt.figure(figsize=(10, 6))
    city_avg = df.groupby('City')['AQI'].mean().sort_values(ascending=True)
    plt.bar(city_avg.index, city_avg.values)
    plt.title('Average AQI by City')
    plt.xlabel('City')
    plt.ylabel('Average AQI')
    plt.xticks(rotation=45)
    _save_current_figure('avg_aqi_by_city.png')


def _plot_pollutant_correlation(df):
    """Draw an annotated correlation heatmap and save 'pollutant_correlation.png'."""
    pollutants = ['AQI', 'PM2.5', 'PM10', 'NO2', 'SO2', 'CO', 'O3']
    correlation = df[pollutants].corr()

    plt.figure(figsize=(8, 6))
    # Drawn with matplotlib only: the file does not import seaborn.
    # Fixing the colour range to [-1, 1] centres the diverging map on zero.
    plt.imshow(correlation.values, cmap='coolwarm', vmin=-1, vmax=1)
    plt.colorbar()
    tick_positions = list(range(len(correlation.columns)))
    plt.xticks(tick_positions, correlation.columns)
    plt.yticks(tick_positions, correlation.index)
    # Write each coefficient into its cell.
    for row, row_values in enumerate(correlation.values):
        for col, value in enumerate(row_values):
            plt.text(col, row, f'{value:.2f}', ha='center', va='center')
    plt.title('Correlation between Pollutants')
    _save_current_figure('pollutant_correlation.png')


def _plot_aqi_distribution(df):
    """Draw one AQI box per city and save 'aqi_distribution.png'."""
    plt.figure(figsize=(10, 6))
    city_names = []
    city_values = []
    for city, aqi in df.groupby('City', sort=False)['AQI']:
        city_names.append(city)
        # Missing readings would turn the box statistics into NaN.
        city_values.append(aqi.dropna().to_numpy())
    plt.boxplot(city_values)
    plt.title('AQI Distribution by City')
    plt.xlabel('City')
    plt.ylabel('AQI')
    # Boxes sit at positions 1..N by default. Labelling them through xticks
    # avoids boxplot's `labels` keyword, deprecated since Matplotlib 3.9.
    plt.xticks(list(range(1, len(city_names) + 1)), city_names, rotation=45)
    _save_current_figure('aqi_distribution.png')


def create_visualizations(df):
    """Create the four AQI charts and save them as PNG files.

    Files written to the current working directory:
      * aqi_trends.png            - AQI over time, one line per city
      * avg_aqi_by_city.png       - mean AQI per city
      * pollutant_correlation.png - correlation heatmap of the pollutant columns
      * aqi_distribution.png      - AQI box plot per city

    Args:
        df: DataFrame with 'City', 'Date', 'AQI', 'PM2.5', 'PM10', 'NO2',
            'SO2', 'CO' and 'O3' columns.

    Raises:
        KeyError: if a required column is missing from *df*.
    """
    # Use default style
    plt.style.use('default')

    _plot_aqi_trends(df)
    _plot_average_aqi(df)
    _plot_pollutant_correlation(df)
    _plot_aqi_distribution(df)

def generate_summary(df):
    """Summarise AQI and particulate readings per city.

    Returns a DataFrame indexed by 'City' holding the mean, maximum and
    minimum AQI plus the mean PM2.5 and PM10, rounded to two decimals.
    """
    # Output column name -> (source column, aggregation function).
    named_stats = {
        'Average AQI': ('AQI', 'mean'),
        'Maximum AQI': ('AQI', 'max'),
        'Minimum AQI': ('AQI', 'min'),
        'Average PM2.5': ('PM2.5', 'mean'),
        'Average PM10': ('PM10', 'mean'),
    }
    per_city = df.groupby('City').agg(**named_stats)
    return per_city.round(2)

def main(file_path='aqi_data.csv'):
    """Run the full AQI analysis for one CSV file.

    Loads the data, writes the charts, saves the per-city summary to
    'aqi_summary.csv', and prints the list of generated files. Any error
    raised along the way is reported on stdout instead of propagating.

    Args:
        file_path: Path of the CSV file to analyse. Defaults to
            'aqi_data.csv' in the current working directory.
    """
    try:
        df = load_and_clean_data(file_path)

        # Create visualizations
        create_visualizations(df)

        # Generate and save summary
        summary = generate_summary(df)
        summary.to_csv('aqi_summary.csv')
    except Exception as e:
        # Top-level boundary of the script: report the failure and stop.
        print(f"An error occurred: {e}")
        return

    print("Analysis complete! Generated files:")
    print("1. aqi_trends.png - AQI trends over time")
    print("2. avg_aqi_by_city.png - Average AQI comparison")
    print("3. pollutant_correlation.png - Correlation heatmap")
    print("4. aqi_distribution.png - AQI distribution boxplot")
    print("5. aqi_summary.csv - Statistical summary")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Leave a Comment