Untitled
"""Generate charts and a per-city summary table from an AQI CSV file.

The input is expected to contain at least the columns ``City``, ``Date``,
``AQI``, ``PM2.5`` and ``PM10``; further pollutant columns (``NO2``, ``SO2``,
``CO``, ``O3``) are included in the correlation heatmap when present.
"""

import matplotlib.pyplot as plt
import pandas as pd

# Columns considered for the correlation heatmap, in display order.
_POLLUTANT_COLUMNS = ['AQI', 'PM2.5', 'PM10', 'NO2', 'SO2', 'CO', 'O3']


def load_and_clean_data(file_path):
    """Load AQI data from a CSV file.

    Args:
        file_path: Path (or any other source accepted by ``pandas.read_csv``).

    Returns:
        A DataFrame with the CSV contents. If a ``Date`` column exists it is
        converted to datetime values; all other columns are left as read.
    """
    df = pd.read_csv(file_path)
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
    return df


def _save_and_close(filename):
    """Tighten the layout of the current figure, write it to *filename*, close it."""
    try:
        plt.tight_layout()
        plt.savefig(filename)
    finally:
        # Always release the figure so a failed save does not leak it.
        plt.close()


def _plot_aqi_trends(df):
    """Line plot of AQI over time, one line per city -> ``aqi_trends.png``."""
    plt.figure(figsize=(10, 6))
    # sort=False keeps cities in order of first appearance; rows whose City
    # is missing are skipped by groupby.
    for city, city_data in df.groupby('City', sort=False):
        # Sort chronologically so the line does not zig-zag when the CSV
        # rows are not already ordered by date.
        city_data = city_data.sort_values('Date')
        plt.plot(city_data['Date'], city_data['AQI'], label=city, marker='o')
    plt.title('AQI Trends by City')
    plt.xlabel('Date')
    plt.ylabel('AQI Level')
    plt.legend()
    plt.xticks(rotation=45)
    _save_and_close('aqi_trends.png')


def _plot_average_aqi(df):
    """Bar plot of mean AQI per city, ascending -> ``avg_aqi_by_city.png``."""
    plt.figure(figsize=(10, 6))
    city_avg = df.groupby('City')['AQI'].mean().sort_values(ascending=True)
    plt.bar(city_avg.index, city_avg.values)
    plt.title('Average AQI by City')
    plt.xlabel('City')
    plt.ylabel('Average AQI')
    plt.xticks(rotation=45)
    _save_and_close('avg_aqi_by_city.png')


def _plot_pollutant_correlation(df):
    """Annotated correlation heatmap -> ``pollutant_correlation.png``.

    Drawn with matplotlib only: the original code called ``sns.heatmap``
    although seaborn was never imported, which raised ``NameError``.

    Raises:
        ValueError: If none of the known pollutant columns is present as a
            numeric column.
    """
    present = [col for col in _POLLUTANT_COLUMNS if col in df.columns]
    correlation = df[present].select_dtypes(include='number').corr()
    if correlation.empty:
        raise ValueError(
            'No numeric pollutant columns found; expected some of: '
            + ', '.join(_POLLUTANT_COLUMNS)
        )
    labels = list(correlation.columns)
    positions = list(range(len(labels)))

    fig, ax = plt.subplots(figsize=(8, 6))
    # Fixed [-1, 1] range keeps the diverging colormap centred on zero.
    image = ax.imshow(correlation.to_numpy(), cmap='coolwarm', vmin=-1, vmax=1)
    fig.colorbar(image, ax=ax)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)
    for row in positions:
        for col in positions:
            value = correlation.iat[row, col]
            # White text on the saturated ends of the colormap stays readable.
            colour = 'white' if abs(value) > 0.6 else 'black'
            ax.text(col, row, f'{value:.2f}',
                    ha='center', va='center', color=colour)
    ax.set_title('Correlation between Pollutants')
    _save_and_close('pollutant_correlation.png')


def _plot_aqi_distribution(df):
    """Box plot of AQI values per city -> ``aqi_distribution.png``."""
    plt.figure(figsize=(10, 6))
    cities = list(df['City'].dropna().unique())
    # NaN readings are dropped: matplotlib draws an empty box for any sample
    # that contains NaN.
    samples = [
        df.loc[df['City'] == city, 'AQI'].dropna().to_numpy()
        for city in cities
    ]
    plt.boxplot(samples)
    plt.title('AQI Distribution by City')
    plt.xlabel('City')
    plt.ylabel('AQI')
    # Boxes are drawn at positions 1..n. Labelling through xticks avoids the
    # boxplot `labels` keyword, which newer matplotlib releases deprecate.
    plt.xticks(range(1, len(cities) + 1), cities, rotation=45)
    _save_and_close('aqi_distribution.png')


def create_visualizations(df):
    """Create the four AQI charts and save them as PNG files.

    Files written to the current working directory: ``aqi_trends.png``,
    ``avg_aqi_by_city.png``, ``pollutant_correlation.png`` and
    ``aqi_distribution.png``.

    Args:
        df: DataFrame with ``City``, ``Date`` and ``AQI`` columns plus any of
            the pollutant columns used for the correlation heatmap.
    """
    # Use default style
    plt.style.use('default')

    _plot_aqi_trends(df)
    _plot_average_aqi(df)
    _plot_pollutant_correlation(df)
    _plot_aqi_distribution(df)


def generate_summary(df):
    """Generate a per-city statistical summary of the AQI data.

    Args:
        df: DataFrame with ``City``, ``AQI``, ``PM2.5`` and ``PM10`` columns.

    Returns:
        A DataFrame indexed by city with the columns ``Average AQI``,
        ``Maximum AQI``, ``Minimum AQI``, ``Average PM2.5`` and
        ``Average PM10``, rounded to two decimals.
    """
    # Named aggregation ties each output label to its statistic, instead of
    # renaming the aggregated columns by position afterwards.
    summary = df.groupby('City').agg(**{
        'Average AQI': ('AQI', 'mean'),
        'Maximum AQI': ('AQI', 'max'),
        'Minimum AQI': ('AQI', 'min'),
        'Average PM2.5': ('PM2.5', 'mean'),
        'Average PM10': ('PM10', 'mean'),
    })
    return summary.round(2)


def main():
    """Run the full analysis on ``aqi_data.csv`` and report the outputs.

    Any failure is reported on stdout instead of propagating, so the script
    never ends with a traceback.
    """
    try:
        # Load your CSV file
        df = load_and_clean_data('aqi_data.csv')  # Replace with your CSV file name

        # Create visualizations
        create_visualizations(df)

        # Generate and save summary
        summary = generate_summary(df)
        summary.to_csv('aqi_summary.csv')

        print("Analysis complete! Generated files:")
        print("1. aqi_trends.png - AQI trends over time")
        print("2. avg_aqi_by_city.png - Average AQI comparison")
        print("3. pollutant_correlation.png - Correlation heatmap")
        print("4. aqi_distribution.png - AQI distribution boxplot")
        print("5. aqi_summary.csv - Statistical summary")
    except Exception as e:
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()
Leave a Comment