# Paste-site header (not part of the script):
# Untitled | unknown | plain_text | a year ago | 3.2 kB | 14 | Indexable
import pandas as pd
import matplotlib.pyplot as plt
def load_and_clean_data(file_path):
    """Read the AQI CSV at ``file_path`` into a DataFrame.

    When the file has a ``Date`` column it is converted with
    ``pd.to_datetime``; every other column is returned as ``read_csv``
    parsed it.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(file_path)

    # Nothing to convert when the data set carries no date column.
    if 'Date' not in frame.columns:
        return frame

    frame['Date'] = pd.to_datetime(frame['Date'])
    return frame
def create_visualizations(df):
    """Create and save four AQI charts as PNG files in the working directory.

    Writes ``aqi_trends.png``, ``avg_aqi_by_city.png``,
    ``pollutant_correlation.png`` and ``aqi_distribution.png``.

    Args:
        df: DataFrame with ``City``, ``Date`` and ``AQI`` columns plus the
            pollutant columns ``PM2.5``, ``PM10``, ``NO2``, ``SO2``, ``CO``
            and ``O3``.

    Raises:
        KeyError: If one of the columns listed above is missing.
    """
    # Use default style
    plt.style.use('default')

    _plot_aqi_trends(df)
    _plot_average_aqi(df)
    _plot_pollutant_correlation(df)
    _plot_aqi_distribution(df)


def _save_current_figure(filename):
    """Tighten the layout of the current figure, save it, and close it."""
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()


def _plot_aqi_trends(df):
    """Save a line plot of AQI over time with one line per city."""
    plt.figure(figsize=(10, 6))
    # sort=False keeps the cities in order of first appearance in the data.
    for city, city_data in df.groupby('City', sort=False):
        # Plot chronologically so the line does not zig-zag when the input
        # rows are not already ordered by date.
        city_data = city_data.sort_values('Date')
        plt.plot(city_data['Date'], city_data['AQI'], label=city, marker='o')
    plt.title('AQI Trends by City')
    plt.xlabel('Date')
    plt.ylabel('AQI Level')
    plt.legend()
    plt.xticks(rotation=45)
    _save_current_figure('aqi_trends.png')


def _plot_average_aqi(df):
    """Save a bar plot of the mean AQI per city, lowest average first."""
    plt.figure(figsize=(10, 6))
    city_avg = df.groupby('City')['AQI'].mean().sort_values(ascending=True)
    plt.bar(city_avg.index, city_avg.values)
    plt.title('Average AQI by City')
    plt.xlabel('City')
    plt.ylabel('Average AQI')
    plt.xticks(rotation=45)
    _save_current_figure('avg_aqi_by_city.png')


def _plot_pollutant_correlation(df):
    """Save an annotated correlation heatmap of AQI and the pollutants.

    Drawn with matplotlib only: the previous ``sns.heatmap`` call relied on
    seaborn, which this file never imports, so it failed with ``NameError``.
    """
    pollutants = ['AQI', 'PM2.5', 'PM10', 'NO2', 'SO2', 'CO', 'O3']
    correlation = df[pollutants].corr()
    # Label from the result rather than the request so ticks always match
    # the matrix that is actually drawn.
    names = list(correlation.columns)
    positions = list(range(len(names)))

    plt.figure(figsize=(8, 6))
    # Correlations lie in [-1, 1]; symmetric limits put 0 at the midpoint of
    # the diverging colormap.
    image = plt.imshow(correlation.values, cmap='coolwarm', vmin=-1, vmax=1)
    plt.colorbar(image)
    plt.xticks(positions, names, rotation=45)
    plt.yticks(positions, names)
    # Write each coefficient into its cell.
    for row in positions:
        for col in positions:
            plt.text(col, row, f'{correlation.iat[row, col]:.2f}',
                     ha='center', va='center')
    plt.title('Correlation between Pollutants')
    _save_current_figure('pollutant_correlation.png')


def _plot_aqi_distribution(df):
    """Save a box plot of the AQI distribution for each city."""
    # Missing AQI readings are dropped: NaN values would otherwise turn a
    # city's box statistics into NaN and leave its box empty.
    samples_by_city = {
        city: values.dropna().to_numpy()
        for city, values in df.groupby('City', sort=False)['AQI']
    }

    plt.figure(figsize=(10, 6))
    plt.boxplot(list(samples_by_city.values()))
    plt.title('AQI Distribution by City')
    plt.xlabel('City')
    plt.ylabel('AQI')
    # Boxes are drawn at positions 1..N by default. Labelling through xticks
    # avoids boxplot's ``labels`` keyword, which newer matplotlib deprecates.
    plt.xticks(list(range(1, len(samples_by_city) + 1)),
               list(samples_by_city), rotation=45)
    _save_current_figure('aqi_distribution.png')
def generate_summary(df):
    """Build a per-city summary table of the AQI data.

    Args:
        df: DataFrame with ``City``, ``AQI``, ``PM2.5`` and ``PM10`` columns.

    Returns:
        DataFrame indexed by ``City`` with the mean, maximum and minimum AQI
        and the mean PM2.5 and PM10, all rounded to two decimals.
    """
    # Output column name -> (source column, aggregation), in output order.
    named_stats = {
        'Average AQI': ('AQI', 'mean'),
        'Maximum AQI': ('AQI', 'max'),
        'Minimum AQI': ('AQI', 'min'),
        'Average PM2.5': ('PM2.5', 'mean'),
        'Average PM10': ('PM10', 'mean'),
    }
    per_city = df.groupby('City').agg(**named_stats)
    return per_city.round(2)
def main():
    """Run the full analysis: load the CSV, save the charts and the summary.

    Reads ``aqi_data.csv``, writes the chart PNGs and ``aqi_summary.csv``,
    then prints the list of generated files. Any exception raised along the
    way is caught and reported with a single printed message.
    """
    report_lines = (
        "Analysis complete! Generated files:",
        "1. aqi_trends.png - AQI trends over time",
        "2. avg_aqi_by_city.png - Average AQI comparison",
        "3. pollutant_correlation.png - Correlation heatmap",
        "4. aqi_distribution.png - AQI distribution boxplot",
        "5. aqi_summary.csv - Statistical summary",
    )
    try:
        # Replace the file name below with your own CSV file.
        df = load_and_clean_data('aqi_data.csv')

        create_visualizations(df)

        # Build the per-city statistics and write them next to the charts.
        generate_summary(df).to_csv('aqi_summary.csv')

        for line in report_lines:
            print(line)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
# Paste-site footer (not part of the script): Leave a Comment