# Untitled
"""Streamlit dashboard for exploring Chicago traffic-crash records.

The app loads crash records from a JSON file and offers two views selected
from the sidebar: a map of the most frequent crash coordinates, and a set of
summary charts (by street, severity, hour of day and day of week).
"""

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st

# Fixed viewport shared by the scatter maps titled "... in Chicago".
CHICAGO_LON_LIMITS = (-87.92, -87.52)
CHICAGO_LAT_LIMITS = (41.64, 42.03)

# Labels for CRASH_DAY_OF_WEEK values 1..7, in that order.
# The Chicago Data Portal describes this column as "Sunday=1".
# NOTE(review): confirm the JSON export in use keeps that encoding.
DAY_LABELS = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']


@st.cache_data
def load_data(json_file: str) -> pd.DataFrame:
    """Read the crash records from *json_file* and parse ``CRASH_DATE``.

    Args:
        json_file: Path (or anything ``pd.read_json`` accepts) of the data.

    Returns:
        The records as a DataFrame with ``CRASH_DATE`` converted to datetime.
    """
    df = pd.read_json(json_file)
    df['CRASH_DATE'] = pd.to_datetime(df['CRASH_DATE'])
    return df


@st.cache_data
def get_top_crash_locations(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    """Return the *n* (latitude, longitude) pairs with the most crashes.

    Args:
        df: Crash records with ``LATITUDE`` and ``LONGITUDE`` columns.
        n: Number of locations to keep.

    Returns:
        A DataFrame with columns ``LATITUDE``, ``LONGITUDE`` and ``COUNT``,
        sorted by ``COUNT`` in descending order.
    """
    location_counts = (
        df.groupby(['LATITUDE', 'LONGITUDE']).size().reset_index(name='COUNT')
    )
    return location_counts.sort_values('COUNT', ascending=False).head(n)


def plot_top_crash_locations(
    df: pd.DataFrame, top_locations: pd.DataFrame
) -> plt.Figure:
    """Draw every crash as a faint dot and ring the top crash locations.

    Args:
        df: All crash records (``LONGITUDE`` / ``LATITUDE`` columns).
        top_locations: Output of :func:`get_top_crash_locations`.

    Returns:
        The matplotlib figure; the caller is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.scatter(df['LONGITUDE'], df['LATITUDE'], s=1, color='lightgray', alpha=0.1)

    for row in top_locations.itertuples(index=False):
        # Ring radius (in coordinate units) grows linearly with crash count.
        radius = row.COUNT * 0.0001
        circle = mpatches.Circle(
            (row.LONGITUDE, row.LATITUDE),
            radius,
            color='#FF0000',
            fill=False,
            linewidth=2,
        )
        ax.add_artist(circle)

    ax.set_title('Top 10 Crash Locations in Chicago', size=16)
    ax.set_xlabel('Longitude', size=12)
    ax.set_ylabel('Latitude', size=12)
    ax.set_xlim(df['LONGITUDE'].min(), df['LONGITUDE'].max())
    ax.set_ylim(df['LATITUDE'].min(), df['LATITUDE'].max())

    # The circles are plain artists without labels, so build a proxy handle.
    legend_element = [
        plt.Line2D(
            [0], [0],
            marker='o',
            color='w',
            label='Crash Count',
            markerfacecolor='#FF0000',
            markersize=10,
            markeredgecolor='#FF0000',
            markeredgewidth=2,
        )
    ]
    ax.legend(handles=legend_element, loc='upper right')

    plt.tight_layout()
    return fig


def _annotate_bars(ax, counts) -> None:
    """Write each bar's value at the end of the bar (bars sit at 0..n-1)."""
    for position, value in enumerate(counts):
        ax.text(value, position, str(value), va='center', fontsize=12)


def _plot_collisions_by_street(df: pd.DataFrame) -> plt.Figure:
    """Horizontal bar chart of the 15 streets with the most collisions."""
    fig, ax = plt.subplots(figsize=(15, 10))
    streets = df['STREET_NAME'].value_counts().head(15)
    streets.plot(kind='barh', ax=ax)
    ax.set_xlabel('Collisions', size=20)
    ax.set_ylabel('Street', size=20)
    ax.set_title('Total Number of Collisions by Street', size=20)
    ax.tick_params(axis='both', which='major', labelsize=15)
    _annotate_bars(ax, streets)
    return fig


def _plot_severity_map(df: pd.DataFrame) -> plt.Figure:
    """Scatter map colouring crashes by severity (none / injury / fatal)."""
    fig, ax = plt.subplots(figsize=(15, 10))

    fatal = df['INJURIES_FATAL'] > 0
    no_fatal = df['INJURIES_FATAL'] == 0
    killed_df = df[fatal]
    injured_df = df[(df['INJURIES_TOTAL'] > 0) & no_fatal]
    nothing_df = df[no_fatal & (df['INJURIES_TOTAL'] == 0)]

    # Drawn from least to most severe so fatal crashes end up on top.
    ax.scatter(nothing_df['LONGITUDE'], nothing_df['LATITUDE'],
               alpha=0.04, s=1, color='blue')
    ax.scatter(injured_df['LONGITUDE'], injured_df['LATITUDE'],
               alpha=0.1, s=1, color='yellow')
    ax.scatter(killed_df['LONGITUDE'], killed_df['LATITUDE'],
               color='red', s=5)

    # Proxy patches: the scatter points are too small/transparent to serve
    # as readable legend markers.
    handles = [
        mpatches.Patch(label='car body damage', alpha=0.2, color='blue'),
        mpatches.Patch(color='yellow', label='personal injury', alpha=0.5),
        mpatches.Patch(color='red', label='fatal accidents'),
    ]
    ax.legend(handles=handles, loc='upper left', prop={'size': 20})

    ax.set_title('Severity of Motor Vehicle Collisions in Chicago', size=20)
    ax.set_xlim(CHICAGO_LON_LIMITS)
    ax.set_ylim(CHICAGO_LAT_LIMITS)
    ax.set_xlabel('Longitude', size=20)
    ax.set_ylabel('Latitude', size=20)
    return fig


def _plot_top_streets_map(df: pd.DataFrame) -> plt.Figure:
    """Scatter map highlighting crashes on the 10 busiest streets."""
    fig, ax = plt.subplots(figsize=(15, 10))
    street_names = df['STREET_NAME'].value_counts().head(10).index.tolist()

    # Background layer: every crash, faint.
    ax.scatter(df['LONGITUDE'], df['LATITUDE'],
               s=1, color='darkseagreen', alpha=0.1)

    colors = ['red', 'blue', 'magenta', 'orange', 'yellow',
              'purple', 'black', 'chartreuse', 'brown', 'darkgreen']
    for street, color in zip(street_names, colors):
        street_df = df[df['STREET_NAME'] == street]
        ax.scatter(street_df['LONGITUDE'], street_df['LATITUDE'],
                   s=2, color=color, label=street, alpha=0.5)

    ax.legend(loc='upper left', prop={'size': 12})
    ax.set_title('Vehicle Collisions in Chicago - Top 10 Streets', size=20)
    ax.set_xlim(CHICAGO_LON_LIMITS)
    ax.set_ylim(CHICAGO_LAT_LIMITS)
    ax.set_xlabel('Longitude', size=20)
    ax.set_ylabel('Latitude', size=20)
    return fig


def _plot_collisions_by_hour(df: pd.DataFrame) -> plt.Figure:
    """Horizontal bar chart of collision counts for each hour 00..23."""
    fig, ax = plt.subplots(figsize=(15, 10))

    hours = list(range(24))
    # Reindex so every hour has a bar; otherwise a missing hour would shift
    # all later bars under the wrong tick label.
    by_hour = df['CRASH_HOUR'].value_counts().reindex(hours, fill_value=0)

    colors = ['g', '0.75', 'y', 'k', 'b', 'r'] * 4
    by_hour.plot(kind='barh', ax=ax, color=colors[:len(by_hour)])
    ax.set_xlabel('Collisions', size=20)
    ax.set_ylabel('Hour of the Day', size=20)
    ax.set_title('Total Number of Collisions by Hour of the Day', size=20)

    ax.set_yticks(hours)
    ax.set_yticklabels([f"{h:02d}" for h in hours], fontsize=15)
    ax.set_ylim(-0.5, 23.5)
    # Floor of 1 avoids a zero-width axis when there are no crashes at all.
    ax.set_xlim(0, max(by_hour.max(), 1) * 1.1)

    _annotate_bars(ax, by_hour)
    return fig


def _plot_collisions_by_day(df: pd.DataFrame) -> plt.Figure:
    """Horizontal bar chart of collision counts for each day of the week."""
    fig, ax = plt.subplots(figsize=(15, 10))

    day_codes = list(range(1, 8))
    # Reindex so all seven days are present and in code order.
    by_day = df['CRASH_DAY_OF_WEEK'].value_counts().reindex(
        day_codes, fill_value=0
    )

    colors = ['g', '0.75', 'y', 'k', 'b', 'r', 'c']
    by_day.plot(kind='barh', ax=ax, color=colors[:len(by_day)])
    ax.set_xlabel('Collisions', size=20)
    ax.set_ylabel('Day of the Week', size=20)
    ax.set_title('Total Number of Collisions by Day of the Week', size=20)

    # pandas places bar k at position k (0-based), not at the day code, so
    # ticks, limits and annotations must all use 0..6.
    positions = list(range(len(day_codes)))
    ax.set_yticks(positions)
    ax.set_yticklabels(DAY_LABELS, fontsize=15)
    ax.set_ylim(-0.5, 6.5)
    ax.set_xlim(0, max(by_day.max(), 1) * 1.1)

    _annotate_bars(ax, by_day)
    return fig


def update_charts(df: pd.DataFrame):
    """Build the five summary figures for the crash data.

    Args:
        df: Crash records with ``STREET_NAME``, ``INJURIES_FATAL``,
            ``INJURIES_TOTAL``, ``LONGITUDE``, ``LATITUDE``, ``CRASH_HOUR``
            and ``CRASH_DAY_OF_WEEK`` columns.

    Returns:
        A tuple ``(fig1, fig2, fig3, fig4, fig5)``: collisions by street,
        severity map, top-10-streets map, collisions by hour, collisions by
        day of week. The caller is responsible for closing the figures.
    """
    fig1 = _plot_collisions_by_street(df)
    fig2 = _plot_severity_map(df)
    fig3 = _plot_top_streets_map(df)
    fig4 = _plot_collisions_by_hour(df)
    fig5 = _plot_collisions_by_day(df)
    return fig1, fig2, fig3, fig4, fig5


st.set_page_config(layout="wide")

# Load the data from JSON
json_file = 'chicago_crashes.json'  # Update with the actual path
collisions_df = load_data(json_file)

# Nothing below can be rendered without data (plots and the date range in
# the sidebar would raise), so stop with a readable message instead.
if collisions_df.empty:
    st.error("No crash records found in the data file.")
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
analysis_type = st.sidebar.radio(
    "Choose Analysis", ["Top 10 Crash Locations", "Monthly Trends"]
)

if analysis_type == "Top 10 Crash Locations":
    st.title('Chicago Traffic Crashes Analysis - Top 10 Crash Locations')

    # Get top 10 crash locations
    top_locations = get_top_crash_locations(collisions_df)

    # Display the top locations data
    st.subheader("Top 10 Crash Locations Data")
    st.dataframe(top_locations)

    # Plot the map, centred in the middle (widest) column
    fig = plot_top_crash_locations(collisions_df, top_locations)
    col1, col2, col3 = st.columns([1, 3, 1])
    with col2:
        st.pyplot(fig)
    # pyplot keeps every figure alive until closed; without this, figures
    # pile up on each Streamlit rerun.
    plt.close(fig)

    st.info("The size of the circles represents the count of crashes at each location.")

elif analysis_type == "Monthly Trends":
    st.title('Chicago Traffic Crashes Analysis - Monthly Trends')

    # Update the charts with the data
    fig1, fig2, fig3, fig4, fig5 = update_charts(collisions_df)

    # Display the charts
    with st.container():
        col1, col2 = st.columns(2)
        col1.pyplot(fig3)
        col2.pyplot(fig2)

    with st.container():
        col3, col4 = st.columns(2)
        col3.pyplot(fig1)
        col4.pyplot(fig5)

    st.pyplot(fig4)

    # Release the figures once rendered (see note in the other branch).
    for rendered in (fig1, fig2, fig3, fig4, fig5):
        plt.close(rendered)

# Display some overall statistics
st.sidebar.subheader("Overall Statistics")
st.sidebar.write(f"Total number of crashes: {len(collisions_df)}")
st.sidebar.write(
    f"Date range: {collisions_df['CRASH_DATE'].min().date()} to {collisions_df['CRASH_DATE'].max().date()}"
)
# Leave a Comment