Untitled

mail@pastecode.io avatar
unknown
python
4 months ago
7.4 kB
3
Indexable


import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

@st.cache_data
def load_data(json_file):
    """Read the crash records from JSON and parse their crash dates.

    Args:
        json_file: Path (or other source accepted by ``pd.read_json``) of the
            crash data.

    Returns:
        A DataFrame whose 'CRASH_DATE' column has been converted with
        ``pd.to_datetime``.
    """
    return pd.read_json(json_file).assign(
        CRASH_DATE=lambda frame: pd.to_datetime(frame['CRASH_DATE'])
    )

@st.cache_data
def get_top_crash_locations(df, n=10):
    """Return the ``n`` coordinate pairs with the most crash records.

    Args:
        df: Crash records with 'LATITUDE' and 'LONGITUDE' columns.
        n: Number of locations to keep.

    Returns:
        A DataFrame with 'LATITUDE', 'LONGITUDE' and 'COUNT' columns, sorted
        by 'COUNT' in descending order and limited to the first ``n`` rows.
    """
    return (
        df.groupby(['LATITUDE', 'LONGITUDE'])
        .size()
        .reset_index(name='COUNT')
        .sort_values('COUNT', ascending=False)
        .head(n)
    )

def plot_top_crash_locations(df, top_locations):
    """Plot all crashes as faint dots and ring the most frequent coordinates.

    Args:
        df: Crash records with 'LONGITUDE' and 'LATITUDE' columns.
        top_locations: Rows with 'LONGITUDE', 'LATITUDE' and 'COUNT' columns,
            such as the result of ``get_top_crash_locations``.

    Returns:
        The matplotlib Figure containing the map.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    lon, lat = df['LONGITUDE'], df['LATITUDE']
    ax.scatter(lon, lat, s=1, color='lightgray', alpha=0.1)

    rings = zip(top_locations['LONGITUDE'], top_locations['LATITUDE'], top_locations['COUNT'])
    for x, y, count in rings:
        # The radius is expressed in axis (data) units and grows with the count.
        ring = plt.Circle((x, y), count * 0.0001,
                          color='#FF0000', fill=False, linewidth=2)
        ax.add_artist(ring)

    # Title reports how many locations are actually drawn instead of assuming 10.
    ax.set_title(f'Top {len(top_locations)} Crash Locations in Chicago', size=16)
    ax.set_xlabel('Longitude', size=12)
    ax.set_ylabel('Latitude', size=12)

    # matplotlib rejects NaN axis limits, which min()/max() produce when there
    # are no valid coordinates; in that case keep the default limits.
    if lon.notna().any() and lat.notna().any():
        ax.set_xlim(lon.min(), lon.max())
        ax.set_ylim(lat.min(), lat.max())

    legend_element = [plt.Line2D([0], [0], marker='o', color='w', label='Crash Count',
                                 markerfacecolor='#FF0000', markersize=10,
                                 markeredgecolor='#FF0000', markeredgewidth=2)]
    ax.legend(handles=legend_element, loc='upper right')
    # Lay out this figure explicitly rather than whatever pyplot considers current.
    fig.tight_layout()
    return fig

def _annotate_bar_values(ax, values):
    """Write each value next to its horizontal bar (bar ``i`` sits at y == i)."""
    for position, value in enumerate(values):
        ax.text(value, position, str(value), va='center', fontsize=12)

def _plot_collisions_by_street(df):
    """Horizontal bar chart of the 15 most frequent 'STREET_NAME' values."""
    fig, ax = plt.subplots(figsize=(15, 10))
    streets = df['STREET_NAME'].value_counts().head(15)
    streets.plot(kind='barh', ax=ax)
    ax.set_xlabel('Collisions', size=20)
    ax.set_ylabel('Street', size=20)
    ax.set_title('Total Number of Collisions by Street', size=20)
    ax.tick_params(axis='both', which='major', labelsize=15)
    _annotate_bar_values(ax, streets)
    return fig

def _plot_severity_map(df):
    """Scatter map of crashes coloured by fatal / injury / no-injury outcome."""
    fig, ax = plt.subplots(figsize=(15, 10))
    killed_df = df[df['INJURIES_FATAL'] > 0]
    injured_df = df[(df['INJURIES_TOTAL'] > 0) & (df['INJURIES_FATAL'] == 0)]
    nothing_df = df[(df['INJURIES_FATAL'] == 0) & (df['INJURIES_TOTAL'] == 0)]

    # Drawn from least to most severe so fatal crashes end up on top.
    ax.scatter(nothing_df['LONGITUDE'], nothing_df['LATITUDE'], alpha=0.04, s=1, color='blue')
    ax.scatter(injured_df['LONGITUDE'], injured_df['LATITUDE'], alpha=0.1, s=1, color='yellow')
    ax.scatter(killed_df['LONGITUDE'], killed_df['LATITUDE'], color='red', s=5)

    # The scatter points are too faint for a legend, so use solid proxy patches.
    legend_patches = [
        mpatches.Patch(label='car body damage', alpha=0.2, color='blue'),
        mpatches.Patch(color='yellow', label='personal injury', alpha=0.5),
        mpatches.Patch(color='red', label='fatal accidents'),
    ]
    ax.legend(handles=legend_patches, loc='upper left', prop={'size': 20})

    ax.set_title('Severity of Motor Vehicle Collisions in Chicago', size=20)
    ax.set_xlim((-87.92, -87.52))
    ax.set_ylim((41.64, 42.03))
    ax.set_xlabel('Longitude', size=20)
    ax.set_ylabel('Latitude', size=20)
    return fig

def _plot_top_streets_map(df):
    """Scatter map highlighting crashes on the 10 most frequent streets."""
    fig, ax = plt.subplots(figsize=(15, 10))
    street_names = df['STREET_NAME'].value_counts().head(10).index.tolist()

    # Faint background layer with every crash.
    ax.scatter(df['LONGITUDE'], df['LATITUDE'], s=1, color='darkseagreen', alpha=0.1)

    palette = ['red', 'blue', 'magenta', 'orange', 'yellow', 'purple', 'black',
               'chartreuse', 'brown', 'darkgreen']
    for colour, street in zip(palette, street_names):
        street_df = df[df['STREET_NAME'] == street]
        ax.scatter(street_df['LONGITUDE'], street_df['LATITUDE'], s=2,
                   color=colour, label=street, alpha=0.5)

    ax.legend(loc='upper left', prop={'size': 12})
    ax.set_title('Vehicle Collisions in Chicago - Top 10 Streets', size=20)
    ax.set_xlim((-87.92, -87.52))
    ax.set_ylim((41.64, 42.03))
    ax.set_xlabel('Longitude', size=20)
    ax.set_ylabel('Latitude', size=20)
    return fig

def _plot_category_counts(series, categories, tick_labels, palette, ylabel, title):
    """Horizontal bar chart of how often each expected category value occurs.

    pandas draws bar ``i`` at y == i regardless of the index values, so the
    counts are reindexed onto the full ``categories`` list (missing ones become
    0) and the ticks are placed at the bar positions. This keeps every bar
    under its own label even when a category is absent from the data.
    """
    fig, ax = plt.subplots(figsize=(15, 10))
    counts = series.value_counts().reindex(categories, fill_value=0)
    counts.plot(kind='barh', ax=ax, color=palette[:len(counts)])
    ax.set_xlabel('Collisions', size=20)
    ax.set_ylabel(ylabel, size=20)
    ax.set_title(title, size=20)

    positions = list(range(len(counts)))
    ax.set_yticks(positions)
    ax.set_yticklabels(tick_labels, fontsize=15)
    ax.set_ylim(-0.5, len(counts) - 0.5)
    # Leave 10% headroom on the right for the value labels.
    ax.set_xlim(0, counts.max() * 1.1)

    _annotate_bar_values(ax, counts)
    return fig

def update_charts(df):
    """Build the five summary charts for the crash data.

    Args:
        df: Crash records with 'STREET_NAME', 'LONGITUDE', 'LATITUDE',
            'INJURIES_FATAL', 'INJURIES_TOTAL', 'CRASH_HOUR' and
            'CRASH_DAY_OF_WEEK' columns. Assumes hours are encoded 0-23 and
            days 1-7, as the tick labels imply.

    Returns:
        A tuple ``(fig1, fig2, fig3, fig4, fig5)`` of matplotlib Figures:
        collisions by street, severity map, top-10-streets map, collisions by
        hour of day, and collisions by day of week.
    """
    fig1 = _plot_collisions_by_street(df)
    fig2 = _plot_severity_map(df)
    fig3 = _plot_top_streets_map(df)

    hours = list(range(24))
    fig4 = _plot_category_counts(
        df['CRASH_HOUR'],
        categories=hours,
        tick_labels=[f"{h:02d}" for h in hours],
        palette=['g', '0.75', 'y', 'k', 'b', 'r'] * 4,
        ylabel='Hour of the Day',
        title='Total Number of Collisions by Hour of the Day',
    )

    # NOTE(review): labels assume CRASH_DAY_OF_WEEK uses 1=Mon ... 7=Sun; some
    # crash datasets encode Sunday as 1 - confirm against the data dictionary.
    fig5 = _plot_category_counts(
        df['CRASH_DAY_OF_WEEK'],
        categories=list(range(1, 8)),
        tick_labels=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
        palette=['g', '0.75', 'y', 'k', 'b', 'r', 'c'],
        ylabel='Day of the Week',
        title='Total Number of Collisions by Day of the Week',
    )

    return fig1, fig2, fig3, fig4, fig5

st.set_page_config(layout="wide")

# Load the data from JSON (load_data is wrapped in st.cache_data)
json_file = 'chicago_crashes.json'  # Update with the actual path
collisions_df = load_data(json_file)

# Sidebar for navigation
st.sidebar.title("Navigation")
analysis_type = st.sidebar.radio("Choose Analysis", ["Top 10 Crash Locations", "Monthly Trends"])

if analysis_type == "Top 10 Crash Locations":
    st.title('Chicago Traffic Crashes Analysis - Top 10 Crash Locations')

    # Get top 10 crash locations
    top_locations = get_top_crash_locations(collisions_df)

    # Display the top locations data
    st.subheader("Top 10 Crash Locations Data")
    st.dataframe(top_locations)

    # Plot the map in the wide centre column
    fig = plot_top_crash_locations(collisions_df, top_locations)
    col1, col2, col3 = st.columns([1, 3, 1])
    with col2:
        st.pyplot(fig)
    # Figures made through pyplot stay registered until closed; release the
    # figure once rendered so script reruns do not accumulate open figures.
    plt.close(fig)

    st.info("The size of the circles represents the count of crashes at each location.")

elif analysis_type == "Monthly Trends":
    st.title('Chicago Traffic Crashes Analysis - Monthly Trends')

    # Update the charts with the data
    fig1, fig2, fig3, fig4, fig5 = update_charts(collisions_df)

    # Display the charts
    with st.container():
        col1, col2 = st.columns(2)
        col1.pyplot(fig3)
        col2.pyplot(fig2)

    with st.container():
        col3, col4 = st.columns(2)
        col3.pyplot(fig1)
        col4.pyplot(fig5)

    st.pyplot(fig4)

    # Release all five figures now that they have been rendered (see above).
    for rendered_fig in (fig1, fig2, fig3, fig4, fig5):
        plt.close(rendered_fig)

# Display some overall statistics
st.sidebar.subheader("Overall Statistics")
st.sidebar.write(f"Total number of crashes: {len(collisions_df)}")
st.sidebar.write(f"Date range: {collisions_df['CRASH_DATE'].min().date()} to {collisions_df['CRASH_DATE'].max().date()}")
Leave a Comment