Untitled

 avatar
unknown
python
2 years ago
3.7 kB
2
Indexable
import streamlit as st
import pandas as pd
import altair as alt
import numpy as np
from datetime import datetime
from datetime import date
from vega_datasets import data

st.header("Covid-19 Statistics")

@st.cache(allow_output_mutation=True)
def load_data():
    """Load the covidcast JHU-CSSE CSV export into a DataFrame.

    The file name is resolved relative to the current working directory.
    The result is memoised by ``st.cache``; ``allow_output_mutation=True``
    switches off Streamlit's mutation check on the cached value, so code
    that modifies the returned frame should do so on a copy.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file.
    """
    csv_path = "covidcast-jhu-csse-confirmed_7dav_incidence_num-2020-03-01-to-2022-10-10.csv"
    frame = pd.read_csv(csv_path)
    return frame

# Build lookup tables from the state abbreviation (STUSAB) to the state id
# (STATE) and to the full state name (STATE_NAME). 'state.txt' is read as a
# pipe-separated file.
df_state_id = pd.read_csv('state.txt', sep='|')
map_state_id = dict(zip(df_state_id['STUSAB'], df_state_id['STATE']))
map_state_name = dict(zip(df_state_id['STUSAB'], df_state_id['STATE_NAME']))

# Load the covid-19 data.
# load_data() is cached with allow_output_mutation=True, so every script rerun
# gets the very same DataFrame object back. The steps below modify the frame
# (clamping, new columns, renaming 'value'), so they must run on a copy:
# otherwise the second rerun would no longer find the 'value' column.
df = load_data().copy()

# Clean data: negative counts are clamped to zero.
df.loc[df['value'] < 0, 'value'] = 0

# New columns: numeric state id and full state name, both looked up through
# the upper-cased 'geo_value' abbreviation. An abbreviation missing from
# state.txt raises KeyError, as before.
state_abbr = df['geo_value'].str.upper()
df['id'] = state_abbr.apply(lambda abbr: map_state_id[abbr])
df['state'] = state_abbr.apply(lambda abbr: map_state_name[abbr])
df = df.rename(columns={'value': 'cases'})

# Dates as a NumPy object array of datetime.date, aligned row-by-row with df.
# Parsing the whole column at once avoids one strptime call per row.
df_dates = pd.to_datetime(df['time_value'], format='%Y-%m-%d').dt.date.to_numpy()

##### First choice #####
# Optionally show the full (unfiltered) table.
if st.checkbox("Want to see the Raw Data?"):
    st.write(df)

# Boolean row mask over df; starts with every row selected and is narrowed
# by each filter below. It is created as a real boolean Series so that
# df[labels] is always a row filter and never depends on dtype coercion.
labels = pd.Series(True, index=df.index, dtype=bool)

##### Second choice #####
# The options are the upper-cased abbreviations found in the data.
state_codes = df['geo_value'].str.upper()
states_select = st.multiselect('Interested in specific states?', state_codes.unique())

# Filter labels with the selected states. The comparison uses the same
# upper-cased values that were offered as options; comparing against the raw
# 'geo_value' column would never match when it is stored in another case.
if states_select:
    labels &= state_codes.isin(states_select)


##### Third choice #####
min_date = min(df_dates)
max_date = max(df_dates)
chosen_time_period = st.slider('Specify a period of time!',
                       min_value=min_date,
                       max_value=max_date,
                       value=(min_date, max_date),
                       )

# Filter labels with the selected time period. Both ends are inclusive so
# that the default (min_date, max_date) range keeps the first and last day.
period_start, period_end = chosen_time_period
labels &= (df_dates >= period_start) & (df_dates <= period_end)



st.write("Click on the states you are interested in.")

# One multi-selection on the 'state' field is attached to all three charts
# and drives their opacity.
state_multi = alt.selection_multi(fields=['state'])


def _selection_opacity(dimmed):
    """Return an opacity condition: 1 for selected marks, `dimmed` otherwise."""
    return alt.condition(state_multi, alt.value(1), alt.value(dimmed))


# State outlines from the vega_datasets us_10m topology.
states = alt.topo_feature(data.us_10m.url, feature='states')

# Rows that pass the current filters, and their per-state summary
# (mean of cases, plus the id and state name of each group).
df_selected = df[labels]
df_group_by_state = df_selected.groupby(by='geo_value').agg(
    cases=('cases', 'mean'),
    id=('id', pd.Series.mode),
    state=('state', pd.Series.mode),
)

##### First chart: map #####
# The per-state summary is joined onto the map shapes through the 'id' key.
state_lookup = alt.LookupData(df_group_by_state, 'id', ['cases', 'state'])
map_chart = (
    alt.Chart(
        states,
        title="Average numbers of covid-19 comfirmed cases in the chosen time",
    )
    .mark_geoshape()
    .encode(
        color='cases:Q',
        opacity=_selection_opacity(0.2),
        tooltip=['state:N', 'cases:Q'],
    )
    .transform_lookup(lookup='id', from_=state_lookup)
    .properties(width=400, height=240)
    .project(type='albersUsa')
    .add_selection(state_multi)
)

##### Second chart: bar #####
# Only the 15 states with the largest mean case count are shown.
top_states = df_group_by_state.nlargest(15, 'cases')
bar_chart = (
    alt.Chart(top_states)
    .mark_bar()
    .encode(
        x=alt.X('cases'),
        y=alt.Y('state', sort='-x'),
        color='cases',
        opacity=_selection_opacity(0.2),
    )
    .properties(width=100, height=240)
    .add_selection(state_multi)
)

##### Third chart: line #####
# One line per state over the filtered rows.
value_chart = (
    alt.Chart(df_selected, title='Average confirmed cases in 7-day')
    .mark_line()
    .encode(
        x=alt.X('time_value'),
        y=alt.Y('cases'),
        color='state',
        opacity=_selection_opacity(0.1),
        tooltip=['state'],
    )
    .properties(width=550, height=300)
    .add_selection(state_multi)
)

# Layout: map and bar chart side by side, line chart underneath.
top_row = map_chart | bar_chart
st.altair_chart(top_row & value_chart)





Editor is loading...