Untitled
unknown
python
3 years ago
3.7 kB
13
Indexable
import streamlit as st
import pandas as pd
import altair as alt
import numpy as np
from datetime import datetime
from datetime import date
from vega_datasets import data
# Page heading rendered at the top of the Streamlit app.
st.header("Covid-19 Statistics")
@st.cache(allow_output_mutation=True)
def load_data(
    path="covidcast-jhu-csse-confirmed_7dav_incidence_num-2020-03-01-to-2022-10-10.csv",
):
    """Read the COVID case CSV into a DataFrame.

    The result is cached by Streamlit so the file is not re-read on every
    rerun. ``allow_output_mutation=True`` is required because the script
    edits the returned frame in place after loading it (it clamps ``value``
    and adds the ``id`` and ``state`` columns).

    Args:
        path: CSV file to read. Defaults to the export this app was written
            against, so existing ``load_data()`` calls behave as before.

    Returns:
        The DataFrame produced by ``pd.read_csv(path)``.
    """
    return pd.read_csv(path)
# construct maps for state id/name, keyed by the STUSAB column
df_state_id = pd.read_csv('state.txt', sep='|')
# Zip the columns directly instead of materialising a Series per row with
# iterrows(); the resulting dictionaries are the same.
map_state_id = dict(zip(df_state_id['STUSAB'], df_state_id['STATE']))
map_state_name = dict(zip(df_state_id['STUSAB'], df_state_id['STATE_NAME']))

# load covid 19 data
df = load_data()

# clean data: negative values are clamped to zero
df.loc[df['value'] < 0, 'value'] = 0

# new columns: id and state names
# geo_value is upper-cased before the lookup; indexing the dicts directly keeps
# the fail-fast KeyError for any code that is missing from state.txt.
state_codes = df['geo_value'].str.upper()
df['id'] = [map_state_id[code] for code in state_codes]
df['state'] = [map_state_name[code] for code in state_codes]

# date (np array of datetime.date objects, one per row of df)
# Parse the whole column in one call rather than running strptime per row.
df_dates = pd.to_datetime(df['time_value'], format='%Y-%m-%d').dt.date.to_numpy()
##### First choice #####
# The full table is only rendered while the checkbox is ticked.
show_raw_data = st.checkbox("Want to see the Raw Data?")
if show_raw_data:
    st.write(df)
# Row mask over df, initially selecting every row. It is created as an explicit
# boolean Series (rather than a Series of integer 1s) so that it is a valid
# boolean indexer for df[labels] regardless of which filters are applied later.
labels = pd.Series(True, index=df.index, dtype=bool)

##### Second choice #####
states_select = st.multiselect('Interested in specific states?', df['geo_value'].unique())

# filter labels with selected states; an empty selection keeps all states
if states_select:
    labels &= df['geo_value'].isin(states_select)
##### Third choice #####
# The slider spans the full range of dates present in the data and, because
# `value` is a 2-tuple, returns a (start, end) pair.
min_date = min(df_dates)
max_date = max(df_dates)
chosen_time_period = st.slider(
    'Specify a period of time!',
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
)

# filter labels with selected time period
# Both bounds are inclusive so the days under the slider handles are kept.
# Strict comparisons dropped the first and last day even at the default full
# range, and matched nothing when a single day was selected.
period_start, period_end = chosen_time_period
labels &= (df_dates >= period_start) & (df_dates <= period_end)
st.write("Click on the states you are interested in.")

# Selection on the `state` field; the three charts below all reference it in
# their opacity condition.
state_multi = alt.selection_multi(fields=['state'])

# State outlines taken from the us_10m dataset in vega_datasets.
states = alt.topo_feature(data.us_10m.url, feature='states')

# One row per geo_value for the currently filtered rows: the mean of `value`
# plus the matching id and state name. `id` and `state` are derived from
# geo_value, so they are constant within a group; 'first' returns that value as
# a scalar, whereas pd.Series.mode returns a Series and only works as an
# aggregator while every group happens to have exactly one mode.
df_group_by_state = df[labels].groupby(by='geo_value').agg(
    value=('value', 'mean'),
    id=('id', 'first'),
    state=('state', 'first'),
)
##### First chart: map #####
# Each state shape is joined to its aggregated row through the shared `id`
# key, which brings in the `value` and `state` fields used by the encodings.
_map_lookup = alt.LookupData(df_group_by_state, 'id', ['value', 'state'])
_map_opacity = alt.condition(state_multi, alt.value(1), alt.value(0.2))

map_chart = (
    alt.Chart(
        states,
        title="Average numbers of covid-19 comfirmed cases in the chosen time",
    )
    .mark_geoshape()
    .encode(
        color='value:Q',
        opacity=_map_opacity,
        tooltip=['state:N', 'value:Q'],
    )
    .transform_lookup(lookup='id', from_=_map_lookup)
    .properties(width=400, height=240)
    .project(type='albersUsa')
    .add_selection(state_multi)
)
##### Second chart: bar #####
# Horizontal bars for the 15 states with the largest mean value, sorted by
# descending x.
_top_states = df_group_by_state.nlargest(15, 'value')
_bar_opacity = alt.condition(state_multi, alt.value(1), alt.value(0.2))

bar_chart = (
    alt.Chart(_top_states)
    .mark_bar()
    .encode(
        x='value',
        y=alt.Y('state', sort='-x'),
        color='value',
        opacity=_bar_opacity,
    )
    .properties(width=100, height=240)
    .add_selection(state_multi)
)
##### Third chart: line #####
# One line per state over the filtered rows. `time_value` is stored as
# 'YYYY-MM-DD' strings, so it is marked temporal (:T) explicitly; without the
# type it would be treated as a nominal category per date instead of a
# continuous time axis.
value_chart = (
    alt.Chart(
        df[labels],
        title='Average confirmed cases in 7-day',
    )
    .mark_line()
    .encode(
        x=alt.X('time_value:T'),
        y=alt.Y('value'),
        color='state',
        opacity=alt.condition(state_multi, alt.value(1), alt.value(0.1)),
        tooltip=['state'],
    )
    .properties(width=550, height=300)
    .add_selection(state_multi)
)
# Layout: map and bar chart side by side, with the line chart underneath.
_top_row = alt.hconcat(map_chart, bar_chart)
st.altair_chart(alt.vconcat(_top_row, value_chart))
Editor is loading...