Untitled
unknown
python
3 years ago
3.7 kB
5
Indexable
import streamlit as st
import pandas as pd
import altair as alt
import numpy as np
from datetime import datetime
from datetime import date
from vega_datasets import data
st.header("Covid-19 Statistics")


@st.cache(allow_output_mutation=True)
def load_data():
    """Read the covidcast JHU-CSSE CSV export into a DataFrame.

    The result is memoised by ``st.cache``. ``allow_output_mutation=True``
    turns off Streamlit's check for mutation of the returned object, so
    every caller receives the same cached DataFrame instance.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the file named below.
    """
    csv_path = "covidcast-jhu-csse-confirmed_7dav_incidence_num-2020-03-01-to-2022-10-10.csv"
    raw_frame = pd.read_csv(csv_path)
    return raw_frame
# Build lookup tables keyed by the state abbreviation column (STUSAB):
# one to the STATE column (used as the map id) and one to STATE_NAME.
df_state_id = pd.read_csv('state.txt', sep='|')
map_state_id = dict(zip(df_state_id['STUSAB'], df_state_id['STATE']))
map_state_name = dict(zip(df_state_id['STUSAB'], df_state_id['STATE_NAME']))

# Load the Covid-19 data.
# load_data() is cached with allow_output_mutation=True, so it hands back
# the cached object itself. Work on a copy: the in-place edits below
# (in particular renaming 'value' to 'cases') would otherwise be applied to
# the cached frame, and the next script rerun would fail with
# KeyError: 'value'.
df = load_data().copy()

# Clean data: clamp negative values to zero.
df.loc[df['value'] < 0, 'value'] = 0

# New columns: id and state name, looked up from the upper-cased geo_value.
# The dict lookup is intentionally strict: an unknown code raises KeyError.
state_codes = df['geo_value'].str.upper()
df['id'] = state_codes.apply(lambda code: map_state_id[code])
df['state'] = state_codes.apply(lambda code: map_state_name[code])
df.rename(columns={'value': 'cases'}, inplace=True)

# Dates as a numpy object array of datetime.date, aligned row-for-row with
# df. Parsed in one vectorised call instead of one strptime() per row.
df_dates = pd.to_datetime(df['time_value'], format='%Y-%m-%d').dt.date.to_numpy()
##### First choice #####
# Optional raw-data view: the table is written out only when the checkbox
# is ticked.
show_raw_data = st.checkbox("Want to see the Raw Data?")
if show_raw_data:
    st.write(df)
# Row mask: every row starts selected and each filter below narrows it.
# A boolean Series is used (instead of a Series of integer 1s) so the mask
# has a boolean dtype from the start.
labels = pd.Series(True, index=df.index, dtype=bool)

##### Second choice #####
state_codes_upper = df['geo_value'].str.upper()
states_select = st.multiselect('Interested in specific states?', state_codes_upper.unique())

# Filter labels with the selected states. The options offered above are the
# upper-cased codes, so the membership test must use the upper-cased codes
# too; testing the raw 'geo_value' column fails to match whenever that
# column is not already upper-case.
if states_select:
    labels &= state_codes_upper.isin(states_select)
##### Third choice #####
min_date = min(df_dates)
max_date = max(df_dates)
chosen_time_period = st.slider(
    'Specify a period of time!',
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
)

# Filter labels with the selected time period. Both ends are inclusive:
# with strict comparisons the default (min_date, max_date) selection would
# drop the first and last day, and a single-day selection would be empty.
period_start, period_end = chosen_time_period
labels &= (df_dates >= period_start) & (df_dates <= period_end)
st.write("Click on the states you are interested in.")

# Multi-selection keyed on the 'state' field; the charts below attach it.
state_multi = alt.selection_multi(fields=['state'])

# 'states' feature of the us_10m TopoJSON from vega_datasets.
states = alt.topo_feature(data.us_10m.url, feature='states')

# One row per geo_value for the rows kept by the mask: mean of 'cases',
# and the mode of 'id' and of 'state' within each group.
per_state_aggregations = {
    'cases': ('cases', 'mean'),
    'id': ('id', pd.Series.mode),
    'state': ('state', pd.Series.mode),
}
df_group_by_state = df[labels].groupby(by='geo_value').agg(**per_state_aggregations)
##### First chart: map #####
# Geoshape map coloured by the per-state mean of 'cases'. The 'cases' and
# 'state' fields are joined onto each shape through a lookup on 'id'.
# Fix: the displayed title previously misspelled "confirmed".
map_lookup = alt.LookupData(df_group_by_state, 'id', ['cases', 'state'])
map_opacity = alt.condition(state_multi, alt.value(1), alt.value(0.2))

map_chart = (
    alt.Chart(
        states,
        title="Average numbers of covid-19 confirmed cases in the chosen time",
    )
    .mark_geoshape()
    .encode(
        color='cases:Q',
        # Selected states are fully opaque, the rest are dimmed.
        opacity=map_opacity,
        tooltip=['state:N', 'cases:Q'],
    )
    .transform_lookup(lookup='id', from_=map_lookup)
    .properties(width=400, height=240)
    .project(type='albersUsa')
    .add_selection(state_multi)
)
##### Second chart: bar #####
# Horizontal bars for the 15 rows with the largest 'cases' value, ordered
# by descending x. Uses the same selection-driven opacity as the map.
top_states = df_group_by_state.nlargest(15, 'cases')
bar_opacity = alt.condition(state_multi, alt.value(1), alt.value(0.2))

bar_chart = (
    alt.Chart(top_states)
    .mark_bar()
    .encode(
        x=alt.X('cases'),
        y=alt.Y('state', sort='-x'),
        color='cases',
        opacity=bar_opacity,
    )
    .properties(width=100, height=240)
    .add_selection(state_multi)
)
##### Third chart: line #####
# One line per state over the rows kept by the mask.
# 'time_value' holds 'YYYY-MM-DD' strings, so without an explicit type
# Altair treats it as nominal and draws one discrete tick per date. The
# ':T' suffix declares it temporal, giving a continuous time axis.
line_opacity = alt.condition(state_multi, alt.value(1), alt.value(0.1))

value_chart = (
    alt.Chart(
        df[labels],
        title='Average confirmed cases in 7-day',
    )
    .mark_line()
    .encode(
        x=alt.X('time_value:T'),
        y=alt.Y('cases'),
        color='state',
        # Unselected states are faded more strongly here than in the other
        # charts (0.1 instead of 0.2).
        opacity=line_opacity,
        tooltip=['state'],
    )
    .properties(width=550, height=300)
    .add_selection(state_multi)
)
# Layout: map and bar chart side by side, with the line chart underneath.
top_row = map_chart | bar_chart
dashboard = top_row & value_chart
st.altair_chart(dashboard)
Editor is loading...