Untitled
unknown
python
2 years ago
3.7 kB
2
Indexable
# Streamlit dashboard: 7-day-average confirmed Covid-19 cases per US state,
# shown as a choropleth map, a top-15 bar chart and a per-state line chart
# that all share one click selection.
from datetime import datetime
from datetime import date

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
from vega_datasets import data

st.header("Covid-19 Statistics")


@st.cache(allow_output_mutation=True)
def load_data():
    """Read the covidcast CSV into a DataFrame.

    The result is cached by Streamlit and, because of
    ``allow_output_mutation=True``, handed back by reference on every rerun.
    Callers must therefore copy it before modifying it.
    """
    return pd.read_csv("covidcast-jhu-csse-confirmed_7dav_incidence_num-2020-03-01-to-2022-10-10.csv")


# construct maps for state id/name (state.txt is pipe-separated with
# STUSAB / STATE / STATE_NAME columns)
df_state_id = pd.read_csv('state.txt', sep='|')
map_state_id = dict(zip(df_state_id['STUSAB'], df_state_id['STATE']))
map_state_name = dict(zip(df_state_id['STUSAB'], df_state_id['STATE_NAME']))

# load covid 19 data
# Work on a copy: the cleaning below renames and overwrites columns, and doing
# that on the cached object would leave it without a 'value' column on the
# next script rerun (KeyError as soon as any widget is touched).
df = load_data().copy()

# clean data: negative incidence values are clamped to zero
df.loc[df['value'] < 0, 'value'] = 0

# new columns: id and state names, both looked up by upper-cased state code
# (an unknown code raises KeyError, as before)
state_codes = df['geo_value'].str.upper()
df['id'] = state_codes.apply(lambda code: map_state_id[code])
df['state'] = state_codes.apply(lambda code: map_state_name[code])
df.rename(columns={'value': 'cases'}, inplace=True)

# date (np array of datetime.date objects, aligned with df rows)
df_dates = pd.to_datetime(df['time_value'], format='%Y-%m-%d').dt.date.to_numpy()

##### First choice #####
if st.checkbox("Want to see the Raw Data?"):
    st.write(df)

# row mask, initially selecting every row
labels = pd.Series(True, index=df.index)

##### Second choice #####
states_select = st.multiselect('Interested in specific states?', state_codes.unique())
# filter labels with selected states
if states_select:
    # The options offered above are upper-cased codes, so the comparison must
    # use the upper-cased column as well; comparing the raw 'geo_value' would
    # match nothing whenever the raw codes are lower-case.
    labels &= state_codes.isin(states_select)

##### Third choice #####
min_date = min(df_dates)
max_date = max(df_dates)
chosen_time_period = st.slider(
    'Specify a period of time!',
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
)
# filter labels with selected time period
# Bounds are inclusive so the days the slider handles sit on are kept
# (with the default full range, the first and last day must not be dropped).
period_start, period_end = chosen_time_period
labels &= (df_dates >= period_start) & (df_dates <= period_end)

st.write("Click on the states you are interested in.")
state_multi = alt.selection_multi(fields=['state'])

states = alt.topo_feature(data.us_10m.url, feature='states')
# 'id' and 'state' are derived from 'geo_value', so they are constant within
# each group and taking the first value is sufficient.
df_group_by_state = df[labels].groupby(by='geo_value').agg(
    cases=('cases', 'mean'),
    id=('id', 'first'),
    state=('state', 'first'),
)

##### First chart: map #####
map_chart = alt.Chart(
    states,
    title="Average numbers of covid-19 comfirmed cases in the chosen time"
).mark_geoshape().encode(
    color='cases:Q',
    opacity=alt.condition(state_multi, alt.value(1), alt.value(0.2)),
    tooltip=['state:N', 'cases:Q'],
).transform_lookup(
    # join the per-state averages onto the map shapes by their id
    lookup='id',
    from_=alt.LookupData(df_group_by_state, 'id', ['cases', 'state'])
).properties(
    width=400,
    height=240
).project(
    type='albersUsa'
).add_selection(state_multi)

##### Second chart: bar #####
# the 15 states with the highest average case count in the filtered data
bar_chart = alt.Chart(df_group_by_state.nlargest(15, 'cases')).mark_bar().encode(
    x=alt.X('cases'),
    y=alt.Y('state', sort='-x'),
    color='cases',
    opacity=alt.condition(state_multi, alt.value(1), alt.value(0.2)),
).properties(
    width=100,
    height=240,
).add_selection(state_multi)

##### Third chart: line #####
value_chart = alt.Chart(
    df[labels],
    title='Average confirmed cases in 7-day',
).mark_line().encode(
    x=alt.X('time_value'),
    y=alt.Y('cases'),
    color='state',
    opacity=alt.condition(state_multi, alt.value(1), alt.value(0.1)),
    tooltip=['state'],
).properties(
    width=550,
    height=300
).add_selection(state_multi)

# layout: map and bar chart side by side, line chart underneath
st.altair_chart((map_chart | bar_chart) & value_chart)
Editor is loading...