Untitled

 avatar
unknown
plain_text
a year ago
1.4 kB
11
Indexable
def _load_data(**kwargs):
    df = pd.read_csv('data/qrdata/ebola_survey.csv')
    return df

def _explore_data(df: pd.DataFrame, **kwargs):
    # 5 number summary
    five_num_summary = df['quarantine'].describe().values.tolist()
    
    # frequency counts 
    freq_counts = df['quarantine'].value_counts().values.tolist()
    
    # relative frequency counts
    rel_freq_counts = (df['quarantine'].value_counts(normalize=True)*100).values.tolist()
    
    ipdb.set_trace()
    return five_num_summary, freq_counts, rel_freq_counts

def _calc_standard_error(df: pd.DataFrame, **kwargs):
    std_error = df['quarantine'].std() / np.sqrt(len(df))
    std_error = round(std_error, 3)
    return std_error

# Pipeline definition. Insertion order is preserved by the OrderedDict:
# load first, then the two analyses. Each step's dict carries a 'version'
# string and, for the analyses, a 'df' entry naming the load step.
# NOTE(review): 'df' presumably tells the runner to pass that step's output
# as the `df` argument — confirm against SingletonStep/conduct.
step_dict = OrderedDict(
    [
        (
            'step_load_data',
            SingletonStep(_load_data, {'version': "002"}),
        ),
        (
            'step_explore_data',
            SingletonStep(_explore_data, {'version': "004", 'df': 'step_load_data'}),
        ),
        (
            'step_calc_std_error',
            SingletonStep(_calc_standard_error, {'version': "002", 'df': 'step_load_data'}),
        ),
    ]
)

run_metadata = conduct(CACHE_DIR, step_dict, LOG_DIR)

# Fetch the stored result of the exploration step and print its three parts
# (describe() values, frequency counts, relative frequency counts) in order.
output_step_explore_data = load_artifact_with_step_name(run_metadata, 'step_explore_data')
for _position, _label in enumerate(
    (
        "Five Number Summary: ",
        "Frequency Counts: ",
        "Relative Frequency Counts: ",
    )
):
    print(_label, output_step_explore_data[_position])
Editor is loading...
Leave a Comment