Untitled
unknown
plain_text
a year ago
1.4 kB
13
Indexable
def _load_data(**kwargs):
    """Read the Ebola survey CSV and return it as a DataFrame.

    The path ``data/qrdata/ebola_survey.csv`` is relative, so it is resolved
    against the current working directory of the process.

    Args:
        **kwargs: Accepted but not used by this function.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    return pd.read_csv('data/qrdata/ebola_survey.csv')
def _explore_data(df: pd.DataFrame, **kwargs):
    """Summarise the ``quarantine`` column of *df*.

    Args:
        df: Frame containing a ``quarantine`` column.
        **kwargs: Accepted but not used by this function.

    Returns:
        A 3-tuple of plain Python lists:

        * the values of ``Series.describe()`` (labels dropped),
        * the frequency of each distinct value, in ``value_counts()`` order,
        * the same frequencies expressed as percentages (0-100).

    Raises:
        KeyError: If *df* has no ``quarantine`` column.
    """
    quarantine = df['quarantine']

    # NOTE(review): historically called a "5 number summary", but describe()
    # returns count/mean/std/min/quartiles/max for numeric data and
    # count/unique/top/freq for object data -- confirm which one is wanted.
    five_num_summary = quarantine.describe().values.tolist()

    # Only the counts are kept; the category labels live in the index, which
    # .values discards, so callers rely on value_counts() ordering.
    freq_counts = quarantine.value_counts().values.tolist()

    # normalize=True yields proportions; scale to percentages.
    rel_freq_counts = (
        quarantine.value_counts(normalize=True) * 100
    ).values.tolist()

    # The interactive debugger breakpoint that used to sit here was removed:
    # it blocked every non-interactive run of the pipeline.
    return five_num_summary, freq_counts, rel_freq_counts
def _calc_standard_error(df: pd.DataFrame, **kwargs):
    """Return the standard error of the mean of ``df['quarantine']``.

    Computed as the sample standard deviation divided by the square root of
    the number of observations, rounded to three decimal places.

    Args:
        df: Frame containing a ``quarantine`` column.
        **kwargs: Accepted but not used by this function.

    Returns:
        The standard error rounded to 3 decimals.

    Raises:
        KeyError: If *df* has no ``quarantine`` column.
    """
    quarantine = df['quarantine']
    # Series.std() skips missing values, so the sample size must skip them
    # too; len(df) would count NaN rows and understate the standard error.
    n_observed = quarantine.count()
    std_error = quarantine.std() / np.sqrt(n_observed)
    return round(std_error, 3)
# Register the pipeline steps in execution order. Each SingletonStep pairs a
# function with a parameter dict holding a 'version' string.
# NOTE(review): 'df': 'step_load_data' presumably tells the runner to pass
# that step's output as the ``df`` argument -- confirm against SingletonStep.
step_dict = OrderedDict()
step_dict['step_load_data'] = SingletonStep(_load_data, {'version': "002"})
step_dict['step_explore_data'] = SingletonStep(
    _explore_data, {'version': "004", 'df': 'step_load_data'}
)
step_dict['step_calc_std_error'] = SingletonStep(
    _calc_standard_error, {'version': "002", 'df': 'step_load_data'}
)

run_metadata = conduct(CACHE_DIR, step_dict, LOG_DIR)

# Load the exploration result: the 3-tuple returned by _explore_data.
output_step_explore_data = load_artifact_with_step_name(run_metadata, 'step_explore_data')
print("Five Number Summary: ", output_step_explore_data[0])
print("Frequency Counts: ", output_step_explore_data[1])
print("Relative Frequency Counts: ", output_step_explore_data[2])
Leave a Comment