Untitled
unknown
plain_text
25 days ago
902 B
1
Indexable
Never
# Columns holding durations. Wherever they are non-null, the raw CSV values
# are hh:mm:ss.ffff strings, not milliseconds, so they are converted on load.
DURATION_COLUMNS = ['ASA', 'Avg Wait', 'Avg Handle', 'Avg Talk', 'Avg Hold', 'Avg ACW']


def durations_to_ms(durations):
    """Convert a Series of duration values to float milliseconds.

    Values are parsed with ``pd.to_timedelta``; missing entries come back
    as NaN. The conversion is vectorised (``.dt.total_seconds()``) instead
    of calling a Python lambda per element, and already yields float64.
    """
    return pd.to_timedelta(durations).dt.total_seconds() * 1e3


def duplicated_interval_rows(df):
    """Return every row of *df* whose 'Interval Start' value occurs more than once."""
    return df[df.duplicated(['Interval Start'], keep=False)]


# Load every CSV whose name starts with '30', tag it with a short name taken
# from the file name, and normalise the duration columns to milliseconds.
thirtymins_dfs = []
for file in sorted(glob('30*.csv')):
    print(file)
    # Short name = the segment after the first '-', with '.csv' stripped.
    # NOTE(review): a file name without a '-' raises IndexError here.
    nm = file.split('-')[1].split('.csv')[0]
    data = pd.read_csv(file)
    data['file'] = nm
    for col_name in DURATION_COLUMNS:
        data[col_name] = durations_to_ms(data[col_name])
    thirtymins_dfs.append(data)

# For each file, quickly check for duplicate date rows: print the file tag,
# the number of distinct 'Interval Start' values, the total row count, and
# any rows sharing an 'Interval Start'.
for df in thirtymins_dfs:
    # Positional access, guarded: a header-only CSV has no first row to read
    # the tag from, and label lookup `[0]` would raise KeyError on it.
    print(df['file'].iloc[0] if len(df) else '(no rows)')
    print(df['Interval Start'].nunique())
    print('----')
    print(len(df['Interval Start']))
    print('----')
    print(duplicated_interval_rows(df))
# Original author's observation: no direct duplicates were found in the data.
Leave a Comment