Untitled
unknown
plain_text
5 months ago
3.9 kB
4
Indexable
# Data preparation for the claim-findings model: load the claim master file,
# build the binary target, drop unusable rows, join the DRG mapping, create a
# stratified train/test split and define the feature-engineering function.
#
# NOTE(review): `pd`, `StratifiedShuffleSplit` and `remove` are used below but
# are not defined in this section; presumably they are imported/defined
# earlier (e.g. in a previous notebook cell) -- confirm.

MAPPING_WORKBOOK = 'Mapping Sheet - Payment Integrity.xlsx'
DATE_COLUMNS = ['ac8_adm_date', 'ac8_dcg_date', 'patnt_bdate']

data = pd.read_csv('/c01/home/lidbpmb/ML/Profitable Claim Prediction/Master_010120_to_280524.csv')

# Keep only claims whose status mentions 'Findings'. na=False treats a missing
# status as "no match"; without it a NaN in the mask makes the boolean
# indexing fail.
data = data[data['Claim Status'].str.contains('Findings', na=False)].reset_index(drop=True)

# Binary target: 0 = 'Closed No Findings', 1 = 'Closed Clinical Findings'.
# Any other status that survived the filter above is left unchanged.
data['ML_Target_Variable'] = data['Claim Status'].replace(
    {'Closed No Findings': 0, 'Closed Clinical Findings': 1}
)
data = data.drop(remove, axis=1)

# Drop rows whose admission, discharge or birth date cannot be parsed, and
# rows without a paid amount.
for _column in DATE_COLUMNS:
    _parsed = pd.to_datetime(data[_column], errors='coerce')
    data = data[_parsed.notna()].reset_index(drop=True)
data = data[data['paid'].notna()].reset_index(drop=True)

drg_mapping = pd.read_excel(MAPPING_WORKBOOK, sheet_name='DRG_CODE_2022')

# Lookup built from the first two columns of the mapping sheet (first column
# -> second column). Columns are addressed by position explicitly instead of
# relying on the deprecated integer-key fallback of label-indexed Series.
_cc_mcc_sheet = pd.read_excel(MAPPING_WORKBOOK, sheet_name='Diag_Code - MCC_CC Mapping')
cc_mcc = dict(zip(_cc_mcc_sheet.iloc[:, 0], _cc_mcc_sheet.iloc[:, 1]))

# pd.merge defaults to an inner join, so claims whose 'Original DRG Code' has
# no 'MS-DRG' row in the mapping are dropped here. The result carries a fresh
# 0..n-1 index.
data = pd.merge(left=data, right=drg_mapping, left_on='Original DRG Code', right_on='MS-DRG')
data.loc[data['MDC'].isna(), 'MDC'] = 'POST'

# Single stratified 75/25 split on the target. The splitter yields positional
# indices, hence .iloc; .copy() gives independent frames that can be modified
# in place by preprocess_function without chained-assignment warnings.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
train_index, test_index = next(split.split(data, data['ML_Target_Variable']))
train = data.iloc[train_index].copy()
test = data.iloc[test_index].copy()

# Exploratory look at the class shares of 'pmt_recip_cd'; the result is not stored.
train['pmt_recip_cd'].value_counts(normalize=True)


def preprocess_function(data):
    """Add engineered features to a claims frame and return it.

    The frame is modified in place and the same object is returned. The
    function reads the module-level ``cc_mcc`` lookup.

    Columns read: 'DRG Group', 'Prepay', 'MDC', 'TYPE', 'Plan Type',
    'ac8_adm_date', 'ac8_dcg_date', 'patnt_bdate', 'paid', 'charged',
    'eligible', 'clm_proc_loc', 'clm_typ', 'clm_stat_cd', 'admit_diag_cd'.

    Columns added: 'Medicare', 'admission_on_weekend', 'discharge_on_weekend',
    'stay_len', 'stay_len_w', 'stay_len_m', 'Age', 'paid_eligable_diff',
    'Infant', 'stay_paid_w', 'stay_paid_m', 'admit_cc_mcc'.

    Raises:
        ValueError: from ``astype(int)`` when 'Age' cannot be computed for a
            row (an unparseable admission or birth date yields NaN).
    """
    # Set minor DRG groups to 'Other'.
    major_groups = ['MCC/CC/Targeted', 'Sepsis', 'Neonatal', 'Other',
                    'Respiratory', 'CHF', 'CVA Stroke']
    data.loc[~data['DRG Group'].isin(major_groups), 'DRG Group'] = 'Other'

    data['Prepay'] = data['Prepay'].replace({'Yes': 1, 'No': 0})
    data[['MDC', 'TYPE']] = data[['MDC', 'TYPE']].astype(str)
    data['Medicare'] = (data['Plan Type'] == 'MEDICARE').astype(int)

    # Parse each date column once; unparseable values become NaT.
    for column in ('ac8_adm_date', 'ac8_dcg_date', 'patnt_bdate'):
        data[column] = pd.to_datetime(data[column], errors='coerce')
    admitted = data['ac8_adm_date']
    discharged = data['ac8_dcg_date']

    # pandas numbers weekdays Monday=0 ... Sunday=6, so the weekend is 5 and 6.
    # (Testing for 0 or 1 would flag Monday/Tuesday instead.) NaT compares
    # False and therefore yields 0.
    data['admission_on_weekend'] = (admitted.dt.day_of_week >= 5).astype(int)
    data['discharge_on_weekend'] = (discharged.dt.day_of_week >= 5).astype(int)

    # Length of stay in days, whole 7-day blocks and whole 28-day blocks.
    stay_days = (discharged - admitted).dt.days
    data['stay_len'] = stay_days
    data['stay_len_w'] = stay_days // 7
    data['stay_len_m'] = stay_days // 28

    # Age at admission in 365-day years, truncated by the integer cast.
    data['Age'] = ((admitted - data['patnt_bdate']).dt.days / 365).astype(int)
    data['Infant'] = (data['Age'] <= 2).astype(int)

    # Amounts are floored at 1; a missing 'eligible' falls back to 'paid'.
    money_columns = ['paid', 'charged', 'eligible']
    data[money_columns] = data[money_columns].astype(float).clip(lower=1)
    data['eligible'] = data['eligible'].fillna(data['paid'])
    data['paid_eligable_diff'] = (data['eligible'] - data['paid']).clip(lower=1)

    # Paid amount per started week / 28-day block (+1 avoids dividing by zero
    # for stays shorter than one block).
    data['stay_paid_w'] = data['paid'].astype('float') / (data['stay_len_w'].astype('float') + 1)
    data['stay_paid_m'] = data['paid'].astype('float') / (data['stay_len_m'].astype('float') + 1)

    data['clm_proc_loc'] = data['clm_proc_loc'].astype(str)
    data['clm_typ'] = (data['clm_typ'] == 'I').astype(int)
    data['clm_stat_cd'] = (data['clm_stat_cd'] == 'A').astype(int)

    # Admitting diagnosis severity: 0 when the code is not in the lookup,
    # 1 for 'CC', 2 for 'MCC'.
    data['admit_cc_mcc'] = data['admit_diag_cd'].apply(lambda code: cc_mcc.get(code, 0))
    data['admit_cc_mcc'] = data['admit_cc_mcc'].replace({'CC': 1, 'MCC': 2}).astype(float)
    return data
Editor is loading...
Leave a Comment