Untitled
unknown
plain_text
a year ago
3.9 kB
7
Indexable
# --- Load the claims extract and keep only audited claims -------------------
# Rows are kept when 'Claim Status' contains 'Findings'. na=False makes rows
# with a missing status drop out instead of raising on a NaN-containing mask.
data = pd.read_csv('/c01/home/lidbpmb/ML/Profitable Claim Prediction/Master_010120_to_280524.csv')
data = data[data['Claim Status'].str.contains('Findings', na=False)].reset_index(drop=True)

# Target: 'Closed No Findings' -> 0, 'Closed Clinical Findings' -> 1.
# Any other status that contains 'Findings' is left unchanged by replace().
data['ML_Target_Variable'] = data['Claim Status'].replace(
    {'Closed No Findings': 0, 'Closed Clinical Findings': 1}
)

# NOTE(review): `remove` is not defined in this section; presumably a list of
# column labels defined earlier in the file -- confirm.
data = data.drop(remove, axis=1)

# --- Drop rows whose key dates cannot be parsed or whose paid amount is missing
for _date_col in ('ac8_adm_date', 'ac8_dcg_date', 'patnt_bdate'):
    _parsed = pd.to_datetime(data[_date_col], errors='coerce')
    data = data[_parsed.notna()].reset_index(drop=True)
data = data[data['paid'].notna()].reset_index(drop=True)

# --- Reference sheets --------------------------------------------------------
drg_mapping = pd.read_excel('Mapping Sheet - Payment Integrity.xlsx', sheet_name='DRG_CODE_2022')

# Lookup built from the first two columns of the sheet (first column -> second
# column). Columns are addressed by position with iloc: integer keys on a
# label-indexed row (`row[0]`) rely on a deprecated positional fallback.
_cc_mcc_sheet = pd.read_excel('Mapping Sheet - Payment Integrity.xlsx', sheet_name='Diag_Code - MCC_CC Mapping')
cc_mcc = dict(zip(_cc_mcc_sheet.iloc[:, 0], _cc_mcc_sheet.iloc[:, 1]))

# Default merge is an inner join: claims whose 'Original DRG Code' has no
# matching 'MS-DRG' in the mapping sheet are dropped here.
data = pd.merge(left=data, right=drg_mapping, left_on='Original DRG Code', right_on='MS-DRG')
data.loc[data['MDC'].isna(), 'MDC'] = 'POST'

# --- Stratified 75/25 train/test split on the target -------------------------
split = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
for train_index, test_index in split.split(data, data['ML_Target_Variable']):
    # split() yields positional row indices, so select with iloc. The merge
    # above returns a fresh 0..n-1 index, so this picks the same rows as loc.
    train = data.iloc[train_index]
    test = data.iloc[test_index]

# Bare expression: only displays output when run as a notebook cell.
train['pmt_recip_cd'].value_counts(normalize=True)
def preprocess_function(data):
    """Add engineered model features to a claims DataFrame and return it.

    The frame is modified in place; the same object is returned.

    Reads the columns 'DRG Group', 'Prepay', 'MDC', 'TYPE', 'Plan Type',
    'ac8_adm_date', 'ac8_dcg_date', 'patnt_bdate', 'paid', 'charged',
    'eligible', 'clm_proc_loc', 'clm_typ', 'clm_stat_cd' and 'admit_diag_cd'.
    Also reads the module-level ``cc_mcc`` dict to look up each admitting
    diagnosis code.

    Adds the columns 'Medicare', 'admission_on_weekend',
    'discharge_on_weekend', 'stay_len', 'stay_len_w', 'stay_len_m', 'Age',
    'paid_eligable_diff', 'Infant', 'stay_paid_w', 'stay_paid_m' and
    'admit_cc_mcc', and overwrites several of the input columns with their
    encoded form.

    Raises whatever pandas raises when 'Age' cannot be cast to int, i.e. when
    an admission or birth date fails to parse (NaT).
    """
    # Set minor groups as other.
    major_groups = ['MCC/CC/Targeted', 'Sepsis', 'Neonatal', 'Other', 'Respiratory', 'CHF',
                    'CVA Stroke']
    data.loc[~data['DRG Group'].isin(major_groups), 'DRG Group'] = 'Other'

    data['Prepay'] = data['Prepay'].replace({'Yes': 1, 'No': 0})
    data[['MDC', 'TYPE']] = data[['MDC', 'TYPE']].astype(str)
    data['Medicare'] = (data['Plan Type'] == 'MEDICARE').astype(int)

    # Parse admission/discharge once and reuse the parsed columns below.
    data['ac8_adm_date'] = pd.to_datetime(data['ac8_adm_date'], errors='coerce')
    data['ac8_dcg_date'] = pd.to_datetime(data['ac8_dcg_date'], errors='coerce')
    admitted = data['ac8_adm_date']
    discharged = data['ac8_dcg_date']

    # pandas numbers weekdays Monday=0 ... Sunday=6, so the weekend is 5 and 6.
    # (The previous test for 0 or 1 flagged Monday/Tuesday.) NaT compares
    # False and therefore yields 0.
    data['admission_on_weekend'] = (admitted.dt.day_of_week >= 5).astype(int)
    data['discharge_on_weekend'] = (discharged.dt.day_of_week >= 5).astype(int)

    # Length of stay in days, whole weeks (7 days) and whole 28-day periods.
    stay_days = (discharged - admitted).dt.days
    data['stay_len'] = stay_days
    data['stay_len_w'] = stay_days // 7
    data['stay_len_m'] = stay_days // 28

    data['patnt_bdate'] = pd.to_datetime(data['patnt_bdate'], errors='coerce')
    # Age at admission in 365-day years, truncated to an integer.
    data['Age'] = ((admitted - data['patnt_bdate']).dt.days / 365).astype(int)

    # Floor monetary amounts at 1; clip() leaves NaN untouched, so a missing
    # 'eligible' can still be back-filled from 'paid' afterwards.
    amount_cols = ['paid', 'charged', 'eligible']
    data[amount_cols] = data[amount_cols].astype(float).clip(lower=1)
    missing_eligible = data['eligible'].isna()
    data.loc[missing_eligible, 'eligible'] = data.loc[missing_eligible, 'paid']
    data['paid_eligable_diff'] = (data['eligible'] - data['paid']).clip(lower=1)

    data['Infant'] = (data['Age'] <= 2).astype(int)

    # Paid amount per started week / 28-day period (+1 avoids dividing by zero
    # for stays shorter than one period).
    # NOTE: a discharge 1-7 days before admission gives stay_len_w == -1 and
    # therefore a zero denominator (inf); such rows are not handled here.
    data['stay_paid_w'] = data['paid'] / (data['stay_len_w'].astype('float') + 1)
    data['stay_paid_m'] = data['paid'] / (data['stay_len_m'].astype('float') + 1)

    data['clm_proc_loc'] = data['clm_proc_loc'].astype(str)
    data['clm_typ'] = (data['clm_typ'] == 'I').astype(int)
    data['clm_stat_cd'] = (data['clm_stat_cd'] == 'A').astype(int)

    # Severity of the admitting diagnosis: codes absent from cc_mcc -> 0,
    # 'CC' -> 1, 'MCC' -> 2.
    data['admit_cc_mcc'] = data['admit_diag_cd'].apply(lambda code: cc_mcc.get(code, 0))
    data['admit_cc_mcc'] = data['admit_cc_mcc'].replace({'CC': 1, 'MCC': 2}).astype(float)
    return data
Editor is loading...
Leave a Comment