Untitled
import pandas as pd import pandas_gbq def do_load(df): project_id = 'bmas-eu-ng-rva-uat-data' table_id = 'bmas-eu-ng-rva-data-dev.WEATHER_TEST.Program_Mapping' pandas_gbq.to_gbq(df, table_id, project_id) return df def preprocess(SQL_Query, project_id): test = pd.read_gbq(SQL_Query, project_id=project_id) if 'Summary' in test.columns: test['Summary'] = test['Summary'].fillna('') print('Preprocess completed') return test def do_import(processed_df): df = pd.read_gbq() return df def do_process(finaldf, project_mappingdf): print(finaldf.head()) # Ensure required columns exist required_columns = ['Label', 'Project name', 'Service description', 'SKU description', 'Credit type', 'Program', 'Region', 'Environment'] for col in required_columns: if col not in finaldf.columns: finaldf[col] = '' # Fill NaN labels with the Project name finaldf.loc[finaldf['Label'].isna(), 'Label'] = finaldf['Project name'] finaldf['Label'] = finaldf['Label'].str.replace('project:', '') # Initial conditions finaldf.loc[ (finaldf['Project name'] == '[Charges not specific to a project]') & (finaldf['Service description'] == 'Support'), ['Program', 'Region', 'Environment'] ] = ['All', '', ''] finaldf.loc[ (finaldf['Project name'] == '[Charges not specific to a project]') & (finaldf['Service description'] != 'Support'), 'Program' ] = 'All' finaldf.loc[ (finaldf['Project name'] == '[Charges not specific to a project]') & (finaldf['Service description'] != 'Support'), 'Environment' ] = '' finaldf.loc[ (finaldf['Project name'] == '[Charges not specific to a project]') & (finaldf['Service description'] != 'Support'), 'Region' ] = finaldf['SKU description'].apply(lambda x: 'EU' if 'EU' in str(x) else ('IND' if 'IND' in str(x) else ('AUS' if 'AUS' in str(x) else ('US' if 'US' in str(x) else '')))) def map_region(description): if 'EU' in description: return 'EU' elif 'IND' in description: return 'IND' elif 'AUS' in description: return 'AUS' elif 'US' in description: return 'US' else: return '' finaldf.loc[finaldf['Project name'] == 'bq-admin-ericsson', 'Program'] = 'All' finaldf.loc[finaldf['Project name'] == 'bq-admin-ericsson', 'Environment'] = '' finaldf.loc[finaldf['Project name'] == 'bq-admin-ericsson', 'Region'] = finaldf.loc[finaldf['Label'] == 'bq-admin-ericsson', 'SKU description'].apply(map_region) finaldf.loc[finaldf['Credit type'] == 'PROMOTION', 'Program'] = 'PROMOTION' # Ensure there are no missing Program, Region, Environment before mapping finaldf['Program'] = finaldf['Program'].astype(str) finaldf['Region'] = finaldf['Region'].astype(str) finaldf['Environment'] = finaldf['Environment'].astype(str) # Merge to fill in missing Program, Region, Environment merge_cols = ['Labels', 'Program', 'Region', 'Environment'] merged_df = finaldf.merge(project_mappingdf[merge_cols], on='Label', suffixes=('', '_mapped'), how='left') # Update missing values with mapped values finaldf['Program'] = finaldf['Program'].mask(finaldf['Program'] == '', merged_df['Program_mapped']) finaldf['Region'] = finaldf['Region'].mask(finaldf['Region'] == '', merged_df['Region_mapped']) finaldf['Environment'] = finaldf['Environment'].mask(finaldf['Environment'] == '', merged_df['Environment_mapped']) # Saving output finaldf.to_csv("project_mapping.csv", index=False) print(finaldf.head()) return finaldf if __name__ == '__main__': finaldf = pd.read_csv(r"C:\Users\ensaani\OneDrive - Ericsson\Desktop\NGRVA Projects\GCP Billing\June data\BCSS_Cost table, 2024-06-01 — 2024-06-30.csv") project_mappingdf = pd.read_excel(r"C:\Users\ensaani\Downloads\BCSS, 2024-06-01 — 2024-06-30_P 2.xlsx", sheet_name='Project-Program-Mapping') SQL_Query = 'SELECT * FROM `bmas-eu-ng-rva-data-dev.WEATHER_TEST.Project_Program_Mapping` LIMIT 1000' processed_df = preprocess(SQL_Query, project_id='bmas-eu-ng-rva-uat-data') finaldf = do_process(finaldf, project_mappingdf) print("Done")
Leave a Comment