Untitled
unknown
plain_text
a year ago
4.1 kB
9
Indexable
import pandas as pd
import pandas_gbq
def do_load(df):
project_id = 'bmas-eu-ng-rva-uat-data'
table_id = 'bmas-eu-ng-rva-data-dev.WEATHER_TEST.Project_Program_Mapping'
pandas_gbq.to_gbq(df, table_id, project_id)
return df
# def preprocess(SQL_Query, project_id):
# test = pd.read_gbq(SQL_Query, project_id=project_id)
# test['Summary'] = test['Summary'].fillna('')
# print('Preprocess completed')
# return test
def do_import(processed_df):
df = pd.read_gbq()
return df
def do_process(finaldf, project_mappingdf):
print(finaldf.head())
# Fill NaN labels with the Project name
finaldf.loc[finaldf['Label'].isna(), 'Label'] = finaldf['Project name']
finaldf['Label'] = finaldf['Label'].str.replace('project:', '')
# # Ensure data types are correct
# finaldf['Program'] = finaldf['Program'].astype(str)
# finaldf['Region'] = finaldf['Region'].astype(str)
# finaldf['Environment'] = finaldf['Environment'].astype(str)
# Initial conditions
finaldf.loc[
(finaldf['Project name'] == '[Charges not specific to a project]') & (finaldf['Service description'] == 'Support'),
['Program', 'Region', 'Environment']
] = ['All', '', '']
finaldf.loc[
(finaldf['Project name'] == '[Charges not specific to a project]') &
(finaldf['Service description'] != 'Support'),
'Program'
] = 'All'
finaldf.loc[
(finaldf['Project name'] == '[Charges not specific to a project]') &
(finaldf['Service description'] != 'Support'),
'Environment'
] = ''
finaldf.loc[
(finaldf['Project name'] == '[Charges not specific to a project]') &
(finaldf['Service description'] != 'Support'),
'Region'
] = finaldf['SKU description'].apply(lambda x: 'EU' if 'EU' in str(x) else ('IND' if 'IND' in str(x) else ('AUS' if 'AUS' in str(x) else ('US' if 'US' in str(x) else ''))))
def map_region(description):
if 'EU' in description:
return 'EU'
elif 'IND' in description:
return 'IND'
elif 'AUS' in description:
return 'AUS'
elif 'US' in description:
return 'US'
else:
return ''
finaldf.loc[finaldf['Project name'] == 'bq-admin-ericsson', 'Program'] = 'All'
finaldf.loc[finaldf['Project name'] == 'bq-admin-ericsson', 'Environment'] = ''
finaldf.loc[finaldf['Project name'] == 'bq-admin-ericsson', 'Region'] = finaldf.loc[finaldf['Label'] == 'bq-admin-ericsson', 'SKU description'].apply(map_region)
finaldf.loc[finaldf['Credit type'] == 'PROMOTION', 'Program'] = 'PROMOTION'
# Mapping Project Mapping Sheet to Program Mapping Sheet
project_mappingdf.set_index('Label', inplace=True)
finaldf.set_index('Label', inplace=True)
# Use `update` method to match and update
finaldf.update(project_mappingdf[['Program', 'Region', 'Environment']])
finaldf.reset_index(inplace=True)
# Ensure data types are correct after update
finaldf['Program'] = finaldf['Program'].astype(str)
finaldf['Region'] = finaldf['Region'].astype(str)
finaldf['Environment'] = finaldf['Environment'].astype(str)
# Saving output
finaldf.to_csv("project_mapping2.csv", index=False)
print(finaldf.head())
return finaldf
if __name__ == '__main__':
finaldf = pd.read_csv(r"C:\Users\ensaani\OneDrive - Ericsson\Desktop\NGRVA Projects\GCP Billing\June data\BCSS_Cost table, 2024-06-01 — 2024-06-30.csv")
project_mappingdf = pd.read_excel(r"C:\Users\ensaani\Downloads\BCSS, 2024-06-01 — 2024-06-30_P 2.xlsx", sheet_name='Project-Program-Mapping')
SQL_Query = 'SELECT * FROM `bmas-eu-ng-rva-data-dev.WEATHER_TEST.Project_Program_Mapping` LIMIT 1000'
# processed_df = preprocess(SQL_Query, project_id='bmas-eu-ng-rva-uat-data')
finaldf = do_process(finaldf, project_mappingdf)
print("Done")
Editor is loading...
Leave a Comment