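# Paste-site page metadata captured together with the code (not part of the program):
#   Untitled
#   unknown
#   plain_text
#   a year ago
#   1.7 kB
#   2
#   Indexable
#   Never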
import ast
import gc
import json

import pandas as pd

# Default location of the debug CSV dump written by fetchData.
_DEBUG_CSV_PATH = '/app/server/HOBS-DataPipeline/logs/loadCRMSElasticData/pdEngagedPartyData.csv'

# Columns concatenated, in this order and comma-separated, into 'stringifiedAddress'.
_ADDRESS_COLUMNS = ('address1', 'address2', 'city', 'state', 'zipcode', 'country')


def validateData(value):
    """Parse the string form of a Python literal back into an object.

    Args:
        value: Text to hand to ``ast.literal_eval`` (for example the ``str()``
            of a dict).

    Returns:
        The evaluated literal, or an empty dict when ``value`` cannot be
        evaluated.
    """
    try:
        return ast.literal_eval(value)
    except Exception:
        # Deliberately best-effort: any value that is not a plain literal
        # degrades to an empty dict instead of aborting the whole load.
        return {}


def fetchData(apilogger, connection, sql, configProperties, dataset, entity,
              debugCsvPath=_DEBUG_CSV_PATH):
    """Run ``sql`` and return the rows with derived address and dict columns.

    For a non-empty result the frame gains two columns:

    * ``stringifiedAddress`` - the address columns joined with commas.
    * ``engagedParty`` - a dict per row built from the configured columns.

    Fully duplicated rows are dropped.

    Args:
        apilogger: Accepted for interface compatibility; not used here.
        connection: Database connection passed to ``pandas.read_sql``.
        sql: Query passed to ``pandas.read_sql``.
        configProperties: Object whose ``get(entity, dataset + '.dictcolumns')``
            returns a JSON list of column names.
        dataset: Prefix of the ``.dictcolumns`` option name.
        entity: First argument of the ``configProperties.get`` lookup.
        debugCsvPath: Where to dump the resulting frame as CSV. Defaults to the
            original hard-coded location; pass ``None`` to skip the dump.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    dictColumns = json.loads(configProperties.get(entity, dataset + '.dictcolumns'))

    pdEngagedPartyData = pd.read_sql(sql, connection)
    pdEngagedPartyData.fillna('', inplace=True)

    # NOTE(review): the original source was collapsed onto a single line, so
    # the nesting below is reconstructed. It assumes all transformations sit
    # under the non-empty guard and the CSV dump runs after it - confirm
    # against the original file.
    if len(pdEngagedPartyData) > 0:
        if 'partyId' in pdEngagedPartyData.columns:
            pdEngagedPartyData['partyId'] = pdEngagedPartyData['partyId'].astype(str)

        # Same left-to-right "a + ',' + b" chain as before, driven by the
        # column list instead of six hand-written terms.
        address = pdEngagedPartyData[_ADDRESS_COLUMNS[0]]
        for column in _ADDRESS_COLUMNS[1:]:
            address = address + ',' + pdEngagedPartyData[column]
        pdEngagedPartyData['stringifiedAddress'] = address

        # Dicts are unhashable, so the dict column is stringified before
        # drop_duplicates and parsed back afterwards. A row whose dict holds a
        # value whose str() is not a Python literal comes back as {}.
        pdEngagedPartyData['engagedParty'] = (
            pdEngagedPartyData[dictColumns].to_dict(orient='records')
        )
        pdEngagedPartyData['engagedParty'] = pdEngagedPartyData['engagedParty'].astype(str)
        pdEngagedPartyData.drop_duplicates(inplace=True)
        pdEngagedPartyData['engagedParty'] = (
            pdEngagedPartyData['engagedParty'].apply(validateData)
        )

    if debugCsvPath is not None:
        pdEngagedPartyData.to_csv(debugCsvPath)

    gc.collect()
    return pdEngagedPartyData


if __name__ == '__main__':
    # The original guard called fetchData(connection) with an undefined name
    # and without the other required arguments, which always raised NameError.
    # Nothing in this module can build those arguments, so fail with a clear
    # message instead.
    raise SystemExit(
        'This module is not runnable on its own: import fetchData and call it '
        'with apilogger, connection, sql, configProperties, dataset and entity.'
    )