# Paste-site header (not code): Untitled | unknown | plain_text | 2 years ago | 1.8 kB | 5 | Indexable
import pandas as pd
import json
import ast
import gc
def validateData(value):
    """Parse ``value`` as a Python literal, falling back to an empty dict.

    Args:
        value: Usually the ``str()`` form of a dict; anything accepted by
            ``ast.literal_eval``.

    Returns:
        The evaluated literal, or ``{}`` when ``value`` is not a valid
        Python literal (malformed text, non-literal expressions such as
        function calls, ``None``, ...).
    """
    try:
        return ast.literal_eval(value)
    # These are the errors ast.literal_eval is documented to raise on
    # malformed input; anything else is a genuine bug and should surface.
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}
def fetchData(apilogger, connection, sql, configProperties, dataset, entity,
              csvDumpPath='/app/server/HOBS-DataPipeline/logs/loadCRMSElasticData/pdEngagedPartyData.csv'):
    """Run ``sql`` and return the rows as a DataFrame with derived columns.

    Missing values are replaced with ``''``. A ``stringifiedAddress`` column
    is always added. When the query returns rows, ``partyId`` (if present) is
    cast to ``str``, ``stringifiedAddress`` is filled with the comma-joined
    address columns, an ``engagedParty`` column holding one dict per row is
    added, and duplicate rows are dropped.

    NOTE(review): the indentation of this function was lost in the source
    this was recovered from; which statements belong under the
    ``len(...) > 0`` check is a best-effort reconstruction -- confirm.

    Args:
        apilogger: Logger-like object; only ``info`` is called.
        connection: Database connection handed to ``pandas.read_sql``.
        sql: Query to execute.
        configProperties: Config object whose ``get(entity, key)`` returns a
            JSON array of column names for the key ``<dataset>.dictcolumns``.
        dataset: Prefix of the ``.dictcolumns`` config key.
        entity: First argument passed to ``configProperties.get``.
        csvDumpPath: Where the processed frame is dumped as CSV when rows
            were returned. Defaults to the previously hard-coded path; pass
            ``None`` to skip the dump.

    Returns:
        The processed ``pandas.DataFrame``.
    """
    # Columns that make up each per-row ``engagedParty`` record.
    dictColumns = json.loads(configProperties.get(entity, dataset + '.dictcolumns'))

    partyFrame = pd.read_sql(sql, connection)
    partyFrame.fillna('', inplace=True)
    # The row count must go through a %s placeholder: passing it as a bare
    # extra argument makes the logging module fail to format the record.
    apilogger.info('lenth of pdEngagedPartyData - %s', len(partyFrame))

    # Make sure the column exists even when the query returned no rows.
    partyFrame['stringifiedAddress'] = ''

    if len(partyFrame) > 0:
        if 'partyId' in partyFrame.columns:
            partyFrame['partyId'] = partyFrame['partyId'].astype(str)

        partyFrame['stringifiedAddress'] = (
            partyFrame['address1'] + ','
            + partyFrame['address2'] + ','
            + partyFrame['city'] + ','
            + partyFrame['state'] + ','
            + partyFrame['zipcode'] + ','
            + partyFrame['country']
        )

        partyFrame['engagedParty'] = partyFrame[dictColumns].to_dict(orient='records')
        # dicts are unhashable, so the records are stringified for
        # drop_duplicates and parsed back into dicts afterwards.
        # NOTE(review): a record whose str() form is not a valid Python
        # literal comes back from validateData as {}.
        partyFrame['engagedParty'] = partyFrame['engagedParty'].astype(str)
        partyFrame.drop_duplicates(inplace=True)
        partyFrame['engagedParty'] = partyFrame['engagedParty'].apply(validateData)

        if csvDumpPath is not None:
            partyFrame.to_csv(csvDumpPath)

    gc.collect()
    return partyFrame
if __name__ == '__main__':
    # fetchData() needs six caller-supplied arguments (logger, connection,
    # SQL, config, dataset, entity), none of which are defined in this
    # module, so there is nothing meaningful to run as a script.
    raise SystemExit(
        'fetchData() cannot be run standalone: call it with apilogger, '
        'connection, sql, configProperties, dataset and entity.'
    )
# Paste-site footer (not code): Editor is loading...