Untitled
unknown
plain_text
2 years ago
2.0 kB
13
Indexable
import pandas as pd
import json
import ast
import gc
def validateData(value):
    """Parse the text of a Python literal, falling back to an empty dict.

    Args:
        value: Text holding a Python literal, e.g. ``"{'partyId': '7'}"``.

    Returns:
        The object produced by ``ast.literal_eval(value)``, or ``{}`` when
        ``value`` cannot be evaluated as a literal.
    """
    try:
        return ast.literal_eval(value)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the failures ast.literal_eval documents for malformed
        # input; anything else indicates a real bug and should surface.
        return {}
def fetchData(
    apilogger,
    connection,
    sql,
    configProperties,
    dataset,
    entity,
    csvDumpPath='/app/server/HOBS-DataPipeline/logs/loadCRMSElasticData/pdEngagedPartyData.csv',
):
    """Run ``sql`` and return its rows with a per-row ``engagedParty`` dict.

    Missing values are replaced with ``''``. When the query returns rows:

    * ``partyId`` (if present) is cast to ``str``;
    * ``stringifiedAddress`` is built by comma-joining ``address1``,
      ``address2``, ``city``, ``state``, ``zipcode`` and ``country``;
    * exact duplicate rows are dropped;
    * ``engagedParty`` holds, for every remaining row, a dict of the columns
      named in the configured ``dictcolumns`` list.

    Args:
        apilogger: Not used by this function; kept so existing callers keep
            working.
        connection: Database connection handed to ``pandas.read_sql``.
        sql: Query to execute.
        configProperties: Object whose ``get(entity, dataset + '.dictcolumns')``
            returns a JSON array of column names.
        dataset: Prefix of the ``.dictcolumns`` option name.
        entity: First argument passed to ``configProperties.get``.
        csvDumpPath: Where to write a CSV snapshot of the result. Pass
            ``None`` (or ``''``) to skip the snapshot.

    Returns:
        pandas.DataFrame: The processed query result.

    Raises:
        KeyError: If rows are returned but an address column or one of the
            configured dict columns is missing from the result.
    """
    dictColumns = json.loads(configProperties.get(entity, dataset + '.dictcolumns'))

    pdEngagedPartyData = pd.read_sql(sql, connection)
    pdEngagedPartyData.fillna('', inplace=True)

    if len(pdEngagedPartyData) > 0:
        if 'partyId' in pdEngagedPartyData.columns:
            pdEngagedPartyData['partyId'] = pdEngagedPartyData['partyId'].astype(str)

        pdEngagedPartyData['stringifiedAddress'] = (
            pdEngagedPartyData['address1'] + ','
            + pdEngagedPartyData['address2'] + ','
            + pdEngagedPartyData['city'] + ','
            + pdEngagedPartyData['state'] + ','
            + pdEngagedPartyData['zipcode'] + ','
            + pdEngagedPartyData['country']
        )

        # De-duplicate before adding the dict column: dicts are unhashable, so
        # drop_duplicates cannot run once that column exists. Every derived
        # column is a function of the columns compared here, so the surviving
        # rows are the same as when duplicates are dropped afterwards, and no
        # str()/literal_eval round trip (which turns records holding
        # non-literal values into {}) is needed.
        pdEngagedPartyData.drop_duplicates(inplace=True)
        pdEngagedPartyData['engagedParty'] = (
            pdEngagedPartyData[dictColumns].to_dict(orient='records')
        )

    if csvDumpPath:
        pdEngagedPartyData.to_csv(csvDumpPath)

    gc.collect()
    return pdEngagedPartyData
if __name__ == '__main__':
    # fetchData() needs a logger, an open DB connection, a SQL string, the
    # config object and the dataset/entity names. Nothing in this module can
    # construct those, so direct execution exits with a usage hint.
    raise SystemExit(
        'This module is not runnable on its own: import it and call '
        'fetchData(apilogger, connection, sql, configProperties, dataset, entity).'
    )
Editor is loading...