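# NOTE: the lines below are page-header residue from the paste site; they are
# kept verbatim but commented out so the file parses as Python.
# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# a year ago
# 1.8 kB
# 1
# Indexable
# Never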
import pandas as pd
import json
import ast
import gc

def validateData(value):
    """Parse ``value`` as a Python literal, falling back to an empty dict.

    ``value`` is handed to ``ast.literal_eval``. Whatever it evaluates to
    (dict, list, number, ...) is returned unchanged. If evaluation raises
    any ``Exception`` -- malformed text, a non-literal expression, or an
    input that is not a string at all -- ``{}`` is returned instead and the
    error is discarded.
    """
    try:
        parsed = ast.literal_eval(value)
    except Exception:
        # Deliberate catch-all: callers always get a usable value back.
        return {}
    else:
        return parsed


def fetchData(apilogger, connection, sql, configProperties, dataset, entity,
              csvPath='/app/server/HOBS-DataPipeline/logs/'
                      'loadCRMSElasticData/pdEngagedPartyData.csv'):
    """Run ``sql`` and return the rows with an ``engagedParty`` dict column.

    Steps, in order:
      1. Read the column list from
         ``configProperties.get(entity, dataset + '.dictcolumns')`` and
         decode it with ``json.loads``.
      2. Load the query result with ``pd.read_sql`` and replace missing
         values with ``''``.
      3. Add ``stringifiedAddress`` (comma-joined address1, address2, city,
         state, zipcode, country); it stays ``''`` when there are no rows.
         ``partyId`` is cast to ``str`` when that column exists.
      4. Build ``engagedParty`` from the configured columns, drop duplicate
         rows, and write the frame to ``csvPath``.

    Args:
        apilogger: Logger-like object; only ``info(msg, *args)`` is called.
        connection: Database connection passed straight to ``pd.read_sql``.
        sql: Query passed straight to ``pd.read_sql``.
        configProperties: Object exposing ``get(section, option)``
            (presumably a ``configparser.ConfigParser`` -- confirm with the
            caller). The option value must be JSON that decodes to the
            list of column names to pack into ``engagedParty``.
        dataset: Prefix of the ``<dataset>.dictcolumns`` option name.
        entity: Section name used for the config lookup.
        csvPath: Where the resulting frame is written as CSV. Defaults to
            the path that was previously hard-coded; pass ``None`` or ``''``
            to skip the write.

    Returns:
        pandas.DataFrame: the de-duplicated result, including the
        ``stringifiedAddress`` and ``engagedParty`` columns.

    Raises:
        KeyError: if rows are returned but an address column, or one of
            the configured dict columns, is missing from the result.
    """
    dictColumns = json.loads(
        configProperties.get(entity, dataset + '.dictcolumns'))

    pdEngagedPartyData = pd.read_sql(sql, connection)
    pdEngagedPartyData.fillna('', inplace=True)
    # Logging substitutes arguments lazily, so the message needs a
    # placeholder; without one the length was never rendered.
    apilogger.info('lenth of pdEngagedPartyData - %s', len(pdEngagedPartyData))

    pdEngagedPartyData['stringifiedAddress'] = ''
    if not pdEngagedPartyData.empty:
        if 'partyId' in pdEngagedPartyData.columns:
            pdEngagedPartyData['partyId'] = \
                pdEngagedPartyData['partyId'].astype(str)
        pdEngagedPartyData['stringifiedAddress'] = (
            pdEngagedPartyData['address1'] + ','
            + pdEngagedPartyData['address2'] + ','
            + pdEngagedPartyData['city'] + ','
            + pdEngagedPartyData['state'] + ','
            + pdEngagedPartyData['zipcode'] + ','
            + pdEngagedPartyData['country']
        )

    # Dicts are unhashable, so the column is stringified before
    # drop_duplicates and parsed back afterwards.
    # NOTE(review): validateData yields {} for any record whose text form is
    # not a plain Python literal -- confirm this is acceptable for the
    # configured columns.
    pdEngagedPartyData['engagedParty'] = \
        pdEngagedPartyData[dictColumns].to_dict(orient='records')
    pdEngagedPartyData['engagedParty'] = \
        pdEngagedPartyData['engagedParty'].astype(str)
    pdEngagedPartyData.drop_duplicates(inplace=True)
    pdEngagedPartyData['engagedParty'] = \
        pdEngagedPartyData['engagedParty'].apply(validateData)

    if csvPath:
        pdEngagedPartyData.to_csv(csvPath)

    gc.collect()
    return pdEngagedPartyData


# NOTE(review): this entry point cannot run as written. `connection` is not
# defined anywhere in the visible file, and fetchData declares six required
# parameters (apilogger, connection, sql, configProperties, dataset, entity)
# while only one argument is passed here. Confirm the intended wiring before
# relying on running this module as a script.
if __name__ == '__main__':
	fetchData(connection)