Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
2.2 kB
2
Indexable
Never
import pandas as pd
import json
import ast
import gc


def validateData(value):
    """Parse a Python-literal string back into the object it describes.

    ``value`` is handed to ``ast.literal_eval``; whatever literal it evaluates
    to is returned.  If parsing raises any exception, an empty dict is
    returned instead.
    """
    try:
        parsed = ast.literal_eval(value)
    except Exception:
        # Anything that cannot be parsed falls back to an empty dict.
        return {}
    else:
        return parsed


def fetchData(apilogger, connection, sql, configProperties, dataset, entity):
    """Load the rows for ``sql`` into a DataFrame and add the derived columns.

    Args:
        apilogger: Logger used for the row-count message.  It is called as
            ``apilogger.info(msg, arg)`` with %-style formatting
            (assumes a stdlib ``logging.Logger`` -- confirm against caller).
        connection: Database connection handed to ``pandas.read_sql``.
        sql: Query text handed to ``pandas.read_sql``.
        configProperties: Object whose ``get(entity, dataset + '.dictcolumns')``
            returns a JSON-encoded list of column names.
        dataset: Prefix of the ``.dictcolumns`` option name.
        entity: First argument of the ``configProperties.get`` lookup.

    Returns:
        The DataFrame read from ``connection`` with missing values replaced by
        ``''`` and a ``stringifiedAddress`` column added.  When at least one
        row came back, ``stringifiedAddress`` holds the comma-joined
        ``address1``, ``address2``, ``city``, ``state``, ``zipcode`` and
        ``country`` values, an ``engagedParty`` column holds a per-row dict of
        the configured columns, and duplicate rows are dropped.  A DataFrame is
        always returned: an empty result yields the empty frame, never
        ``None``.
    """
    dictColumns = json.loads(configProperties.get(entity, dataset + '.dictcolumns'))

    pdEngagedPartyData = pd.read_sql(sql, connection)
    pdEngagedPartyData.fillna('', inplace=True)
    # The count is passed as a logging argument, so the message needs a
    # placeholder for it; without one the logging module reports a
    # formatting error instead of emitting the line.
    apilogger.info('lenth of pdEngagedPartyData - %s', len(pdEngagedPartyData))
    pdEngagedPartyData['stringifiedAddress'] = ''

    if pdEngagedPartyData.empty:
        # Return the empty frame explicitly so callers that do
        # ``len(fetchData(...))`` do not receive None.
        return pdEngagedPartyData

    if 'partyId' in pdEngagedPartyData.columns:
        pdEngagedPartyData['partyId'] = pdEngagedPartyData['partyId'].astype(str)

    pdEngagedPartyData['stringifiedAddress'] = (
        pdEngagedPartyData['address1'] + ','
        + pdEngagedPartyData['address2'] + ','
        + pdEngagedPartyData['city'] + ','
        + pdEngagedPartyData['state'] + ','
        + pdEngagedPartyData['zipcode'] + ','
        + pdEngagedPartyData['country']
    )

    # Build one dict per row from the configured columns.  The dicts are
    # stringified before drop_duplicates and parsed back afterwards
    # (presumably because dict cells cannot be hashed for de-duplication).
    pdEngagedPartyData['engagedParty'] = pdEngagedPartyData[dictColumns].to_dict(orient='records')
    pdEngagedPartyData['engagedParty'] = pdEngagedPartyData['engagedParty'].astype(str)
    pdEngagedPartyData.drop_duplicates(inplace=True)
    pdEngagedPartyData['engagedParty'] = pdEngagedPartyData['engagedParty'].apply(validateData)

    # NOTE(review): hard-coded dump location; to_csv raises if the directory
    # does not exist on the host running this.
    pdEngagedPartyData.to_csv('/app/server/HOBS-DataPipeline/logs/loadCRMSElasticData/pdEngagedPartyData.csv')
    gc.collect()
    return pdEngagedPartyData


# Script entry point.
# NOTE(review): `connection` is not defined anywhere in the visible code, and
# fetchData is declared with six required parameters (apilogger, connection,
# sql, configProperties, dataset, entity), so this call cannot succeed as
# written -- confirm how the arguments are meant to be supplied.
if __name__ == '__main__':
    fetchData(connection)

I am getting the error "NoneType has no attribute len()".