Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
15 kB
4
Indexable
# ================================================================================================================================================================================
# PPM PRODUCT CATALOG (PC) REPLICATION - CE CUSTOM TABLES REPLICATE
#   DATE     AUTHOR     VER   CHANGE DESCRIPTION
# --------  ---------  -----  ------------------
# 21.08.23  Veerendra    1.0   The below script replicates ppm table records dynamically from "etl_ppm_replication_master" "PC_EXT"
#
# python3 ppm_pc_ext_insert.py --releaseId 275.2 --releaseType DEPLOYED --replicationTarget SIT --opId HOB --buId DEFAULT --replicationJobId REP_990_234
# ================================================================================================================================================================================

import pandas as pd
import json
from base64 import b64decode as base_b64decode
import logging
from pandas import read_sql as pd_read_sql
import sys
from sqlalchemy import create_engine
import argparse
from io import TextIOBase as io_TextIOBase
from json import load as json_load
import os


# define Python user-defined exceptions
class Error(Exception):
    """Base class for other exceptions"""
    pass


# define Python user-defined exceptions
class ETL_PPM_REPLICATION_MASTER_ERROR(Error):
    pass


class DB_CONNECTION_ERROR(Error):
    pass


def setDbConnection(logging, json_data, serverInfo, encoding):
    from sqlalchemy import create_engine as sqlalchemy_create_engine
    from base64 import b64decode as base_b64decode
    import mysql.connector
    try:
        cnx = cursor = schema = db_type = None
        encrypt = json_data.get(serverInfo, {}).get('ENCRYPT')
        host = json_data.get(serverInfo, {}).get('DB_HOST')
        port = json_data.get(serverInfo, {}).get('DB_PORT')
        user = json_data.get(serverInfo, {}).get('DB_USER')
        db_type = json_data.get(serverInfo, {}).get('DB_TYPE')
        schema = json_data.get(serverInfo, {}).get('DB_SCHEMA')
        if encrypt == 'Y':
            password = base_b64decode(json_data.get(serverInfo, {}).get('DB_PASSWORD')).decode('utf-8')
        else:
            password = json_data.get(serverInfo, {}).get('DB_PASSWORD')

        if db_type in ('MYSQL', 'MARIA'):
            connection_text = ('mysql+mysqlconnector://%s:%s@%s:%s/%s' % (user
                                                                          , password
                                                                          , host
                                                                          , port
                                                                          , schema))
        elif db_type == 'ORACLE':
            connection_text = ('oracle://%s:%s@%s:%s/%s' % (user
                                                            , password
                                                            , host
                                                            , port
                                                            , json_data.get(serverInfo, {}).get('DB_SID')
                                                            ))
        cnx = sqlalchemy_create_engine(connection_text
                                       , encoding=encoding)
                                       # ,fast_executemany=True
                                       #, connect_args={'connect_timeout': 600})
        cursor = cnx.connect()
        logging.info(f"Connected to database server {serverInfo}: {host}:{port}/{schema}")
    # except mysql.connector.Error as dberr:
    #    logging.error("DATABASE CONNECTION ERROR")
    #    logging.error("Error - {} . Line No - {} ".format(str(dberr), str(sys.exc_info()[-1].tb_lineno)))
    #    cnx = cursor = schema = None
    except Exception as dbexp:
        logging.error("DATABASE CONNECTION EXCEPTION")
        logging.error("Error - {} . Line No - {} ".format(str(dbexp), str(sys.exc_info()[-1].tb_lineno)))
        cnx = cursor = schema = None

    return cnx, cursor, schema, db_type


def main(args, json_data, log_file, logging, encoding):
    try:
        releaseId = args.releaseId
        opId = args.opId
        buId = args.buId
        replicationTarget = args.replicationTarget
        replicationJobId = args.replicationJobId
        return_flag = True
        STATUS = "InProgress"
        STATUS_MESSAGE = "Insertion of federation tables successful."
        replicationTarget_EXT = replicationTarget + '_EXT'
        failed_entities = []

        # Connect to PPM_PC database
        connection_ppm, cursor_ppm, schema_ppm, db_type_ppm = setDbConnection(logging, json_data, 'PPM_PC', encoding)

        # Connect to source database
        connection_source, cursor_source, schema_source, db_type_source = setDbConnection(logging, json_data, 'SOURCE',
                                                                                          encoding)

        # Connect to source_ext database
        #connection_source_ext, cursor_source_ext, schema_source_ext, db_type_source_ext = setDbConnection(logging,
                                                                                                          #json_data,
                                                                                                          #'SOURCE_EXT',
                                                                                                          #encoding)

        # Connect to target_ext database
        connection_ext, cursor_ext, schema_ext, db_type_ext = setDbConnection(logging, json_data, 'SIT',
                                                                              encoding)

        if not (connection_ppm and connection_source  and connection_ext):
            raise DB_CONNECTION_ERROR

        # Fetch data from the etl_ppm_replication_master table
        primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        df = pd_read_sql(primary_query, con=connection_ppm)
        target_query = f"SELECT * FROM {schema_ext}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        target_df = pd_read_sql(target_query, con=connection_ext)
        df['eprm_table_col_pk']=df['eprm_table_col_pk'].str.lower()
        target_df['eprm_table_col_pk']=target_df['eprm_table_col_pk'].str.lower()
        if 'eprm_table_col_pk' not in df.columns:

            logging.error("'eprm_table_col_pk' column not found in source_df.")

        elif 'eprm_table_col_pk' not in target_df.columns:

            logging.error("'eprm_table_col_pk' column not found in target_df.")

        elif df['eprm_table_col_pk'].dtype != target_df['eprm_table_col_pk'].dtype:

            logging.error("'eprm_table_col_pk' column has different data types in source_df and target_df.")

        else:

            # Continue with the merging and data comparison steps

            keycolumn = 'eprm_table_col_pk'

            merged_df = df.merge(target_df, on=keycolumn, suffixes=('_source', '_target'), how='inner')

            differences = merged_df[merged_df.filter(like='_source').ne(merged_df.filter(like='_target')).any(axis=1)]

            differences.to_csv('/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/difference.csv',
                               index=False)
        logging.info(f"-- ++++++++++++++++++++++++++++++++++ BACK UPDATE STATUS FOR UI ++++++++++++++++++ \n")
        if len(failed_entities) > 0:
            return_flag = False
            STATUS = "Error"
            STATUS_MESSAGE = str(failed_entities).replace("'", '').replace('"', '')

        logging.info("STATUS: %s" % STATUS)
        logging.info("STATUS_MESSAGE: %s" % STATUS_MESSAGE)

        query_update = f"UPDATE {schema_source}.ppm_replication_status SET status='" + STATUS + "'" \
                       + ", status_message='" + STATUS_MESSAGE + "'" \
                       + ", error_description=NULL" \
                       + ", updated_by='" + replicationJobId + "' /**/" \
                       + " WHERE replication_job_id='" + replicationJobId + "' AND release_id='" + str(releaseId) + "'"
        query_ppm_update = f"UPDATE {schema_source}.ppm_release_master SET replication_status='" + STATUS + "'" \
                           + ", updated_by='" + replicationJobId + "' /**/" \
                           + " WHERE release_id='" + str(releaseId) + "'"

        if db_type_source == 'ORACLE':
            query_update = query_update.replace('/**/', ', updated_date=SYSDATE')
            query_ppm_update = query_ppm_update.replace('/**/', ', updated_on=SYSDATE')
        elif db_type_source in ('MARIA', 'MYSQL'):
            query_update = query_update.replace('/**/', ', updated_date=NOW()')
            query_ppm_update = query_ppm_update.replace('/**/', ', updated_on=NOW()')
        logging.info("-- + ppm_replication_status - UPDATE \n")
        logging.info(query_update + ";\n")
        logging.info("-- + ppm_release_master - UPDATE \n")
        logging.info(query_ppm_update + ";\n")
        logging.info(f"-- ++++++++++++++++++++++++++++++++++ FIN ++++++++++++++++++++++++++++++++++++++++ \n")
        res = cursor_source.execute(query_update)
        logging.info("query_update: %s" % res)
        res = cursor_source.execute(query_ppm_update)
        logging.info("query_ppm_update: %s" % res)
    except DB_CONNECTION_ERROR:
        logging.error("EXCEPTION: DB CONNECTION ERROR PC_EXT")
        return_flag = False
    except ETL_PPM_REPLICATION_MASTER_ERROR:
        STATUS_MESSAGE = "NO RECORDS PRESENT IN etl_ppm_replication_master TABLE"
        logging.error("EXCEPTION:" + STATUS_MESSAGE)
        return_flag = False
    except Exception as e:
        logging.error("Error - {} . Line No - {} ".format(str(e), str(sys.exc_info()[-1].tb_lineno)))
        return_flag = False

    return return_flag


if __name__ == '__main__':
    import logging
    from configparser import ConfigParser as conf_ConfigParser

    statFile = ""
    try:
        parser = argparse.ArgumentParser(description="PPM Product Catalog Replication Script")
        parser.add_argument('--releaseId', required=True, help="Release ID")
        parser.add_argument('--releaseType', required=True, help="Release Type")
        parser.add_argument('--replicationTarget', required=True, help="Replication Target")
        parser.add_argument('--opId', required=True, help="Operation ID")
        parser.add_argument('--buId', required=True, help="Business Unit ID")
        parser.add_argument('--replicationJobId', required=True, help="Replication Job ID")
        args = parser.parse_args()
        replicationJobId = args.replicationJobId
        json_file_path = "/app/scripts/PPM_Release_Management/Product_Catalog_ETL/config/dna.json"
        conf_file_path = "/app/scripts/PPM_Release_Management/Product_Catalog_ETL/config/ppm_pc_replication.conf"
        log_file = f'/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/{replicationJobId}_ppm_pc_replication_update_dna.log'
        statFile = f'/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/{replicationJobId}_ppm_pc_replication_insert_update_dna.status'
        args = parser.parse_args()
        statFile = open(statFile, "w")
        # Set up logging
        CONFIG = conf_ConfigParser()
        CONFIG.read(conf_file_path)

        logging.basicConfig(filename=log_file
                            , level=CONFIG.get('CONFIG_LOGGING', 'LOG_LEVEL', raw=True)
                            , format=CONFIG.get('CONFIG_LOG_FORMAT', 'LOG_FORMAT_DISP', raw=True)
                            , datefmt=CONFIG.get('CONFIG_LOG_FORMAT', 'LOG_FORMAT_DATE', raw=True)
                            )
        logging.info('LOGGER initiated')

        encoding = CONFIG.get('CONFIG_GENERIC', 'DB_CHARSET', raw=True)

        # Read JSON data from file
        if not os.path.exists(json_file_path):
            logging.error("CREDENTIAL FILE MISSING")
            logging.error("CREDENTIAL FILE: %s" % json_file_path)
            raise FileNotFoundError("CREDENTIAL FILE MISSING")
        with open(json_file_path) as json_file:
            json_data = json_load(json_file)

        if main(args, json_data, log_file, logging, encoding):
            print("Insertion of data successful")
            statFile.write("SUCCESS")
        else:
            statFile.write("FAILED")

    except FileNotFoundError as ferr:
        print("Error - {} . Line No - {} ".format(str(ferr), str(sys.exc_info()[-1].tb_lineno)))
        statFile.write("FAILED")

    except Exception as err:
        print("Error - {} . Line No - {} ".format(str(err), str(sys.exc_info()[-1].tb_lineno)))
        statFile.write("FAILED")

    if isinstance(statFile, io_TextIOBase):
        statFile.close()


so in the above iam comparing content of two dataframes which has fteched query results and iam comparing based on the primarykey column actually i have below coulumns in my both the databases 
eprm_seq_nbr_source,eprm_catalog_source,eprm_entity_type_source,eprm_table_name_source,eprm_table_col_pk,eprm_table_col_pk_seq_source,eprm_table_type_source,eprm_table_alias_source,eprm_parent_table_name_source,eprm_join_cols_reim_source,eprm_join_cols_entity_source,eprm_join_with_reim_source,eprm_enabled_flg_source,eprm_reim_entity_id_source,eprm_reim_entity_ref_id_source,eprm_reim_entity_ref_id_1_source,eprm_reim_entity_ref_id_2_source,eprm_reim_entity_desc_source,eprm_reim_version_source,eprm_remarks_source,eprm_created_by_source,eprm_created_on_source,eprm_updated_by_source,eprm_updated_on_source,eprm_seq_nbr_target,eprm_catalog_target,eprm_entity_type_target,eprm_table_name_target,eprm_table_col_pk_seq_target,eprm_table_type_target,eprm_table_alias_target,eprm_parent_table_name_target,eprm_join_cols_reim_target,eprm_join_cols_entity_target,eprm_join_with_reim_target,eprm_enabled_flg_target,eprm_reim_entity_id_target,eprm_reim_entity_ref_id_target,eprm_reim_entity_ref_id_1_target,eprm_reim_entity_ref_id_2_target,eprm_reim_entity_desc_target,eprm_reim_version_target,eprm_remarks_target,eprm_created_by_target,eprm_created_on_target,eprm_updated_by_target,eprm_updated_on_target

and now i want to ignore the content comparision only these eprm_created_on,eprm_updated_by and  eprm_updated_on and i want to procedd with another