# ================================================================================================================================================================================
# PPM PRODUCT CATALOG (PC) REPLICATION - CE CUSTOM TABLES REPLICATE
# DATE       AUTHOR      VER   CHANGE DESCRIPTION
# --------   ---------   ----- ------------------
# 21.08.23   Veerendra   1.0   The below script replicates ppm table records dynamically from "etl_ppm_replication_master" "PC_EXT"
#
# python3 ppm_pc_ext_insert.py --releaseId 275.2 --releaseType DEPLOYED --replicationTarget SIT --opId HOB --buId DEFAULT --replicationJobId REP_990_234
# ================================================================================================================================================================================
import argparse
import logging
import os
import sys
from io import TextIOBase as io_TextIOBase
from json import load as json_load
from pandas import read_sql as pd_read_sql


# define Python user-defined exceptions
class Error(Exception):
    """Base class for other exceptions"""
    pass


class ETL_PPM_REPLICATION_MASTER_ERROR(Error):
    """Raised when etl_ppm_replication_master returns no records"""
    pass


class DB_CONNECTION_ERROR(Error):
    """Raised when a database connection cannot be established"""
    pass


def setDbConnection(logging, json_data, serverInfo, encoding):
    from sqlalchemy import create_engine as sqlalchemy_create_engine
    from base64 import b64decode as base_b64decode

    try:
        cnx = cursor = schema = db_type = None
        encrypt = json_data.get(serverInfo, {}).get('ENCRYPT')
        host = json_data.get(serverInfo, {}).get('DB_HOST')
        port = json_data.get(serverInfo, {}).get('DB_PORT')
        user = json_data.get(serverInfo, {}).get('DB_USER')
        db_type = json_data.get(serverInfo, {}).get('DB_TYPE')
        schema = json_data.get(serverInfo, {}).get('DB_SCHEMA')

        # Passwords may be stored base64-encoded in the credentials file
        if encrypt == 'Y':
            password = base_b64decode(json_data.get(serverInfo, {}).get('DB_PASSWORD')).decode('utf-8')
        else:
            password = json_data.get(serverInfo, {}).get('DB_PASSWORD')

        if db_type in ('MYSQL', 'MARIA'):
            connection_text = ('mysql+mysqlconnector://%s:%s@%s:%s/%s'
                               % (user, password, host, port, schema))
        elif db_type == 'ORACLE':
            connection_text = ('oracle://%s:%s@%s:%s/%s'
                               % (user, password, host, port,
                                  json_data.get(serverInfo, {}).get('DB_SID')))

        # 'encoding' is a SQLAlchemy 1.x engine argument
        cnx = sqlalchemy_create_engine(connection_text, encoding=encoding)
        cursor = cnx.connect()
        logging.info(f"Connected to database server {serverInfo}: {host}:{port}/{schema}")
    except Exception as dbexp:
        logging.error("DATABASE CONNECTION EXCEPTION")
        logging.error("Error - {} . Line No - {} ".format(str(dbexp), str(sys.exc_info()[-1].tb_lineno)))
        cnx = cursor = schema = None
    return cnx, cursor, schema, db_type


def main(args, json_data, log_file, logging, encoding):
    try:
        releaseId = args.releaseId
        opId = args.opId
        buId = args.buId
        replicationTarget = args.replicationTarget
        replicationJobId = args.replicationJobId
        return_flag = True
        STATUS = "InProgress"
        STATUS_MESSAGE = "Insertion of federation tables successful."
        replicationTarget_EXT = replicationTarget + '_EXT'
        failed_entities = []

        # Connect to PPM_PC database
        connection_ppm, cursor_ppm, schema_ppm, db_type_ppm = setDbConnection(logging, json_data, 'PPM_PC', encoding)
        # Connect to source database
        connection_source, cursor_source, schema_source, db_type_source = setDbConnection(logging, json_data, 'SOURCE', encoding)
        # Connect to source_ext database (currently disabled)
        # connection_source_ext, cursor_source_ext, schema_source_ext, db_type_source_ext = \
        #     setDbConnection(logging, json_data, 'SOURCE_EXT', encoding)
        # Connect to target_ext database (previously hard-coded to 'SIT')
        connection_ext, cursor_ext, schema_ext, db_type_ext = setDbConnection(logging, json_data, replicationTarget, encoding)

        if not (connection_ppm and connection_source and connection_ext):
            raise DB_CONNECTION_ERROR

        # Fetch the enabled rows of etl_ppm_replication_master from source and target
        primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        df = pd_read_sql(primary_query, con=connection_ppm)
        target_query = f"SELECT * FROM {schema_ext}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        target_df = pd_read_sql(target_query, con=connection_ext)

        if df.empty:
            raise ETL_PPM_REPLICATION_MASTER_ERROR

        # Normalise the join key before comparing
        df['eprm_table_col_pk'] = df['eprm_table_col_pk'].str.lower()
        target_df['eprm_table_col_pk'] = target_df['eprm_table_col_pk'].str.lower()

        if 'eprm_table_col_pk' not in df.columns:
            logging.error("'eprm_table_col_pk' column not found in source_df.")
        elif 'eprm_table_col_pk' not in target_df.columns:
            logging.error("'eprm_table_col_pk' column not found in target_df.")
        elif df['eprm_table_col_pk'].dtype != target_df['eprm_table_col_pk'].dtype:
            logging.error("'eprm_table_col_pk' column has different data types in source_df and target_df.")
        else:
            # Merge on the primary-key column and flag rows whose remaining
            # columns differ between source and target
            keycolumn = 'eprm_table_col_pk'
            merged_df = df.merge(target_df, on=keycolumn, suffixes=('_source', '_target'), how='inner')
            differences = merged_df[merged_df.filter(like='_source').ne(merged_df.filter(like='_target')).any(axis=1)]
            differences.to_csv('/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/difference.csv', index=False)

        logging.info(f"-- ++++++++++++++++++++++++++++++++++ BACK UPDATE STATUS FOR UI ++++++++++++++++++ \n")
        if len(failed_entities) > 0:
            return_flag = False
            STATUS = "Error"
            STATUS_MESSAGE = str(failed_entities).replace("'", '').replace('"', '')

        logging.info("STATUS: %s" % STATUS)
        logging.info("STATUS_MESSAGE: %s" % STATUS_MESSAGE)

        query_update = f"UPDATE {schema_source}.ppm_replication_status SET status='" + STATUS + "'" \
                       + ", status_message='" + STATUS_MESSAGE + "'" \
                       + ", error_description=NULL" \
                       + ", updated_by='" + replicationJobId + "' /**/" \
                       + " WHERE replication_job_id='" + replicationJobId + "' AND release_id='" + str(releaseId) + "'"
        query_ppm_update = f"UPDATE {schema_source}.ppm_release_master SET replication_status='" + STATUS + "'" \
                           + ", updated_by='" + replicationJobId + "' /**/" \
                           + " WHERE release_id='" + str(releaseId) + "'"

        # The /**/ placeholder is swapped for the engine-specific timestamp syntax
        if db_type_source == 'ORACLE':
            query_update = query_update.replace('/**/', ', updated_date=SYSDATE')
            query_ppm_update = query_ppm_update.replace('/**/', ', updated_on=SYSDATE')
        elif db_type_source in ('MARIA', 'MYSQL'):
            query_update = query_update.replace('/**/', ', updated_date=NOW()')
            query_ppm_update = query_ppm_update.replace('/**/', ', updated_on=NOW()')

        logging.info("-- + ppm_replication_status - UPDATE \n")
        logging.info(query_update + ";\n")
        logging.info("-- + ppm_release_master - UPDATE \n")
        logging.info(query_ppm_update + ";\n")
        logging.info(f"-- ++++++++++++++++++++++++++++++++++ FIN ++++++++++++++++++++++++++++++++++++++++ \n")
        res = cursor_source.execute(query_update)
        logging.info("query_update: %s" % res)
        res = cursor_source.execute(query_ppm_update)
        logging.info("query_ppm_update: %s" % res)

    except DB_CONNECTION_ERROR:
        logging.error("EXCEPTION: DB CONNECTION ERROR PC_EXT")
        return_flag = False
    except ETL_PPM_REPLICATION_MASTER_ERROR:
        STATUS_MESSAGE = "NO RECORDS PRESENT IN etl_ppm_replication_master TABLE"
        logging.error("EXCEPTION: " + STATUS_MESSAGE)
        return_flag = False
    except Exception as e:
        logging.error("Error - {} . Line No - {} ".format(str(e), str(sys.exc_info()[-1].tb_lineno)))
        return_flag = False
    return return_flag


if __name__ == '__main__':
    from configparser import ConfigParser as conf_ConfigParser

    statFile = ""
    try:
        parser = argparse.ArgumentParser(description="PPM Product Catalog Replication Script")
        parser.add_argument('--releaseId', required=True, help="Release ID")
        parser.add_argument('--releaseType', required=True, help="Release Type")
        parser.add_argument('--replicationTarget', required=True, help="Replication Target")
        parser.add_argument('--opId', required=True, help="Operation ID")
        parser.add_argument('--buId', required=True, help="Business Unit ID")
        parser.add_argument('--replicationJobId', required=True, help="Replication Job ID")
        args = parser.parse_args()
        replicationJobId = args.replicationJobId

        json_file_path = "/app/scripts/PPM_Release_Management/Product_Catalog_ETL/config/dna.json"
        conf_file_path = "/app/scripts/PPM_Release_Management/Product_Catalog_ETL/config/ppm_pc_replication.conf"
        log_file = f'/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/{replicationJobId}_ppm_pc_replication_update_dna.log'
        statFilePath = f'/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/{replicationJobId}_ppm_pc_replication_insert_update_dna.status'
        statFile = open(statFilePath, "w")

        # Set up logging
        CONFIG = conf_ConfigParser()
        CONFIG.read(conf_file_path)
        logging.basicConfig(filename=log_file,
                            level=CONFIG.get('CONFIG_LOGGING', 'LOG_LEVEL', raw=True),
                            format=CONFIG.get('CONFIG_LOG_FORMAT', 'LOG_FORMAT_DISP', raw=True),
                            datefmt=CONFIG.get('CONFIG_LOG_FORMAT', 'LOG_FORMAT_DATE', raw=True))
        logging.info('LOGGER initiated')
        encoding = CONFIG.get('CONFIG_GENERIC', 'DB_CHARSET', raw=True)

        # Read JSON credentials from file
        if not os.path.exists(json_file_path):
            logging.error("CREDENTIAL FILE MISSING")
            logging.error("CREDENTIAL FILE: %s" % json_file_path)
            raise FileNotFoundError("CREDENTIAL FILE MISSING")
        with open(json_file_path) as json_file:
            json_data = json_load(json_file)

        if main(args, json_data, log_file, logging, encoding):
            print("Insertion of data successful")
            statFile.write("SUCCESS")
        else:
            statFile.write("FAILED")
    except FileNotFoundError as ferr:
        print("Error - {} . Line No - {} ".format(str(ferr), str(sys.exc_info()[-1].tb_lineno)))
        if isinstance(statFile, io_TextIOBase):
            statFile.write("FAILED")
Line No - {} ".format(str(err), str(sys.exc_info()[-1].tb_lineno))) statFile.write("FAILED") if isinstance(statFile, io_TextIOBase): statFile.close() so in the above iam comparing content of two dataframes which has fteched query results and iam comparing based on the primarykey column actually i have below coulumns in my both the databases eprm_seq_nbr_source,eprm_catalog_source,eprm_entity_type_source,eprm_table_name_source,eprm_table_col_pk,eprm_table_col_pk_seq_source,eprm_table_type_source,eprm_table_alias_source,eprm_parent_table_name_source,eprm_join_cols_reim_source,eprm_join_cols_entity_source,eprm_join_with_reim_source,eprm_enabled_flg_source,eprm_reim_entity_id_source,eprm_reim_entity_ref_id_source,eprm_reim_entity_ref_id_1_source,eprm_reim_entity_ref_id_2_source,eprm_reim_entity_desc_source,eprm_reim_version_source,eprm_remarks_source,eprm_created_by_source,eprm_created_on_source,eprm_updated_by_source,eprm_updated_on_source,eprm_seq_nbr_target,eprm_catalog_target,eprm_entity_type_target,eprm_table_name_target,eprm_table_col_pk_seq_target,eprm_table_type_target,eprm_table_alias_target,eprm_parent_table_name_target,eprm_join_cols_reim_target,eprm_join_cols_entity_target,eprm_join_with_reim_target,eprm_enabled_flg_target,eprm_reim_entity_id_target,eprm_reim_entity_ref_id_target,eprm_reim_entity_ref_id_1_target,eprm_reim_entity_ref_id_2_target,eprm_reim_entity_desc_target,eprm_reim_version_target,eprm_remarks_target,eprm_created_by_target,eprm_created_on_target,eprm_updated_by_target,eprm_updated_on_target and now i want to ignore the content comparision only these eprm_created_on,eprm_updated_by and eprm_updated_on and i want to procedd with another