# ================================================================================================================================================================================
# PPM PRODUCT CATALOG (PC) REPLICATION - CE CUSTOM TABLES REPLICATE
# DATE AUTHOR VER CHANGE DESCRIPTION
# -------- --------- ----- ------------------
# 21.08.23 Veerendra 1.0 The below script replicates ppm table records dynamically from "etl_ppm_replication_master" "PC_EXT"
#
# python3 ppm_pc_ext_insert.py --releaseId 275.2 --releaseType DEPLOYED --replicationTarget SIT --opId HOB --buId DEFAULT --replicationJobId REP_990_234
# ================================================================================================================================================================================
import pandas as pd
import json
from base64 import b64decode as base_b64decode
import logging
from pandas import read_sql as pd_read_sql
import sys
from sqlalchemy import create_engine
import argparse
from io import TextIOBase as io_TextIOBase
from json import load as json_load
import os
# User-defined exceptions for the replication script.
class Error(Exception):
    """Base class for the script's own exceptions."""


class ETL_PPM_REPLICATION_MASTER_ERROR(Error):
    """Signals a problem with the etl_ppm_replication_master data.

    main() handles it by logging that no records are present in the
    etl_ppm_replication_master table.
    """


class DB_CONNECTION_ERROR(Error):
    """Raised by main() when a required database connection is missing."""
def setDbConnection(logging, json_data, serverInfo, encoding):
    """Create a SQLAlchemy engine and open a connection for one configured server.

    Args:
        logging: Logger-like object; only ``info`` and ``error`` are called.
        json_data: Parsed credentials mapping keyed by server name. The selected
            entry is read for ENCRYPT, DB_HOST, DB_PORT, DB_USER, DB_TYPE,
            DB_SCHEMA, DB_PASSWORD and, for Oracle, DB_SID.
        serverInfo: Key of the server entry inside ``json_data``.
        encoding: Forwarded to ``create_engine(encoding=...)``.

    Returns:
        Tuple ``(engine, connection, schema, db_type)``. On any failure the
        first three items are ``None`` while ``db_type`` keeps whatever was
        read from the configuration. Failures are logged, never raised.
    """
    from base64 import b64decode as base_b64decode
    from urllib.parse import quote_plus

    cnx = cursor = schema = db_type = None
    try:
        server_cfg = json_data.get(serverInfo, {})
        encrypt = server_cfg.get('ENCRYPT')
        host = server_cfg.get('DB_HOST')
        port = server_cfg.get('DB_PORT')
        user = server_cfg.get('DB_USER')
        db_type = server_cfg.get('DB_TYPE')
        schema = server_cfg.get('DB_SCHEMA')
        if encrypt == 'Y':
            # The stored password is base64-encoded when ENCRYPT is 'Y'.
            password = base_b64decode(server_cfg.get('DB_PASSWORD')).decode('utf-8')
        else:
            password = server_cfg.get('DB_PASSWORD')

        # Escape the credentials: characters such as '@', '/' or ':' would
        # otherwise be parsed as URL delimiters and corrupt the connection URL.
        credentials = f"{quote_plus(str(user))}:{quote_plus(str(password))}"
        if db_type in ('MYSQL', 'MARIA'):
            connection_text = f"mysql+mysqlconnector://{credentials}@{host}:{port}/{schema}"
        elif db_type == 'ORACLE':
            connection_text = f"oracle://{credentials}@{host}:{port}/{server_cfg.get('DB_SID')}"
        else:
            # Fail with an explicit message instead of an UnboundLocalError on
            # connection_text further down.
            raise ValueError(f"Unsupported DB_TYPE {db_type!r} for server {serverInfo}")

        # Imported only once a connection is actually attempted; the dialect
        # loads its own database driver.
        from sqlalchemy import create_engine as sqlalchemy_create_engine
        cnx = sqlalchemy_create_engine(connection_text, encoding=encoding)
        cursor = cnx.connect()
        logging.info(f"Connected to database server {serverInfo}: {host}:{port}/{schema}")
    except Exception as dbexp:
        logging.error("DATABASE CONNECTION EXCEPTION")
        logging.error("Error - {} . Line No - {} ".format(str(dbexp), str(dbexp.__traceback__.tb_lineno)))
        cnx = cursor = schema = None
    return cnx, cursor, schema, db_type
def find_master_differences(source_df, target_df, key_column='eprm_table_col_pk',
                            ignore_columns=('eprm_created_on', 'eprm_updated_by', 'eprm_updated_on')):
    """Return the key-matched rows whose compared columns differ between two frames.

    The frames are inner-merged on ``key_column``; every other column present
    in both frames is compared pairwise (``<col>_source`` against
    ``<col>_target``), except the names listed in ``ignore_columns``. Two
    missing values (NULL/NaN) in the same position count as equal.

    Args:
        source_df: DataFrame holding the source rows.
        target_df: DataFrame holding the target rows.
        key_column: Column both frames are merged on.
        ignore_columns: Un-suffixed column names excluded from the comparison.

    Returns:
        The differing rows of the merged frame, with all merged columns
        (ignored ones included) kept.
    """
    merged_df = source_df.merge(target_df, on=key_column, suffixes=('_source', '_target'), how='inner')
    skipped = set(ignore_columns) | {key_column}
    compared = [col for col in source_df.columns if col in target_df.columns and col not in skipped]
    row_differs = pd.Series(False, index=merged_df.index)
    for col in compared:
        # Compare each pair explicitly: DataFrame.ne() on the two filtered
        # frames would align on the (different) column labels and report
        # every cell as unequal.
        source_values = merged_df[f'{col}_source']
        target_values = merged_df[f'{col}_target']
        both_missing = source_values.isna() & target_values.isna()
        same = source_values.eq(target_values).fillna(False).astype(bool) | both_missing
        row_differs |= ~same
    return merged_df[row_differs]


def main(args, json_data, log_file, logging, encoding):
    """Compare etl_ppm_replication_master between two servers and back-update the status tables.

    Rows with eprm_catalog='SC' and eprm_enabled_flg='Y' are read from the
    'PPM_PC' and 'SIT' servers, matched on the lower-cased 'eprm_table_col_pk'
    column, and the rows that differ are written to difference.csv. Afterwards
    ppm_replication_status and ppm_release_master on the 'SOURCE' server are
    updated with the resulting status.

    Args:
        args: Parsed command-line arguments; ``releaseId`` and
            ``replicationJobId`` are read.
        json_data: Parsed credentials mapping passed on to setDbConnection.
        log_file: Accepted for interface compatibility; not used here.
        logging: Logger-like object used for all messages.
        encoding: Database charset passed on to setDbConnection.

    Returns:
        True when the run completed, False when any exception was handled.
    """
    return_flag = True
    connection_ppm = connection_source = connection_ext = None
    cursor_ppm = cursor_source = cursor_ext = None
    try:
        releaseId = args.releaseId
        replicationJobId = args.replicationJobId
        STATUS = "InProgress"
        STATUS_MESSAGE = "Insertion of federation tables successful."
        failed_entities = []
        keycolumn = 'eprm_table_col_pk'

        # Connect to PPM_PC database
        connection_ppm, cursor_ppm, schema_ppm, _ = setDbConnection(logging, json_data, 'PPM_PC', encoding)
        # Connect to source database
        connection_source, cursor_source, schema_source, db_type_source = setDbConnection(
            logging, json_data, 'SOURCE', encoding)
        # Connect to target_ext database
        # NOTE(review): the server key is hard-coded to 'SIT' although
        # --replicationTarget is supplied on the command line; confirm whether
        # it should be derived from args.replicationTarget.
        connection_ext, cursor_ext, schema_ext, _ = setDbConnection(logging, json_data, 'SIT', encoding)
        if not (connection_ppm and connection_source and connection_ext):
            raise DB_CONNECTION_ERROR

        # Fetch data from the etl_ppm_replication_master table
        # NOTE(review): ETL_PPM_REPLICATION_MASTER_ERROR is handled below but
        # never raised; confirm whether an empty result should raise it.
        primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        df = pd_read_sql(primary_query, con=connection_ppm)
        target_query = f"SELECT * FROM {schema_ext}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        target_df = pd_read_sql(target_query, con=connection_ext)

        # Validate before touching the key column so a missing column produces
        # the intended message rather than a bare KeyError.
        if keycolumn not in df.columns:
            raise ValueError("'eprm_table_col_pk' column not found in source_df.")
        if keycolumn not in target_df.columns:
            raise ValueError("'eprm_table_col_pk' column not found in target_df.")
        df[keycolumn] = df[keycolumn].str.lower()
        target_df[keycolumn] = target_df[keycolumn].str.lower()
        if df[keycolumn].dtype != target_df[keycolumn].dtype:
            logging.error("'eprm_table_col_pk' column has different data types in source_df and target_df.")
        else:
            # eprm_created_on, eprm_updated_by and eprm_updated_on are left
            # out of the comparison (default of find_master_differences).
            differences = find_master_differences(df, target_df, keycolumn)
            logging.info("Rows differing between source and target: %s" % len(differences))
            differences.to_csv('/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/difference.csv',
                               index=False)

        logging.info("-- ++++++++++++++++++++++++++++++++++ BACK UPDATE STATUS FOR UI ++++++++++++++++++ \n")
        if failed_entities:
            return_flag = False
            STATUS = "Error"
            STATUS_MESSAGE = str(failed_entities).replace("'", '').replace('"', '')
        logging.info("STATUS: %s" % STATUS)
        logging.info("STATUS_MESSAGE: %s" % STATUS_MESSAGE)

        # NOTE(review): both statements are built by string interpolation of
        # command-line values; consider bound parameters if those values can
        # ever come from an untrusted caller.
        # The '/**/' marker is replaced below with the dialect-specific
        # timestamp assignment.
        query_update = (f"UPDATE {schema_source}.ppm_replication_status SET status='{STATUS}'"
                        f", status_message='{STATUS_MESSAGE}'"
                        ", error_description=NULL"
                        f", updated_by='{replicationJobId}' /**/"
                        f" WHERE replication_job_id='{replicationJobId}' AND release_id='{releaseId}'")
        query_ppm_update = (f"UPDATE {schema_source}.ppm_release_master SET replication_status='{STATUS}'"
                            f", updated_by='{replicationJobId}' /**/"
                            f" WHERE release_id='{releaseId}'")
        if db_type_source == 'ORACLE':
            query_update = query_update.replace('/**/', ', updated_date=SYSDATE')
            query_ppm_update = query_ppm_update.replace('/**/', ', updated_on=SYSDATE')
        elif db_type_source in ('MARIA', 'MYSQL'):
            query_update = query_update.replace('/**/', ', updated_date=NOW()')
            query_ppm_update = query_ppm_update.replace('/**/', ', updated_on=NOW()')

        logging.info("-- + ppm_replication_status - UPDATE \n")
        logging.info(query_update + ";\n")
        logging.info("-- + ppm_release_master - UPDATE \n")
        logging.info(query_ppm_update + ";\n")
        logging.info("-- ++++++++++++++++++++++++++++++++++ FIN ++++++++++++++++++++++++++++++++++++++++ \n")

        res = cursor_source.execute(query_update)
        logging.info("query_update: %s" % res)
        res = cursor_source.execute(query_ppm_update)
        logging.info("query_ppm_update: %s" % res)
    except DB_CONNECTION_ERROR:
        logging.error("EXCEPTION: DB CONNECTION ERROR PC_EXT")
        return_flag = False
    except ETL_PPM_REPLICATION_MASTER_ERROR:
        STATUS_MESSAGE = "NO RECORDS PRESENT IN etl_ppm_replication_master TABLE"
        logging.error("EXCEPTION:" + STATUS_MESSAGE)
        return_flag = False
    except Exception as e:
        logging.error("Error - {} . Line No - {} ".format(str(e), str(e.__traceback__.tb_lineno)))
        return_flag = False
    finally:
        # Release the connections first, then the engines that own them.
        releasers = [(conn, 'close') for conn in (cursor_ppm, cursor_source, cursor_ext)]
        releasers += [(engine, 'dispose') for engine in (connection_ppm, connection_source, connection_ext)]
        for resource, method_name in releasers:
            if resource is None:
                continue
            try:
                getattr(resource, method_name)()
            except Exception as release_err:
                logging.error("Error releasing database resource - {}".format(release_err))
    return return_flag
if __name__ == '__main__':
    import logging
    from configparser import ConfigParser as conf_ConfigParser

    # Handle of the status file. It stays None until the file has actually
    # been opened, so the error paths never call write() on a path string.
    statFile = None
    succeeded = False
    try:
        parser = argparse.ArgumentParser(description="PPM Product Catalog Replication Script")
        parser.add_argument('--releaseId', required=True, help="Release ID")
        parser.add_argument('--releaseType', required=True, help="Release Type")
        parser.add_argument('--replicationTarget', required=True, help="Replication Target")
        parser.add_argument('--opId', required=True, help="Operation ID")
        parser.add_argument('--buId', required=True, help="Business Unit ID")
        parser.add_argument('--replicationJobId', required=True, help="Replication Job ID")
        args = parser.parse_args()
        replicationJobId = args.replicationJobId

        json_file_path = "/app/scripts/PPM_Release_Management/Product_Catalog_ETL/config/dna.json"
        conf_file_path = "/app/scripts/PPM_Release_Management/Product_Catalog_ETL/config/ppm_pc_replication.conf"
        log_file = f'/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/{replicationJobId}_ppm_pc_replication_update_dna.log'
        stat_file_path = f'/app/scripts/PPM_Release_Management/Product_Catalog_ETL/logs/{replicationJobId}_ppm_pc_replication_insert_update_dna.status'

        # Opened (and truncated) up front, as before; the final status is
        # written in the finally clause below.
        statFile = open(stat_file_path, "w")

        # Set up logging
        CONFIG = conf_ConfigParser()
        CONFIG.read(conf_file_path)
        logging.basicConfig(filename=log_file,
                            level=CONFIG.get('CONFIG_LOGGING', 'LOG_LEVEL', raw=True),
                            format=CONFIG.get('CONFIG_LOG_FORMAT', 'LOG_FORMAT_DISP', raw=True),
                            datefmt=CONFIG.get('CONFIG_LOG_FORMAT', 'LOG_FORMAT_DATE', raw=True))
        logging.info('LOGGER initiated')
        encoding = CONFIG.get('CONFIG_GENERIC', 'DB_CHARSET', raw=True)

        # Read JSON data from file
        if not os.path.exists(json_file_path):
            logging.error("CREDENTIAL FILE MISSING")
            logging.error("CREDENTIAL FILE: %s" % json_file_path)
            raise FileNotFoundError("CREDENTIAL FILE MISSING")
        with open(json_file_path) as json_file:
            json_data = json_load(json_file)

        succeeded = main(args, json_data, log_file, logging, encoding)
        if succeeded:
            print("Insertion of data successful")
    except Exception as err:
        # Covers FileNotFoundError as well; both were reported identically.
        print("Error - {} . Line No - {} ".format(str(err), str(sys.exc_info()[-1].tb_lineno)))
    finally:
        # Write the outcome exactly once and always close the handle.
        if statFile is not None:
            with statFile:
                statFile.write("SUCCESS" if succeeded else "FAILED")
In the script above, I am comparing the contents of two DataFrames that hold fetched query results, matching rows on the primary-key column. Both of my databases have the columns listed below (shown with the `_source`/`_target` suffixes added by the merge):
eprm_seq_nbr_source,eprm_catalog_source,eprm_entity_type_source,eprm_table_name_source,eprm_table_col_pk,eprm_table_col_pk_seq_source,eprm_table_type_source,eprm_table_alias_source,eprm_parent_table_name_source,eprm_join_cols_reim_source,eprm_join_cols_entity_source,eprm_join_with_reim_source,eprm_enabled_flg_source,eprm_reim_entity_id_source,eprm_reim_entity_ref_id_source,eprm_reim_entity_ref_id_1_source,eprm_reim_entity_ref_id_2_source,eprm_reim_entity_desc_source,eprm_reim_version_source,eprm_remarks_source,eprm_created_by_source,eprm_created_on_source,eprm_updated_by_source,eprm_updated_on_source,eprm_seq_nbr_target,eprm_catalog_target,eprm_entity_type_target,eprm_table_name_target,eprm_table_col_pk_seq_target,eprm_table_type_target,eprm_table_alias_target,eprm_parent_table_name_target,eprm_join_cols_reim_target,eprm_join_cols_entity_target,eprm_join_with_reim_target,eprm_enabled_flg_target,eprm_reim_entity_id_target,eprm_reim_entity_ref_id_target,eprm_reim_entity_ref_id_1_target,eprm_reim_entity_ref_id_2_target,eprm_reim_entity_desc_target,eprm_reim_version_target,eprm_remarks_target,eprm_created_by_target,eprm_created_on_target,eprm_updated_by_target,eprm_updated_on_target
Now I want to exclude only these columns from the content comparison: eprm_created_on, eprm_updated_by and eprm_updated_on. After that I want to proceed with another …