Untitled

import pandas as pd
import numpy as np

primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
primary_df = pd.read_sql(primary_query, con=connection_ppm)

for index, row in primary_df.iterrows():
    table_name = row['eprm_table_name']

    source_query = f"SELECT * FROM {schema_source}.{table_name}"
    source_df = pd.read_sql(source_query, connection_source)

    target_query = f"SELECT * FROM {schema_ext}.{table_name}"
    target_df = pd.read_sql(target_query, connection_ext)

    # Find rows with differences
    diff_df = source_df.compare(target_df, keep_shape=True)

    if not diff_df.empty:
        # Generate an update query dynamically
        update_query = f"UPDATE {table_name} SET "
        column_updates = []

        for column_name in diff_df.columns:
            source_col = diff_df[column_name]['self']
            target_col = diff_df[column_name]['other']

            # Identify rows where values are different
            diff_rows = np.where(source_col != target_col)[0]

            for row_number in diff_rows:
                source_val = source_col.iloc[row_number]
                target_val = target_col.iloc[row_number]

                # Include the column update in the query
                column_updates.append(f"{column_name} = '{source_val}'")

        update_query += ", ".join(column_updates)
        print(update_query)

        # Execute the update query
        cursor_ext.execute(update_query)
Editor is loading...