Untitled

import pandas as pd

# Define your primary query
primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"

# Execute the primary query and get the primary dataframe
primary_df = pd.read_sql(primary_query, con=connection_ppm)

# Iterate through the primary dataframe
for index, row in primary_df.iterrows():
    # Get the table name from the 'eprm_table_name' column
    table_name = row['eprm_table_name']
    
    # Create source and target queries
    source_query = f"SELECT * FROM {schema_source}.{table_name}"
    target_query = f"SELECT * FROM {schema_ext}.{table_name}"
    
    # Execute source and target queries to get dataframes
    source_df = pd.read_sql(source_query, connection_source)
    target_df = pd.read_sql(target_query, connection_ext)
    
    # Iterate through rows in source dataframe
    for source_index, source_row in source_df.iterrows():
        # Get the primary key value from the source dataframe
        pk_column_name = row['eprm_table_col_pk']
        pk_value = source_row[pk_column_name]
        
        # Find the corresponding row in the target dataframe based on the primary key
        target_row = target_df[target_df[pk_column_name] == pk_value]
        
        # Check for differences between source and target rows
        for column_name in source_df.columns:
            source_val = source_row[column_name]
            target_val = target_row[column_name].values[0]  # Get the value from the target dataframe
            
            # If values are different, construct an update query
            if source_val != target_val:
                update_query = f"UPDATE {schema_ext}.{table_name} SET {column_name} = '{source_val}' WHERE {pk_column_name} = '{pk_value}'"
                
                # Execute the update query to update the differing record in the target
                # Be sure to execute this query using your database connection

# End of loop
Editor is loading...