Untitled
plain_text
5 days ago
1.9 kB
2
Indexable
Never
import pandas as pd # Define your primary query primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'" # Execute the primary query and get the primary dataframe primary_df = pd.read_sql(primary_query, con=connection_ppm) # Iterate through the primary dataframe for index, row in primary_df.iterrows(): # Get the table name from the 'eprm_table_name' column table_name = row['eprm_table_name'] # Create source and target queries source_query = f"SELECT * FROM {schema_source}.{table_name}" target_query = f"SELECT * FROM {schema_ext}.{table_name}" # Execute source and target queries to get dataframes source_df = pd.read_sql(source_query, connection_source) target_df = pd.read_sql(target_query, connection_ext) # Find differences between source and target dataframes differences = source_df.compare(target_df, keep_shape=True) # Iterate through the differences for diff_index, diff_row in differences.iterrows(): # Construct the dynamic update statement update_columns = [] for column_name, diff_value in diff_row.items(): update_columns.append(f"{column_name} = '{diff_value}'") # Get the primary key and its value pk_column_name = row['eprm_table_col_pk'] pk_value = diff_row[pk_column_name] # Construct the WHERE condition where_condition = f"WHERE {pk_column_name} = '{pk_value}'" # Construct the dynamic update query update_query = f"UPDATE {schema_ext}.{table_name} SET {', '.join(update_columns)} {where_condition}" # Execute the update query to update the differing record in the target # Be sure to execute this query using your database connection # End of loop