Untitled
plain_text
3 days ago
1.5 kB
1
Indexable
Never
import pandas as pd import numpy as np primary_query = f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'" primary_df = pd.read_sql(primary_query, con=connection_ppm) for index, row in primary_df.iterrows(): table_name = row['eprm_table_name'] source_query = f"SELECT * FROM {schema_source}.{table_name}" source_df = pd.read_sql(source_query, connection_source) target_query = f"SELECT * FROM {schema_ext}.{table_name}" target_df = pd.read_sql(target_query, connection_ext) # Find rows with differences diff_df = source_df.compare(target_df, keep_shape=True) if not diff_df.empty: # Generate an update query dynamically update_query = f"UPDATE {table_name} SET " column_updates = [] for column_name in diff_df.columns: source_col = diff_df[column_name]['self'] target_col = diff_df[column_name]['other'] # Identify rows where values are different diff_rows = np.where(source_col != target_col)[0] for row_number in diff_rows: source_val = source_col.iloc[row_number] target_val = target_col.iloc[row_number] # Include the column update in the query column_updates.append(f"{column_name} = '{source_val}'") update_query += ", ".join(column_updates) print(update_query) # Execute the update query cursor_ext.execute(update_query)