# Assuming source_df and target_df have unique primary keys
# Merge source_df and target_df on the primary key; indicator=True adds a '_merge'
# column that records whether each source row found a match in target_df
merged_df = source_df.merge(target_df, on=pk, how='left', suffixes=('', '_target'), indicator=True)
# Create a boolean mask for rows that exist only in the source (no match in the target)
insert_mask = merged_df['_merge'] == 'left_only'
# Filter rows missing from the target (these need to be inserted)
rows_to_insert = merged_df[insert_mask]
# Filter rows that matched an existing target row (these are candidates for an update;
# note this keeps every matched row, not only the ones whose values actually differ)
rows_to_update = merged_df[~insert_mask]
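# Optional refinement (sketch, assuming target_df shares source_df's column names so the
# overlapping non-key columns carry the '_target' suffix): keep only the matched rows whose
# values actually differ, so unchanged rows are not rewritten. NaNs compare as unequal here.
# Uncomment the last line to apply it.
compare_cols = [c[:-len('_target')] for c in rows_to_update.columns if c.endswith('_target')]
changed_mask = (
    rows_to_update[compare_cols]
    .ne(rows_to_update[[f'{c}_target' for c in compare_cols]].set_axis(compare_cols, axis=1))
    .any(axis=1)
)
# rows_to_update = rows_to_update[changed_mask]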
# Iterate over rows to insert
# (assumes every source_df column maps directly to a column of the target table)
insert_columns = list(source_df.columns)
for index, row in rows_to_insert.iterrows():
    # Quote each value of this row as a SQL string literal (NULLs and escaping are not handled)
    insert_values = [f"'{row[col]}'" for col in insert_columns]
    # Generate an INSERT query dynamically and execute it
    insert_query = f"INSERT INTO {schema_ext}.{table_name} ({', '.join(insert_columns)}) VALUES ({', '.join(insert_values)})"
    try:
        print(insert_query)
        # cursor_ext.execute(insert_query)
    except Exception as e:
        # Log the failure and move on to the next row instead of aborting the whole load
        print(f"Insert failed for {pk}={row[pk]}: {e}")
        continue
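# A safer alternative for the inserts (sketch): one parameterized INSERT so the DB driver
# handles quoting and escaping. The '%s' placeholder style is an assumption (psycopg2/MySQL);
# pyodbc and sqlite3 expect '?' instead.
param_insert = (
    f"INSERT INTO {schema_ext}.{table_name} ({', '.join(insert_columns)}) "
    f"VALUES ({', '.join(['%s'] * len(insert_columns))})"
)
insert_params = [tuple(row[col] for col in insert_columns) for _, row in rows_to_insert.iterrows()]
print(param_insert)
# cursor_ext.executemany(param_insert, insert_params)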
# Iterate over rows to update
# (assumes every non-key source_df column maps directly to a column of the target table)
update_columns = [col for col in source_df.columns if col != pk]
for index, row in rows_to_update.iterrows():
    # Build "column = 'value'" assignments from the source-side values of this row
    column_updates = [f"{col} = '{row[col]}'" for col in update_columns]
    # Generate an UPDATE query dynamically and execute it
    update_query = f"UPDATE {schema_ext}.{table_name} SET {', '.join(column_updates)} WHERE {eprm_table_col_pk} = '{row[pk]}'"
    try:
        print(update_query)
        # cursor_ext.execute(update_query)
    except Exception as e:
        # Log the failure and move on to the next row instead of aborting the whole load
        print(f"Update failed for {pk}={row[pk]}: {e}")
        continue
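# The same idea for the updates (sketch, same '%s' placeholder assumption): a parameterized
# SET/WHERE executed row by row, with the values passed separately from the SQL text.
param_update = (
    f"UPDATE {schema_ext}.{table_name} "
    f"SET {', '.join(f'{col} = %s' for col in update_columns)} "
    f"WHERE {eprm_table_col_pk} = %s"
)
print(param_update)
for _, row in rows_to_update.iterrows():
    update_params = tuple(row[col] for col in update_columns) + (row[pk],)
    # cursor_ext.execute(param_update, update_params)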