# Assuming source_df and target_df have unique primary keys
# Merge source_df and target_df on the primary key; indicator=True adds a '_merge'
# column that records whether each source row found a match in target_df
merged_df = source_df.merge(target_df, on=pk, how='left', suffixes=('', '_target'), indicator=True)
# Create a boolean mask for rows that exist only in the source (no match in the target)
insert_mask = merged_df['_merge'] == 'left_only'
# Filter rows missing from the target (these need to be inserted)
rows_to_insert = merged_df[insert_mask]
# Filter rows that matched an existing target row (these are candidates for an update;
# note this keeps every matched row, not only the ones whose values actually differ)
rows_to_update = merged_df[~insert_mask]
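# Optional refinement (sketch, assuming target_df shares source_df's column names so the
# overlapping non-key columns carry the '_target' suffix): keep only the matched rows whose
# values actually differ, so unchanged rows are not rewritten. NaNs compare as unequal here.
# Uncomment the last line to apply it.
compare_cols = [c[:-len('_target')] for c in rows_to_update.columns if c.endswith('_target')]
changed_mask = (
    rows_to_update[compare_cols]
    .ne(rows_to_update[[f'{c}_target' for c in compare_cols]].set_axis(compare_cols, axis=1))
    .any(axis=1)
)
# rows_to_update = rows_to_update[changed_mask]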
# Iterate over rows to insert
# (assumes every source_df column maps directly to a column of the target table)
insert_columns = list(source_df.columns)
for index, row in rows_to_insert.iterrows():
    # Quote each value of this row as a SQL string literal (NULLs and escaping are not handled)
    insert_values = [f"'{row[col]}'" for col in insert_columns]
    # Generate an INSERT query dynamically and execute it
    insert_query = f"INSERT INTO {schema_ext}.{table_name} ({', '.join(insert_columns)}) VALUES ({', '.join(insert_values)})"
    try:
        print(insert_query)
        # cursor_ext.execute(insert_query)
    except Exception as e:
        # Log the failure and move on to the next row instead of aborting the whole load
        print(f"Insert failed for {pk}={row[pk]}: {e}")
        continue
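# A safer alternative for the inserts (sketch): one parameterized INSERT so the DB driver
# handles quoting and escaping. The '%s' placeholder style is an assumption (psycopg2/MySQL);
# pyodbc and sqlite3 expect '?' instead.
param_insert = (
    f"INSERT INTO {schema_ext}.{table_name} ({', '.join(insert_columns)}) "
    f"VALUES ({', '.join(['%s'] * len(insert_columns))})"
)
insert_params = [tuple(row[col] for col in insert_columns) for _, row in rows_to_insert.iterrows()]
print(param_insert)
# cursor_ext.executemany(param_insert, insert_params)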
# Iterate over rows to update
# (assumes every non-key source_df column maps directly to a column of the target table)
update_columns = [col for col in source_df.columns if col != pk]
for index, row in rows_to_update.iterrows():
    # Build "column = 'value'" assignments from the source-side values of this row
    column_updates = [f"{col} = '{row[col]}'" for col in update_columns]
    # Generate an UPDATE query dynamically and execute it
    update_query = f"UPDATE {schema_ext}.{table_name} SET {', '.join(column_updates)} WHERE {eprm_table_col_pk} = '{row[pk]}'"
    try:
        print(update_query)
        # cursor_ext.execute(update_query)
    except Exception as e:
        # Log the failure and move on to the next row instead of aborting the whole load
        print(f"Update failed for {pk}={row[pk]}: {e}")
        continue
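# The same idea for the updates (sketch, same '%s' placeholder assumption): a parameterized
# SET/WHERE executed row by row, with the values passed separately from the SQL text.
param_update = (
    f"UPDATE {schema_ext}.{table_name} "
    f"SET {', '.join(f'{col} = %s' for col in update_columns)} "
    f"WHERE {eprm_table_col_pk} = %s"
)
print(param_update)
for _, row in rows_to_update.iterrows():
    update_params = tuple(row[col] for col in update_columns) + (row[pk],)
    # cursor_ext.execute(param_update, update_params)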