Untitled

mail@pastecode.io avatarunknown
plain_text
9 days ago
2.1 kB
2
Indexable
Never
primary_query=f"SELECT * FROM {schema_ppm}.etl_ppm_replication_master WHERE eprm_catalog='SC' AND eprm_enabled_flg='Y'"
        primary_df=pd_read_sql(primary_query,con=connection_ppm)
        for index, row in primary_df.iterrows():
            table_name = row['eprm_table_name']

            source_query = f"SELECT * FROM {schema_source}.{table_name}"
            source_df = pd.read_sql(source_query, connection_source)

            target_query = f"SELECT * FROM {schema_ext}.{table_name}"
            target_df = pd.read_sql(target_query, connection_ext)
            if len(source_df) != len(target_df):
                print(f"Error: {table_name} in Source and Target databases have different row counts.")
            else:
                for index, (source_row, target_row) in enumerate(zip(source_df.iterrows(), target_df.iterrows())):
                    source_row_data = source_row[1]
                    target_row_data = target_row[1]
                    row_number = index + 1

                    if not source_row_data.equals(target_row_data):
                        # Generate an update query dynamically
                        update_query = f"UPDATE {table_name} SET "
                        column_updates = []

                        for column_name, source_val in source_row_data.items():
                            target_val = target_row_data[column_name]
                            if source_val != target_val:
                                column_updates.append(f"{column_name} = '{source_val}'")

                        update_query += ", ".join(column_updates)
                        update_query=f" where rownum = {row_number}"
                        print(update_query)
                        cursor_ext.execute(update_query)

in the above python code iam comapring the differences and in below line iam iterating again again for all rows its there any shortcut to check all once at a time ca you optimize them
for index, (source_row, target_row) in enumerate(zip(source_df.iterrows(), target_df.iterrows())):