Untitled

 avatar
unknown
plain_text
a month ago
2.3 kB
1
Indexable
def find_previous_seq_id_flow(df, start_seq_id, specific_target=None):
    """
    Walk backwards from ``start_seq_id`` and return the chain of seq_ids.

    At each step the first row for the current seq_id is taken, its
    ``Source`` cell is parsed with ``preprocess_values`` and the first parsed
    source value is looked up in the parsed ``Target`` cells of all rows with
    a smaller ``Sequence``. The largest matching ``Sequence`` becomes the next
    seq_id to visit.

    Args:
        df: DataFrame with ``Sequence``, ``Source`` and ``Target`` columns.
        start_seq_id: seq_id the walk starts from.
        specific_target: Optional value; when truthy, only rows of the
            *starting* seq_id whose raw ``Target`` equals it are considered.
            It is not applied to earlier seq_ids reached during the walk.

    Returns:
        List of seq_ids in visiting order (start first, earliest last). The
        seq_id at which the walk stops is appended as well unless it is NA,
        even when no row was found for it.
    """
    flow = []
    current_seq_id = start_seq_id
    sequences = df['Sequence']

    # Parsed lazily on the first hop and then reused: every later hop only
    # looks at a subset of these rows because seq_ids strictly decrease.
    earlier_sequences = None
    earlier_targets = None
    at_start = True

    while current_seq_id:
        rows = df[sequences == current_seq_id]
        if rows.empty:
            break  # Nothing recorded for this seq_id

        # The target filter selects the starting row only; re-applying it on
        # later hops would end the walk as soon as an earlier step has a
        # different target.
        if at_start and specific_target:
            rows = rows[rows['Target'] == specific_target]
            if rows.empty:
                break
        at_start = False

        # Follow the first source value of the first matching row
        source_values = preprocess_values(rows.iloc[0]['Source'])
        if not source_values:
            break
        first_source = source_values[0]

        if earlier_targets is None:
            earlier_mask = sequences < current_seq_id
            earlier_sequences = list(sequences[earlier_mask])
            earlier_targets = [
                preprocess_values(raw_target)
                for raw_target in df.loc[earlier_mask, 'Target']
            ]

        # NOTE(review): preprocess_values is assumed to possibly return None
        # for empty cells (its result is truth-tested above) - hence the guard.
        candidate_ids = [
            seq_id
            for seq_id, targets in zip(earlier_sequences, earlier_targets)
            if seq_id < current_seq_id
            and targets is not None
            and first_source in targets
        ]
        if not candidate_ids:
            break  # No earlier step produces this source

        # Record the step just processed and jump to the closest earlier
        # seq_id that has the source among its targets.
        flow.append(current_seq_id)
        current_seq_id = int(max(candidate_ids))

    # Append the seq_id the walk stopped at, if it is valid and not yet listed
    if current_seq_id not in flow and not pd.isna(current_seq_id):
        flow.append(current_seq_id)

    return flow
Leave a Comment