Untitled
unknown
plain_text
10 months ago
2.3 kB
3
Indexable
def find_previous_seq_id_flow(df, start_seq_id, specific_target=None):
    """Trace the chain of earlier sequence ids that feed ``start_seq_id``.

    Starting at ``start_seq_id``, each hop takes the first row of the
    current sequence id, parses its ``Source`` cell with
    ``preprocess_values`` and uses the first parsed source value. It then
    jumps to the largest ``Sequence`` smaller than the current one whose
    parsed ``Target`` contains that value. The walk stops when no such
    predecessor exists, when the current id has no rows, or when the row
    has no source values.

    Args:
        df: DataFrame with ``Sequence``, ``Source`` and ``Target`` columns.
        start_seq_id: Sequence id to start from. A falsy value (``None``,
            ``0``) skips the walk entirely.
        specific_target: Optional raw ``Target`` value used to pick the row
            of the *starting* sequence id only. Predecessor hops are not
            filtered by it; a falsy value disables the filter.

    Returns:
        list: Visited sequence ids, from ``start_seq_id`` back to the
        earliest predecessor found. The id the walk stopped on is included
        unless it is NA or already present.

    Note:
        ``preprocess_values`` is defined elsewhere; it is assumed to return
        a (possibly empty) container of parsed values that supports
        indexing and ``in``.
    """
    flow = []
    current_seq_id = start_seq_id
    at_start = True
    # (sequence id, parsed targets) for rows before the first hop. Built
    # lazily on first use; the id only decreases, so this one snapshot
    # covers every later hop without re-parsing ``Target`` each iteration.
    earlier_targets = None

    while current_seq_id:
        rows = df[df['Sequence'] == current_seq_id]
        if rows.empty:
            break  # Unknown sequence id: nothing to follow.

        # The target filter selects the starting row only. Applying it to
        # predecessors would end the walk after a single hop, because a
        # predecessor's target is the current row's source.
        if at_start and specific_target:
            rows = rows[rows['Target'] == specific_target]
            if rows.empty:
                break
        at_start = False

        source_values = preprocess_values(rows.iloc[0]['Source'])
        if not source_values:
            break  # No source to trace back from.
        first_source = source_values[0]

        if earlier_targets is None:
            earlier = df[df['Sequence'] < current_seq_id]
            earlier_targets = list(
                zip(earlier['Sequence'], earlier['Target'].apply(preprocess_values))
            )

        # Earlier sequence ids whose parsed targets produce this source.
        candidates = [
            seq_id
            for seq_id, targets in earlier_targets
            if seq_id < current_seq_id and first_source in targets
        ]
        if not candidates:
            break  # No predecessor produces this source.

        flow.append(current_seq_id)
        # The closest (largest) earlier sequence id is the predecessor.
        current_seq_id = int(max(candidates))

    # Record the id the walk stopped on if it is valid and not yet listed.
    if current_seq_id not in flow and not pd.isna(current_seq_id):
        flow.append(current_seq_id)
    return flow
Editor is loading...
Leave a Comment