Untitled

The whole code is written in a Jupyter notebook.
This code can only be used for one program at a time.
Can you make a single function or class to which I can give a list of programs, and which returns one result DataFrame with the results for all the given programs appended together?

Make sure you don't skip any step, or modify any step in a way that causes problems in getting the end results.

The entire code works perfectly fine; only make sure it also works when I give it a list of program names and returns a single result DataFrame containing the results for all of those program names.

import networkx as nx
import pandas as pd
import sqlite3
import re

import matplotlib.pyplot as plt

# Connect to the SQLite database file and load every BRE_LOGIC_TBL row that
# belongs to one program.
# conn    = sqlite3.connect(r"app_db.db")
conn = sqlite3.connect(r"/c01/home/lidtd3o/APP_K/app_db_pc.db")

# Name of the single program analysed by the rest of this script.
program = "PAJSFN20"

# The program name is bound as a query parameter (sqlite3 "?" placeholder)
# rather than formatted into the SQL text, so a name containing a quote can
# neither break nor alter the statement.
data = pd.read_sql_query(
    sql="SELECT * FROM BRE_LOGIC_TBL WHERE Program_Name = ?",
    con=conn,
    params=(program,),
)
# data = data.applymap(cnvert_to_float)

def find_valid_rows(df, thresholds):
    """Return the rows matching the first threshold that yields any match.

    A row matches a threshold when its gap ``Sequence - Conditional_Seq`` is
    strictly greater than that threshold and at least one row of ``df`` whose
    ``Sequence`` lies in ``[Conditional_Seq, Sequence]`` has a non-null
    ``Target``.

    Thresholds are tried in the order given.  For the first threshold with at
    least one matching row, a ``Threshold > N:`` line is printed and the
    matching rows are returned.  If no threshold matches, an empty
    ``DataFrame`` is returned.
    """

    def spans_a_target(record, limit):
        # Row-wise predicate; `limit` is the threshold currently being tried.
        upper = record['Sequence']
        lower = record['Conditional_Seq']
        if (upper - lower) <= limit:
            return False
        in_window = (df['Sequence'] >= lower) & (df['Sequence'] <= upper)
        return df[in_window]['Target'].notna().any()

    for threshold in thresholds:
        keep = df.apply(spans_a_target, axis=1, args=(threshold,))
        matches = df[keep]
        if matches.empty:
            continue
        print(f"Threshold > {threshold}:")
        return matches
    return pd.DataFrame()
import ast

# Gap thresholds handed to find_valid_rows, which tries them in this order.
thresholds = [50, 45, 40, 35, 30, 25, 20, 15, 10, 5]

fil = find_valid_rows(data, thresholds)
if fil.empty:
    # Without a candidate row the idxmax lookup below cannot work; stop with
    # a clear message instead of an obscure pandas error.
    raise ValueError("No row satisfies any threshold; choose another program")

# Keep only the last candidate row for each paragraph.
fil = fil.drop_duplicates(subset=['Paragraph_Name'], keep='last')

# Reduce to the single candidate with the widest Sequence - Conditional_Seq gap.
fil = fil.loc[[fil.eval('Sequence - Conditional_Seq').idxmax()]].reset_index()

start = int(fil['Conditional_Seq'][0])
end = int(fil['Sequence'][0])

# Copy the slice so the column assignments below write to an independent
# frame instead of a possible view of `data` (avoids SettingWithCopyWarning).
fil_df = data[(data['Sequence'] >= start) & (data['Sequence'] <= end)].copy()


def _parse_cell(cell):
    """Parse a non-empty string with ast.literal_eval; anything else becomes None."""
    return ast.literal_eval(cell) if isinstance(cell, str) and cell else None


# Series.map is used per column instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for column in ('Source', 'Target'):
    fil_df[column] = fil_df[column].map(_parse_cell)

# Sequence numbers of the IF / ELSE / EVALUATE / WHEN rows in the range.
seq_list = fil_df.loc[
    fil_df['Keywords'].isin(['IF', 'ELSE', 'EVALUATE', 'WHEN']), 'Sequence'
].tolist()

# Rows where both Source and Target are present after parsing.
fil_df_1 = fil_df[fil_df[['Source', 'Target']].notna().all(axis=1)]

# Of those, rows whose Conditional_Seq is one of the collected sequence numbers.
fil_df_2 = fil_df_1[fil_df_1['Conditional_Seq'].isin(seq_list)]

import pandas as pd
import ast
def is_literal(value):
    """Return True when *value* evaluates, via ``ast.literal_eval``, to a ``str``.

    In practice this is True for text that is itself a quoted string literal
    (for example ``"'ABC'"``) and False for numbers, names, expressions and
    anything that cannot be evaluated as a literal.

    ``ast.literal_eval`` is documented to raise ``ValueError``, ``TypeError``,
    ``SyntaxError`` and ``RecursionError`` on malformed input; all of them
    mean "not a string literal" here, so each is mapped to False.
    """
    try:
        result = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, RecursionError):
        # TypeError: e.g. "{[1]: 2}" (unhashable dict key);
        # RecursionError: pathologically nested input.
        return False
    return isinstance(result, str)

def _first_item_is_literal(items):
    """Apply is_literal to the first element; empty or missing values give False."""
    return is_literal(items[0]) if items else False


# Per-row flags: is the first Source / Target entry a string literal?
source_literal = fil_df_2['Source'].apply(_first_item_is_literal)
target_literal = fil_df_2['Target'].apply(_first_item_is_literal)

# Preferred filter: neither the Source nor the Target starts with a literal.
fil_df_3 = fil_df_2[~source_literal & ~target_literal]

# Fallback 1: only require a non-literal Target.
if fil_df_3.empty:
    print("Filtering based on Target only")
    fil_df_3 = fil_df_2[~target_literal]

# Fallback 2: only require a non-literal Source.
if fil_df_3.empty:
    print("Filtering based on Source only")
    fil_df_3 = fil_df_2[~source_literal]

# Matches the standalone word OF (not e.g. PROOF).
_OF_WORD = re.compile(r'\bOF\b')


def _target_mentions_of(target):
    """Return True if a target string, or any string inside a list/tuple target, contains the word OF."""
    if isinstance(target, str):
        return _OF_WORD.search(target) is not None
    if isinstance(target, (list, tuple)):
        return any(
            isinstance(item, str) and _OF_WORD.search(item) is not None
            for item in target
        )
    return False


def filter_and_select(df):
    """Pick the row with the smallest ``Conditional_Seq``, preferring targets without ``OF``.

    Args:
        df: Frame with ``Target`` and ``Conditional_Seq`` columns.  ``Target``
            cells may be plain strings or lists/tuples of strings.

    Returns:
        ``df`` itself when it has at most one row.  Otherwise the single row
        (as returned by ``.loc`` on the ``idxmin`` label) with the smallest
        ``Conditional_Seq`` among the rows whose target does not contain the
        standalone word ``OF``; if every row contains it, all rows are
        considered.
    """
    if len(df) <= 1:
        return df
    # The .str accessor yields NaN for list-valued cells, so a str.contains
    # test would never exclude anything once Target holds parsed lists; check
    # each cell explicitly instead.
    has_of = df['Target'].map(_target_mentions_of).astype(bool)
    new_df = df[~has_of]
    target_df = new_df if not new_df.empty else df
    return target_df.loc[target_df['Conditional_Seq'].idxmin()]


# Select the final row, or report that nothing qualified for this program.
if fil_df_3.empty:
    # Bind `result` explicitly so the name always exists and a stale value
    # from an earlier notebook run is never reported as this run's result.
    result = None
    print("Choose other program")
else:
    result = filter_and_select(fil_df_3)
    print(result)  # Final result
Editor is loading...