# Paste-site header residue (not part of the script), kept for provenance:
# Untitled
# unknown
# plain_text
# 9 months ago
# 2.9 kB
# 5
# Indexable
import networkx as nx
import pandas as pd
import sqlite3
import re
import matplotlib.pyplot as plt
import numpy as np
import ast
from functions import fill_conditional_seq, program_to_graph
# Upper bound on bound parameters per query; stays well below SQLite's
# historical default limit of 999 host parameters.
_MAX_SQL_PARAMS = 500


def _fetch_logic_rows(conn, program_names, chunk_size=_MAX_SQL_PARAMS):
    """Return the BRE_LOGIC_TBL rows whose Program_Name is in *program_names*.

    Names are passed as bound parameters (never interpolated into the SQL
    text), in batches of at most *chunk_size* names per query.
    """
    frames = []
    # max(..., 1) keeps a single (empty) batch when there are no names, so the
    # caller still receives an empty frame carrying the table's columns.
    for start in range(0, max(len(program_names), 1), chunk_size):
        batch = program_names[start:start + chunk_size]
        placeholders = ",".join("?" * len(batch))
        query = f"SELECT * FROM BRE_LOGIC_TBL WHERE Program_Name IN ({placeholders})"
        frames.append(pd.read_sql_query(sql=query, con=conn, params=batch))
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, ignore_index=True)


def process_data(ground_truth_file: str, database_file: str, output_file: str) -> None:
    """Enrich the ground-truth CSV with matching BRE_LOGIC_TBL rows and save it.

    Steps:
      1. Read ``Program_Name``, ``Identifier`` and ``Program_Snippet`` from
         the ground-truth CSV.
      2. Load every BRE_LOGIC_TBL row of the listed programs from the SQLite
         database.
      3. Build an ``Identifier`` per row as
         ``Program_Name/Target_Value/Source_Value/Children/Parents`` and keep
         only the rows whose identifier appears in the CSV (first occurrence
         of each identifier wins).
      4. Add ``Actuals`` (``Program_Name_Sequence``) and ``Program_Snippet``
         columns and write the result to *output_file* without the index.

    Args:
        ground_truth_file: Path of the ground-truth CSV to read.
        database_file: Path of the SQLite database containing BRE_LOGIC_TBL.
        output_file: Path of the CSV to write. The input CSV is fully read
            before anything is written, so this may equal
            *ground_truth_file* (which is then overwritten).
    """
    # Load the ground truth CSV
    pgms_csv = pd.read_csv(ground_truth_file)
    list_of_identifiers = pgms_csv['Identifier'].to_list()
    # str() mirrors how the names were previously rendered into the query;
    # de-duplicating keeps batches small and avoids fetching rows twice.
    program_names = [str(name) for name in pgms_csv['Program_Name'].drop_duplicates()]

    # Query the SQLite database, always releasing the connection afterwards
    conn = sqlite3.connect(database_file)
    try:
        data = _fetch_logic_rows(conn, program_names)
    finally:
        conn.close()

    # 'Source' holds a stringified sequence; its first element is the source
    # value. Non-string cells (e.g. NULL) are passed through unchanged.
    data['Source_Value'] = data['Source'].apply(
        lambda x: ast.literal_eval(x)[0] if isinstance(x, str) else x
    )

    # Create 'Identifier' column
    identifier_columns = ['Program_Name', 'Target_Value', 'Source_Value', 'Children', 'Parents']
    data['Identifier'] = data[identifier_columns].astype(str).agg('/'.join, axis=1)

    # Keep only ground-truth rows, one per identifier. The explicit copy makes
    # the column assignments below operate on an independent frame.
    data = data[data['Identifier'].isin(list_of_identifiers)]
    data = data.drop_duplicates(subset=['Identifier'], keep='first').copy()

    # Create 'Actuals' column
    data['Actuals'] = data[['Program_Name', 'Sequence']].astype(str).agg('_'.join, axis=1)

    # Map 'Program_Snippet'. Series.map needs a uniquely indexed lookup, so a
    # program listed several times in the CSV contributes its first snippet.
    snippet_by_program = (
        pgms_csv.drop_duplicates(subset='Program_Name', keep='first')
        .set_index('Program_Name')['Program_Snippet']
    )
    data['Program_Snippet'] = data['Program_Name'].map(snippet_by_program)

    # Save to CSV
    data.to_csv(output_file, index=False)
# Example usage
# NOTE: input and output are the same path, so this call overwrites
# ground_truth.csv with the enriched rows.
process_data("ground_truth.csv", "app_db_pc.db", "ground_truth.csv")

# Validation pass: for every program in the ground truth, rebuild its graph
# from BRE_LOGIC_TBL and compare the statements along a path ending at the
# expected node with the stored 'Program_Snippet'.
actuals = pd.read_csv("ground_truth.csv")
conn = sqlite3.connect(r"my_db.db")
count = 0
try:
    for program in actuals['Program_Name'].unique():
        # Ground-truth rows of this program (looked up once, reused below).
        program_rows = actuals.loc[actuals['Program_Name'] == program]

        # Bound parameter instead of string interpolation; str() mirrors how
        # the name was previously rendered into the query text.
        data = pd.read_sql_query(
            con=conn,
            sql="SELECT * FROM BRE_LOGIC_TBL WHERE Program_Name = ?",
            params=(str(program),),
        )
        data = data.astype(str)
        # Turn the stringified missing markers back into real None values.
        # The dict form is used because a positional ``None`` replacement
        # value is not treated consistently across pandas versions.
        data = data.replace({"nan": None, "None": None})
        data['Conditional_Seq'] = fill_conditional_seq(data)
        data['Sequence'] = data['Sequence'].astype(int)
        data = data.sort_values(by='Sequence')
        G = program_to_graph(data)

        actual = list(program_rows['Actuals'].values)
        # With only ``target`` given, shortest_path returns a dict mapping
        # each source node to its path to the target; the path of the last
        # entry is the one that gets checked.
        # NOTE(review): this relies on the dict's insertion order - confirm
        # the last entry is really the intended start node.
        paths_to_target = nx.shortest_path(G, target=str(actual[0]))
        path = list(paths_to_target.items())[-1][-1]
        whole_path1 = [G.nodes[node]['statement'] for node in path]

        expected = ast.literal_eval(program_rows['Program_Snippet'].values[0])
        print()
        print(program)
        print("Passed !!" if expected == whole_path1 else "Failed !!")
        count = count + 1
finally:
    # Release the database handle even if a program fails mid-run.
    conn.close()
# Leave a Comment  (paste-site footer residue, not part of the script)