Untitled
unknown
plain_text
10 months ago
1.6 kB
11
Indexable
import networkx as nx
import pandas as pd
import sqlite3
import re
import matplotlib.pyplot as plt
import numpy as np
import ast
from functions import fill_conditional_seq, program_to_graph
# Upper bound on bound parameters per statement; kept well below SQLite's
# historical default limit of 999 host parameters.
_MAX_SQL_PARAMS = 500


def _fetch_logic_rows(conn, program_names):
    """Return all BRE_LOGIC_TBL rows whose Program_Name is in *program_names*.

    Names are bound as query parameters (never interpolated into the SQL
    text) and sent in batches so that long lists stay within SQLite's
    per-statement parameter limit.
    """
    # Bind as text, mirroring the quoted literals the query compares against;
    # dict.fromkeys de-duplicates while preserving first-seen order.
    names = list(dict.fromkeys(str(name) for name in program_names))
    if not names:
        # Still run a query so the result carries the table's columns.
        return pd.read_sql_query("SELECT * FROM BRE_LOGIC_TBL WHERE 0", conn)

    frames = []
    for start in range(0, len(names), _MAX_SQL_PARAMS):
        batch = names[start:start + _MAX_SQL_PARAMS]
        placeholders = ",".join("?" * len(batch))
        sql = f"SELECT * FROM BRE_LOGIC_TBL WHERE Program_Name IN ({placeholders})"
        frames.append(pd.read_sql_query(sql, conn, params=batch))

    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, ignore_index=True)


def _first_source_value(raw):
    """Return the first element of a stringified sequence; pass non-strings through."""
    if isinstance(raw, str):
        return ast.literal_eval(raw)[0]
    return raw


def _join_columns(frame, columns, sep):
    """Concatenate the string form of *columns* row by row, separated by *sep*."""
    parts = [frame[col].astype(str) for col in columns]
    return parts[0].str.cat(parts[1:], sep=sep)


def process_data(ground_truth_file: str, database_file: str, output_file: str) -> None:
    """Match ground-truth identifiers against BRE_LOGIC_TBL and write the result.

    The ground-truth CSV must provide the columns ``Program_Name``,
    ``Identifier`` and ``Program_Snippet``. Rows of ``BRE_LOGIC_TBL`` for the
    listed programs are loaded, an ``Identifier`` is rebuilt for each row as
    ``Program_Name/Target_Value/Source_Value/Children/Parents``, and only
    rows whose identifier appears in the ground truth are kept (first
    occurrence per identifier). Two columns are then added: ``Actuals``
    (``Program_Name_Sequence``) and ``Program_Snippet`` (looked up by
    program name).

    Args:
        ground_truth_file: Path of the ground-truth CSV to read.
        database_file: Path of the SQLite database containing BRE_LOGIC_TBL.
        output_file: Path of the CSV to write (written without the index).

    Raises:
        KeyError: If a required column is missing from the CSV or the table.
        ValueError, SyntaxError: If a string ``Source`` cell is not a valid
            Python literal.
    """
    # Load the ground truth CSV
    pgms_csv = pd.read_csv(ground_truth_file)
    list_of_programs = pgms_csv['Program_Name'].to_list()
    list_of_identifiers = pgms_csv['Identifier'].to_list()

    # Query the SQLite database; always release the connection afterwards.
    conn = sqlite3.connect(database_file)
    try:
        data = _fetch_logic_rows(conn, list_of_programs)
    finally:
        conn.close()

    # Process 'Source_Value': first element of the stringified 'Source' list
    data['Source_Value'] = data['Source'].apply(_first_source_value)

    # Create 'Identifier' column
    data['Identifier'] = _join_columns(
        data,
        ['Program_Name', 'Target_Value', 'Source_Value', 'Children', 'Parents'],
        '/',
    )

    # Filter data based on identifiers, then remove duplicates
    data = data[data['Identifier'].isin(list_of_identifiers)]
    data = data.drop_duplicates(subset=['Identifier'], keep='first')

    # Create 'Actuals' column
    data['Actuals'] = _join_columns(data, ['Program_Name', 'Sequence'], '_')

    # Map 'Program_Snippet'. A program may appear on several ground-truth rows;
    # Series.map needs a uniquely indexed lookup, so keep its first snippet.
    snippet_by_program = (
        pgms_csv.drop_duplicates(subset=['Program_Name'], keep='first')
        .set_index('Program_Name')['Program_Snippet']
    )
    data['Program_Snippet'] = data['Program_Name'].map(snippet_by_program)

    # Save to CSV
    data.to_csv(output_file, index=False)
# Example usage: runs process_data against the local ground-truth CSV and
# SQLite database.
# NOTE(review): "ground_truth.csv" is passed as both ground_truth_file and
# output_file, so this call overwrites the CSV it reads from — confirm that
# enriching the file in place is intended.
process_data("ground_truth.csv", "app_db_pc.db", "ground_truth.csv")
Editor is loading...
Leave a Comment