Untitled
unknown
plain_text
9 months ago
7.5 kB
8
Indexable
import os
import pandas as pd
import numpy as np
import re
from datetime import datetime
def read_files_in_folder(folder_path):
    """Concatenate the text of every non-blank regular file in a folder.

    Entries are visited in sorted name order so the result is reproducible
    (``os.listdir`` returns names in arbitrary order). Sub-directories are
    skipped; the scan is not recursive. Each file is decoded as UTF-8 with
    undecodable bytes dropped, and a newline is appended after each file's
    content.

    Args:
        folder_path: Directory whose files should be read.

    Returns:
        str: The contents of all non-blank files joined together, or ``""``
        when no file contributed any text.

    Raises:
        OSError: If ``folder_path`` itself cannot be listed.
    """
    parts = []
    for name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, name)
        if not os.path.isfile(file_path):
            continue
        # Keep the try body to the I/O only; decoding cannot fail because
        # errors='ignore' drops undecodable bytes.
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                file_content = f.read()
        except OSError as e:
            # Best effort: report the unreadable file and carry on.
            print(f"Could not read {name}: {e}")
            continue
        if file_content.strip():
            parts.append(file_content)
            parts.append("\n")
        else:
            print(f"No content in {name}.")
    # Join once instead of growing a string with += inside the loop.
    return "".join(parts)
# Patterns used by parse_data, compiled once at import time.
_BTEST_BATCH_RE = re.compile(r"@BTEST\|(.+?)\|")
_BTEST_TIME_RE = re.compile(r"@BTEST\|.+?\|\d+\|(.+?)\|")
_BLOCK_RE = re.compile(r"@BLOCK\|(.+?)\|(\d{2})")
_CATEGORY_RE = re.compile(
    r"@([A-Z-]+)\|.*?\|([+-]?\d+\.\d+E[+-]\d+)(?:\|(.+?))?\{"
)
_LIM2_RE = re.compile(
    r"@LIM2\|([+-]?\d+\.\d+E[+-]\d+)\|([+-]?\d+\.\d+E[+-]\d+)"
)
_LIM3_RE = re.compile(
    r"@LIM3\|([+-]?\d+\.\d+E[+-]\d+)\|([+-]?\d+\.\d+E[+-]\d+)\|([+-]?\d+\.\d+E[+-]\d+)"
)

# Column order of the frame returned by parse_data (also used when empty).
_MEASUREMENT_COLUMNS = [
    "Tester",
    "Test Name",
    "Subtest",
    "Test Batch",
    "Test Time",
    "Category",
    "Measurement",
    "Upper Limit",
    "Nominal Value",
    "Lower Limit",
    "Pass/Fail",
]


def parse_data(data):
    """Parse raw log text into one row per measurement.

    The text is split on ``"}"`` and each chunk is scanned for records:

    * ``@BTEST|<batch>|<digits>|<time>|`` sets the batch and time that apply
      to every following chunk until the next ``@BTEST``.
    * ``@BLOCK|<name>|<2-digit code>`` gives the test name; code ``00`` is
      reported as ``"Pass"``, anything else as ``"Fail"``.
    * ``@<CATEGORY>|...|<float>[|<subtest>]{`` gives the category, the
      measured value and an optional subtest (``"None"`` when absent).
    * ``@LIM3|nominal|upper|lower`` or ``@LIM2|upper|lower`` gives the
      limits; ``@LIM3`` wins when both are present in a chunk.

    Measurements seen before any complete ``@BTEST`` record are dropped.

    Args:
        data: Concatenated log text.

    Returns:
        pandas.DataFrame: One row per measurement. The columns are always
        present, even when no measurement was found, so callers can group
        on them without special-casing empty input.
    """
    measurements = []
    test_batch, test_time, platform = None, None, "SCI_H07"

    for chunk in data.split("}"):
        if "@BTEST" in chunk:
            batch_match = _BTEST_BATCH_RE.search(chunk)
            time_match = _BTEST_TIME_RE.search(chunk)
            test_batch = batch_match.group(1) if batch_match else None
            test_time = time_match.group(1) if time_match else None

        # NOTE(review): because the text is split on every "}", only the
        # chunk holding the "@BLOCK" header is parsed. Measurement records
        # that follow the first "}" inside the same block end up in later
        # chunks without "@BLOCK" and are skipped. Confirm against real logs
        # whether that is intended.
        if "@BLOCK" not in chunk:
            continue
        # Rows are only recorded once a batch and time are known.
        if not (test_batch and test_time):
            continue

        block_matches = _BLOCK_RE.findall(chunk)
        category_matches = _CATEGORY_RE.findall(chunk)
        if not block_matches or not category_matches:
            continue

        # The limits depend only on the chunk, so resolve them once rather
        # than once per measurement.
        upper_limit = lower_limit = nominal_value = np.nan
        lim3_match = _LIM3_RE.search(chunk)
        if lim3_match:
            nominal_value, upper_limit, lower_limit = map(
                float, lim3_match.groups()
            )
        else:
            lim2_match = _LIM2_RE.search(chunk)
            if lim2_match:
                upper_limit, lower_limit = map(float, lim2_match.groups())

        for test_name, pass_fail_code in block_matches:
            pass_fail = "Pass" if pass_fail_code == "00" else "Fail"
            for test_category, raw_value, raw_subtest in category_matches:
                measurements.append({
                    "Tester": platform,
                    "Test Name": test_name,
                    "Subtest": raw_subtest if raw_subtest else "None",
                    "Test Batch": test_batch,
                    "Test Time": test_time,
                    "Category": test_category,
                    "Measurement": float(raw_value),
                    "Upper Limit": upper_limit,
                    "Nominal Value": nominal_value,
                    "Lower Limit": lower_limit,
                    "Pass/Fail": pass_fail,
                })

    # Passing columns keeps the schema stable when nothing was parsed.
    return pd.DataFrame(measurements, columns=_MEASUREMENT_COLUMNS)
# Column order of the frame returned by generate_cpk_data (also when empty).
_CPK_COLUMNS = [
    "Tester",
    "Test Name",
    "Subtest",
    "Mean",
    "StdDev",
    "Max Meas",
    "Min Meas",
    "CPK",
    "Nominal",
    "+Lim",
    "-Lim",
    "+Tol",
    "-Tol",
    "CPK+",
    "CPK-",
]


def generate_cpk_data(group):
    """Compute per-subtest Cpk statistics for one test's measurements.

    Rows are grouped by ``'Subtest'``. For each group the mean, population
    standard deviation (``ddof=0``), min and max of ``'Measurement'`` are
    computed. Limits and nominal are taken from the group's first row.

    * ``CPK+ = (upper - mean) / (3 * std)`` and
      ``CPK- = (mean - lower) / (3 * std)``; both are NaN when the standard
      deviation is zero. ``CPK`` is the smaller of the two, or NaN if either
      is NaN.
    * ``+Tol`` / ``-Tol`` are the limits' offsets from nominal as a
      percentage of ``abs(nominal)``, so ``+Tol`` stays positive for an upper
      limit above a negative nominal. They are NaN when the nominal is
      missing or zero, where a percentage is undefined.

    Args:
        group: DataFrame with the columns ``Tester``, ``Test Name``,
            ``Subtest``, ``Measurement``, ``Upper Limit``, ``Lower Limit``
            and ``Nominal Value``.

    Returns:
        pandas.DataFrame: One row per subtest, with a fixed set of columns
        that is present even when ``group`` is empty.
    """
    rows = []
    for subtest, subtest_group in group.groupby('Subtest'):
        values = subtest_group['Measurement']
        mean_value = values.mean()
        std_dev = values.std(ddof=0)

        upper_limit = subtest_group['Upper Limit'].iloc[0]
        lower_limit = subtest_group['Lower Limit'].iloc[0]
        nominal_value = subtest_group['Nominal Value'].iloc[0]

        # A zero (or NaN) spread makes Cpk undefined.
        if std_dev > 0:
            cpk_plus = (upper_limit - mean_value) / (3 * std_dev)
            cpk_minus = (mean_value - lower_limit) / (3 * std_dev)
        else:
            cpk_plus = cpk_minus = np.nan
        if np.isnan(cpk_plus) or np.isnan(cpk_minus):
            cpk = np.nan
        else:
            cpk = min(cpk_plus, cpk_minus)

        # Guard the division: a zero nominal would yield +/-inf, and a
        # negative nominal would flip the signs without abs().
        if np.isnan(nominal_value) or nominal_value == 0:
            tol_plus = tol_minus = np.nan
        else:
            reference = abs(nominal_value)
            tol_plus = (upper_limit - nominal_value) / reference * 100
            tol_minus = (lower_limit - nominal_value) / reference * 100

        rows.append({
            "Tester": subtest_group['Tester'].iloc[0],
            "Test Name": subtest_group['Test Name'].iloc[0],
            "Subtest": subtest,
            "Mean": mean_value,
            "StdDev": std_dev,
            "Max Meas": values.max(),
            "Min Meas": values.min(),
            "CPK": cpk,
            "Nominal": nominal_value,
            "+Lim": upper_limit,
            "-Lim": lower_limit,
            "+Tol": tol_plus,
            "-Tol": tol_minus,
            "CPK+": cpk_plus,
            "CPK-": cpk_minus,
        })
    return pd.DataFrame(rows, columns=_CPK_COLUMNS)
def main():
    """Prompt for a folder, parse its logs and write Excel reports.

    Creates ``test_results_<timestamp>/`` in the current directory with:

    * ``Tests/<test name>.xlsx`` per test, holding a ``Raw Data`` sheet and
      a ``CPK`` sheet;
    * ``cpk_summary.xlsx`` with the Cpk rows of every test.

    Nothing is written, and a message is printed instead, when the folder
    does not exist, holds no text, or yields no measurements.
    """
    folder = input("Enter the folder path containing the files: ").strip()
    if not os.path.isdir(folder):
        print(f"Folder not found: {folder}")
        return

    raw_data = read_files_in_folder(folder)
    if not raw_data.strip():
        print("No data to process.")
        return

    measurements_df = parse_data(raw_data)
    # An empty frame may have no 'Test Name' column to group on, so stop
    # before groupby can raise KeyError.
    if measurements_df.empty:
        print("No measurements found in the data.")
        return

    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_folder = f"test_results_{now}"
    tests_folder = os.path.join(output_folder, "Tests")
    os.makedirs(tests_folder, exist_ok=True)

    # Collect the per-test Cpk frames and concatenate once at the end.
    cpk_frames = []
    for test_name, group in measurements_df.groupby('Test Name'):
        cpk_data = generate_cpk_data(group)
        cpk_frames.append(cpk_data)

        # Test names come from the logs and may contain characters that are
        # not valid in file names; replace them so the workbook can be saved.
        safe_name = re.sub(r'[\\/:*?"<>|]', "_", str(test_name))
        test_file = os.path.join(tests_folder, f"{safe_name}.xlsx")
        with pd.ExcelWriter(test_file) as writer:
            group.to_excel(writer, sheet_name='Raw Data', index=False)
            cpk_data.to_excel(writer, sheet_name='CPK', index=False)
        print(f"Saved raw data and Cpk for '{test_name}' to: {test_file}")

    cpk_summary_df = pd.concat(cpk_frames, ignore_index=True)
    cpk_summary_file = os.path.join(output_folder, "cpk_summary.xlsx")
    cpk_summary_df.to_excel(cpk_summary_file, index=False)
    print(f"Saved Cpk summary to: {cpk_summary_file}")
# Run the report pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment