Untitled
unknown
plain_text
5 months ago
7.5 kB
3
Indexable
"""Parse tester log files and export raw measurements plus Cpk statistics.

The script reads every file in a folder, extracts the ``@BTEST`` / ``@BLOCK``
/ measurement / ``@LIM2`` / ``@LIM3`` records it finds, and writes one Excel
workbook per test name plus an overall Cpk summary workbook.
"""

import os
import re
from datetime import datetime

import numpy as np
import pandas as pd

# Tester name written to the "Tester" column when the caller supplies none.
DEFAULT_PLATFORM = "SCI_H07"

# Numbers in the logs are matched in signed scientific notation only.
_FLOAT = r"[+-]?\d+\.\d+E[+-]\d+"

# Compiled once at import time instead of once per parsed chunk.
_BTEST_BATCH_RE = re.compile(r"@BTEST\|(.+?)\|")
_BTEST_TIME_RE = re.compile(r"@BTEST\|.+?\|\d+\|(.+?)\|")
_BLOCK_RE = re.compile(r"@BLOCK\|(.+?)\|(\d{2})")
_CATEGORY_RE = re.compile(r"@([A-Z-]+)\|.*?\|(" + _FLOAT + r")(?:\|(.+?))?\{")
_LIM2_RE = re.compile(r"@LIM2\|(" + _FLOAT + r")\|(" + _FLOAT + r")")
_LIM3_RE = re.compile(
    r"@LIM3\|(" + _FLOAT + r")\|(" + _FLOAT + r")\|(" + _FLOAT + r")"
)

# Column order of the frame returned by parse_data (also used when it is empty).
_MEASUREMENT_COLUMNS = [
    "Tester",
    "Test Name",
    "Subtest",
    "Test Batch",
    "Test Time",
    "Category",
    "Measurement",
    "Upper Limit",
    "Nominal Value",
    "Lower Limit",
    "Pass/Fail",
]

# Characters that are not allowed in file names on common file systems.
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def read_files_in_folder(folder_path):
    """Return the concatenated text of every non-empty file in *folder_path*.

    Files are read in sorted name order so the result is reproducible. Each
    file's content is followed by a newline. Sub-directories are skipped,
    empty files and unreadable files are reported on stdout and skipped.

    Raises:
        OSError: if *folder_path* itself cannot be listed.
    """
    contents = []
    for name in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, name)
        if not os.path.isfile(path):
            continue
        try:
            # errors="ignore" drops undecodable bytes rather than failing.
            with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                text = handle.read()
        except OSError as exc:
            print(f"Could not read {name}: {exc}")
            continue
        if text.strip():
            contents.append(text + "\n")
        else:
            print(f"No content in {name}.")
    return "".join(contents)


def _parse_limits(chunk):
    """Return ``(upper, nominal, lower)`` found in *chunk*, NaN where absent.

    An ``@LIM3`` record (nominal, upper, lower) takes priority over an
    ``@LIM2`` record (upper, lower).
    """
    lim3 = _LIM3_RE.search(chunk)
    if lim3:
        nominal, upper, lower = map(float, lim3.groups())
        return upper, nominal, lower
    lim2 = _LIM2_RE.search(chunk)
    if lim2:
        upper, lower = map(float, lim2.groups())
        return upper, np.nan, lower
    return np.nan, np.nan, np.nan


def parse_data(data, platform=DEFAULT_PLATFORM):
    """Extract one row per measurement record from raw log text.

    The text is split on ``}``. Because every measurement that is matched is
    followed by a nested ``{...}`` record, a ``@BLOCK`` header can only share
    a chunk with the first measurement after it; later measurements of the
    same block land in chunks of their own. The most recent ``@BTEST`` and
    ``@BLOCK`` headers are therefore remembered and applied to the chunks
    that follow, until the next header replaces them.

    Args:
        data: Raw log text, e.g. the result of ``read_files_in_folder``.
        platform: Value stored in the "Tester" column of every row.

    Returns:
        A DataFrame with the columns Tester, Test Name, Subtest, Test Batch,
        Test Time, Category, Measurement, Upper Limit, Nominal Value,
        Lower Limit and Pass/Fail. The columns are present even when no
        measurement was found. Measurements seen before any ``@BTEST``
        record with a batch and time are not recorded.
    """
    measurements = []
    test_batch = test_time = None
    current_blocks = []  # (test name, "Pass"/"Fail") of the latest @BLOCK

    for chunk in data.split("}"):
        if "@BTEST" in chunk:
            batch_match = _BTEST_BATCH_RE.search(chunk)
            time_match = _BTEST_TIME_RE.search(chunk)
            test_batch = batch_match.group(1) if batch_match else None
            test_time = time_match.group(1) if time_match else None
            # A new board test must not inherit the previous board's block.
            current_blocks = []

        if "@BLOCK" in chunk:
            current_blocks = [
                (name, "Pass" if code == "00" else "Fail")
                for name, code in _BLOCK_RE.findall(chunk)
            ]

        if not current_blocks or not (test_batch and test_time):
            continue

        categories = _CATEGORY_RE.findall(chunk)
        if not categories:
            continue

        # Limits are a property of the chunk, so look them up only once.
        upper_limit, nominal_value, lower_limit = _parse_limits(chunk)

        for test_name, pass_fail in current_blocks:
            for test_category, raw_value, subtest in categories:
                measurements.append({
                    "Tester": platform,
                    "Test Name": test_name,
                    # Records without a subtest field are labelled "None".
                    "Subtest": subtest if subtest else "None",
                    "Test Batch": test_batch,
                    "Test Time": test_time,
                    "Category": test_category,
                    "Measurement": float(raw_value),
                    "Upper Limit": upper_limit,
                    "Nominal Value": nominal_value,
                    "Lower Limit": lower_limit,
                    "Pass/Fail": pass_fail,
                })

    return pd.DataFrame(measurements, columns=_MEASUREMENT_COLUMNS)


def generate_cpk_data(group):
    """Compute per-subtest statistics and Cpk values for one test's rows.

    Args:
        group: DataFrame in the layout produced by ``parse_data``, normally
            the rows of a single test name.

    Returns:
        A DataFrame with one row per distinct "Subtest" holding the mean,
        population standard deviation, max/min measurement, limits,
        tolerances (percent of nominal) and CPK / CPK+ / CPK-. Limits are
        taken from the first row of each subtest. Values that cannot be
        computed (zero spread, missing limits, missing or zero nominal)
        are NaN.
    """
    rows = []
    for subtest, subtest_group in group.groupby("Subtest"):
        values = subtest_group["Measurement"]
        mean_value = values.mean()
        std_dev = values.std(ddof=0)
        upper_limit = subtest_group["Upper Limit"].iloc[0]
        lower_limit = subtest_group["Lower Limit"].iloc[0]
        nominal_value = subtest_group["Nominal Value"].iloc[0]

        # Cpk is undefined when the measurements have no spread.
        if std_dev > 0:
            cpk_plus = (upper_limit - mean_value) / (3 * std_dev)
            cpk_minus = (mean_value - lower_limit) / (3 * std_dev)
        else:
            cpk_plus = cpk_minus = np.nan

        if np.isnan(cpk_plus) or np.isnan(cpk_minus):
            cpk = np.nan
        else:
            cpk = min(cpk_plus, cpk_minus)

        # A percentage of nominal is meaningless for a missing or zero nominal.
        if np.isnan(nominal_value) or nominal_value == 0:
            tol_plus = tol_minus = np.nan
        else:
            tol_plus = (upper_limit - nominal_value) / nominal_value * 100
            tol_minus = (lower_limit - nominal_value) / nominal_value * 100

        rows.append({
            "Tester": subtest_group["Tester"].iloc[0],
            "Test Name": subtest_group["Test Name"].iloc[0],
            "Subtest": subtest,
            "Mean": mean_value,
            "StdDev": std_dev,
            "Max Meas": values.max(),
            "Min Meas": values.min(),
            "CPK": cpk,
            "Nominal": nominal_value,
            "+Lim": upper_limit,
            "-Lim": lower_limit,
            "+Tol": tol_plus,
            "-Tol": tol_minus,
            "CPK+": cpk_plus,
            "CPK-": cpk_minus,
        })
    return pd.DataFrame(rows)


def _safe_filename(name):
    """Return *name* with characters that are invalid in file names replaced."""
    cleaned = _INVALID_FILENAME_CHARS.sub("_", str(name)).strip(" .")
    return cleaned or "unnamed"


def _unique_filename(name, used):
    """Return a sanitised file stem for *name* not yet present in *used*.

    Comparison is case-insensitive so two stems cannot collide on
    case-insensitive file systems. The chosen stem is added to *used*.
    """
    base = _safe_filename(name)
    candidate = base
    suffix = 1
    while candidate.lower() in used:
        suffix += 1
        candidate = f"{base}_{suffix}"
    used.add(candidate.lower())
    return candidate


def main():
    """Prompt for a folder, parse its files and write the Excel reports.

    Output goes to a new ``test_results_<timestamp>`` folder in the current
    working directory: one workbook per test name under ``Tests`` (sheets
    "Raw Data" and "CPK") and a ``cpk_summary.xlsx`` covering all tests.
    """
    folder = input("Enter the folder path containing the files: ")
    # Tolerate stray whitespace and quotes around a pasted path.
    folder = folder.strip().strip("\"'")

    try:
        raw_data = read_files_in_folder(folder)
    except OSError as exc:
        print(f"Could not read folder '{folder}': {exc}")
        return

    if not raw_data.strip():
        print("No data to process.")
        return

    measurements_df = parse_data(raw_data)
    if measurements_df.empty:
        print("No measurements found in the input files.")
        return

    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_folder = f"test_results_{now}"
    tests_folder = os.path.join(output_folder, "Tests")
    os.makedirs(tests_folder, exist_ok=True)

    cpk_frames = []
    used_names = set()

    # Save each test's raw data and Cpk sheet in its own workbook.
    for test_name, group in measurements_df.groupby("Test Name"):
        cpk_data = generate_cpk_data(group)
        cpk_frames.append(cpk_data)

        file_stem = _unique_filename(test_name, used_names)
        test_file = os.path.join(tests_folder, f"{file_stem}.xlsx")
        with pd.ExcelWriter(test_file) as writer:
            group.to_excel(writer, sheet_name="Raw Data", index=False)
            cpk_data.to_excel(writer, sheet_name="CPK", index=False)
        print(f"Saved raw data and Cpk for '{test_name}' to: {test_file}")

    # Concatenate once at the end instead of growing a frame inside the loop.
    cpk_summary_df = pd.concat(cpk_frames, ignore_index=True)
    cpk_summary_file = os.path.join(output_folder, "cpk_summary.xlsx")
    cpk_summary_df.to_excel(cpk_summary_file, index=False)
    print(f"Saved Cpk summary to: {cpk_summary_file}")


# Execute the main function
if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment