# Untitled

import os
import pandas as pd
import numpy as np
import re
from datetime import datetime

# Function to read files in a folder and return their content
def read_files_in_folder(folder_path):
    """Concatenate the text of every regular file directly inside a folder.

    Sub-directories are skipped (the folder is not walked recursively).
    Files are decoded as UTF-8 with undecodable bytes dropped. A file whose
    content is empty or whitespace-only is reported on stdout and skipped,
    as is a file that cannot be opened or read.

    Args:
        folder_path: Path of the folder to read.

    Returns:
        A single string made of each non-empty file's content followed by a
        newline, in sorted file-name order. Empty string if nothing was read.

    Raises:
        OSError: If the folder itself cannot be listed (for example, it does
            not exist). Only per-file read errors are handled here.
    """
    contents = []

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # concatenated output reproducible from run to run.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                file_content = f.read()
        except OSError as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"Could not read {file}: {e}")
            continue

        if file_content.strip():
            contents.append(file_content + "\n")
        else:
            print(f"No content in {file}.")

    return "".join(contents)

# Function to parse the data and extract relevant information

# Numeric fields are matched in scientific notation, e.g. +1.234000E+03.
_SCI_FLOAT = r"[+-]?\d+\.\d+E[+-]\d+"

# Patterns are compiled once instead of being re-evaluated inside the loops.
_BTEST_BATCH_RE = re.compile(r"@BTEST\|(.+?)\|")
_BTEST_TIME_RE = re.compile(r"@BTEST\|.+?\|\d+\|(.+?)\|")
_BLOCK_RE = re.compile(r"@BLOCK\|(.+?)\|(\d{2})")
_CATEGORY_RE = re.compile(
    r"@([A-Z-]+)\|.*?\|(" + _SCI_FLOAT + r")(?:\|(.+?))?\{"
)
_LIM2_RE = re.compile(r"@LIM2\|(" + _SCI_FLOAT + r")\|(" + _SCI_FLOAT + r")")
_LIM3_RE = re.compile(
    r"@LIM3\|(" + _SCI_FLOAT + r")\|(" + _SCI_FLOAT + r")\|(" + _SCI_FLOAT + r")"
)

# Column order of the frame returned by parse_data().
_MEASUREMENT_COLUMNS = [
    "Tester",
    "Test Name",
    "Subtest",
    "Test Batch",
    "Test Time",
    "Category",
    "Measurement",
    "Upper Limit",
    "Nominal Value",
    "Lower Limit",
    "Pass/Fail",
]


def parse_data(data, platform="SCI_H07"):
    """Parse raw log text into a DataFrame with one row per measurement.

    The text is split on '}' and each fragment is inspected:

    * A fragment containing "@BTEST" updates the current test batch (first
      field) and test time (third field); both stay in effect for later
      fragments until the next "@BTEST".
    * A fragment containing "@BLOCK" yields rows: for every
      "@BLOCK|<name>|<2-digit code>" header and every
      "@<CATEGORY>|...|<value>[|<subtest>]{" record found in that fragment,
      one row is produced. Code "00" is recorded as "Pass", anything else as
      "Fail".
    * Limits come from the first "@LIM3" (nominal, upper, lower) in the
      fragment, otherwise the first "@LIM2" (upper, lower); missing limits
      are NaN.

    Rows are only produced once both a test batch and a test time are known.

    NOTE(review): because the text is split on '}', only records that sit in
    the same fragment as the "@BLOCK" header are picked up; records in later
    fragments of the same block are not - confirm against the log format.

    Args:
        data: Raw log text.
        platform: Value stored in the "Tester" column of every row.

    Returns:
        A DataFrame with the columns listed in ``_MEASUREMENT_COLUMNS``. When
        nothing is parsed the frame is empty but still has those columns, so
        callers can group or select by column name safely.
    """
    rows = []
    test_batch = test_time = None

    for block in data.split("}"):
        # Extract test batch and time from @BTEST
        if "@BTEST" in block:
            batch_match = _BTEST_BATCH_RE.search(block)
            time_match = _BTEST_TIME_RE.search(block)
            test_batch = batch_match.group(1) if batch_match else None
            test_time = time_match.group(1) if time_match else None

        if "@BLOCK" not in block:
            continue
        # Rows without a known batch and time are never recorded.
        if not (test_batch and test_time):
            continue

        block_headers = _BLOCK_RE.findall(block)
        if not block_headers:
            continue

        # These depend only on the fragment, so compute them once per fragment.
        readings = _CATEGORY_RE.findall(block)

        upper_limit = lower_limit = nominal_value = np.nan
        lim3_match = _LIM3_RE.search(block)
        if lim3_match:
            # LIM3 takes priority over LIM2.
            nominal_value, upper_limit, lower_limit = map(float, lim3_match.groups())
        else:
            lim2_match = _LIM2_RE.search(block)
            if lim2_match:
                upper_limit, lower_limit = map(float, lim2_match.groups())

        for test_name, pass_fail_code in block_headers:
            pass_fail = "Pass" if pass_fail_code == "00" else "Fail"

            for test_category, raw_value, subtest in readings:
                rows.append({
                    "Tester": platform,
                    "Test Name": test_name,
                    # Records without a subtest field use the literal "None".
                    "Subtest": subtest or "None",
                    "Test Batch": test_batch,
                    "Test Time": test_time,
                    "Category": test_category,
                    "Measurement": float(raw_value),
                    "Upper Limit": upper_limit,
                    "Nominal Value": nominal_value,
                    "Lower Limit": lower_limit,
                    "Pass/Fail": pass_fail,
                })

    return pd.DataFrame(rows, columns=_MEASUREMENT_COLUMNS)

# Function to generate Cpk data for each test

# Column order of the frame returned by generate_cpk_data().
_CPK_COLUMNS = [
    "Tester",
    "Test Name",
    "Subtest",
    "Mean",
    "StdDev",
    "Max Meas",
    "Min Meas",
    "CPK",
    "Nominal",
    "+Lim",
    "-Lim",
    "+Tol",
    "-Tol",
    "CPK+",
    "CPK-",
]


def generate_cpk_data(group):
    """Compute per-subtest Cpk statistics for one test's measurements.

    Rows are grouped by the 'Subtest' column. For each subtest the limits,
    nominal value, tester and test name are taken from the first row of that
    subtest group.

    * StdDev is the population standard deviation (ddof=0).
    * CPK+ = (upper - mean) / (3 * std), CPK- = (mean - lower) / (3 * std),
      CPK = min(CPK+, CPK-). All are NaN when the standard deviation is zero
      or when either one-sided value is NaN.
    * +Tol / -Tol are the upper / lower limit offsets from nominal expressed
      as a percentage of nominal. They are NaN when nominal is missing or
      zero, since a percentage of zero is undefined.

    Args:
        group: DataFrame with the columns 'Subtest', 'Measurement',
            'Upper Limit', 'Lower Limit', 'Nominal Value', 'Tester' and
            'Test Name'.

    Returns:
        A DataFrame with one row per subtest and the columns listed in
        ``_CPK_COLUMNS`` (also present when the result is empty).
    """
    rows = []

    for subtest, subtest_group in group.groupby('Subtest'):
        values = subtest_group['Measurement']
        mean_value = values.mean()
        std_dev = values.std(ddof=0)
        upper_limit = subtest_group['Upper Limit'].iloc[0]
        lower_limit = subtest_group['Lower Limit'].iloc[0]
        nominal_value = subtest_group['Nominal Value'].iloc[0]

        # Cpk calculations: undefined when there is no spread in the data.
        if std_dev:
            cpk_plus = (upper_limit - mean_value) / (3 * std_dev)
            cpk_minus = (mean_value - lower_limit) / (3 * std_dev)
        else:
            cpk_plus = cpk_minus = np.nan

        if pd.isna(cpk_plus) or pd.isna(cpk_minus):
            cpk = np.nan
        else:
            cpk = min(cpk_plus, cpk_minus)

        # Tolerance calculations: guard against a missing or zero nominal,
        # which would otherwise yield inf/NaN plus a runtime warning.
        if pd.isna(nominal_value) or nominal_value == 0:
            tol_plus = tol_minus = np.nan
        else:
            tol_plus = (upper_limit - nominal_value) / nominal_value * 100
            tol_minus = (lower_limit - nominal_value) / nominal_value * 100

        rows.append({
            "Tester": subtest_group['Tester'].iloc[0],
            "Test Name": subtest_group['Test Name'].iloc[0],
            "Subtest": subtest,
            "Mean": mean_value,
            "StdDev": std_dev,
            "Max Meas": values.max(),
            "Min Meas": values.min(),
            "CPK": cpk,
            "Nominal": nominal_value,
            "+Lim": upper_limit,
            "-Lim": lower_limit,
            "+Tol": tol_plus,
            "-Tol": tol_minus,
            "CPK+": cpk_plus,
            "CPK-": cpk_minus,
        })

    return pd.DataFrame(rows, columns=_CPK_COLUMNS)

# Main execution function
def main():
    """Prompt for a folder, parse its files and write Excel reports.

    Reads every file in the folder entered by the user, parses the
    measurements, and writes into a new timestamped
    ``test_results_<YYYY-MM-DD_HH-MM-SS>`` folder:

    * ``Tests/<test name>.xlsx`` per test, with a 'Raw Data' sheet and a
      'CPK' sheet;
    * ``cpk_summary.xlsx`` containing the Cpk rows of all tests.

    Nothing is written when the folder yields no text or when no
    measurements can be parsed from it.
    """
    folder = input("Enter the folder path containing the files: ")
    raw_data = read_files_in_folder(folder)

    if not raw_data.strip():
        print("No data to process.")
        return

    measurements_df = parse_data(raw_data)
    # An empty result may lack the 'Test Name' column, so grouping on it
    # would fail; stop before creating any output folders.
    if measurements_df.empty:
        print("No measurements could be parsed from the data.")
        return

    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_folder = f"test_results_{now}"
    tests_folder = os.path.join(output_folder, "Tests")
    os.makedirs(tests_folder, exist_ok=True)

    # Collect per-test Cpk frames and concatenate once at the end rather
    # than re-copying a growing DataFrame on every iteration.
    cpk_frames = []

    # Save each test's data and Cpk sheet in a separate Excel workbook
    for test_name, group in measurements_df.groupby('Test Name'):
        cpk_data = generate_cpk_data(group)
        cpk_frames.append(cpk_data)

        # Test names come from the log and may contain path separators or
        # characters that are not valid in file names; replace them.
        # Names that differ only in such characters map to the same file.
        safe_name = re.sub(r'[\\/:*?"<>|]', "_", str(test_name))
        test_file = os.path.join(tests_folder, f"{safe_name}.xlsx")
        with pd.ExcelWriter(test_file) as writer:
            group.to_excel(writer, sheet_name='Raw Data', index=False)
            cpk_data.to_excel(writer, sheet_name='CPK', index=False)

        print(f"Saved raw data and Cpk for '{test_name}' to: {test_file}")

    # pd.concat() raises on an empty list, so fall back to an empty frame.
    if cpk_frames:
        cpk_summary_df = pd.concat(cpk_frames, ignore_index=True)
    else:
        cpk_summary_df = pd.DataFrame()

    # Save the overall Cpk summary in the output folder
    cpk_summary_file = os.path.join(output_folder, "cpk_summary.xlsx")
    cpk_summary_df.to_excel(cpk_summary_file, index=False)
    print(f"Saved Cpk summary to: {cpk_summary_file}")

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# Editor is loading...