
2 months ago
17 kB
import pandas as pd
import re
import numpy as np

# Existing parse_data function
def parse_data(data):
    # Parse a text log made of "{@BLOCK"-delimited sections into measurement
    # records.
    #
    # Parameters:
    #   data: the whole log as a single string (it is split with str.split).
    # Returns:
    #   A 2-tuple: (pd.DataFrame built from the `measurements` list,
    #   number of records skipped by the "A-JUM" / negative-measurement rule).
    # Side effect:
    #   Rebinds the module-level global `part_number`.
    #
    # NOTE(review): several statements appear to be missing from this function
    # as it stands (an `if` with no body, unclosed `re.findall(` calls, record
    # literals with no opener). Each gap is flagged inline below; the function
    # cannot run until they are restored.
    blocks = data.split("{@BLOCK")  # Split only at {@BLOCK
    global part_number  # Declare it globally at the start of the function

    # NOTE(review): no statement visible in this function appends to
    # `measurements`; presumably the append belongs with the record fragments
    # further down — confirm against the complete file.
    measurements = []
    platform = "NAN"  # Default to NAN if no platform is found
    part_number = ""  # Default to an empty string if no part number is found

    skipped_count = 0  # Track number of skipped lines

    for block in blocks:
        # str.split removed the "{@BLOCK" marker from every piece, so it is put
        # back here. The first piece (any text before the first marker) gets the
        # prefix as well.
        # Ensure the block begins with {@BLOCK
        block = "{@BLOCK" + block if not block.startswith("{@BLOCK") else block

        # Parse the platform if `@BATCH` and `{@BTEST` are found
        if "@BATCH" in block and "{@BTEST" in block:
            # Matches a {@BATCH record immediately followed by a {@BTEST record,
            # stopping at the next "{".
            batch_line = re.search(r"\{@BATCH[^\{]*\{@BTEST[^\{]*", block)
            if batch_line:
                parts = batch_line.group().split('|')

                # Platform is only read when the matched text has at least 15
                # pipe-separated fields.
                # Parse platform (9th split logic)
                if len(parts) >= 15:
                    platform = parts[9].strip()  # parts[9] is the 10th field (0-based index 9)

                # Parse part number (from index 1 or fallback logic)
                if len(parts) > 1 and parts[1].strip():
                    part_number = parts[1].strip()
                    # Every non-empty field from index 10 onward overwrites
                    # part_number, so the last non-empty one wins over parts[1].
                    for i in range(10, len(parts)):  # Start at index 10 (the 11th field)
                        part = parts[i].strip()
                        if part:
                            part_number = part.split("{@BATCH")[0].strip() if "{@BATCH" in part else part.strip()

        # The pattern captures a "{@TAG|..|..|.." record that is directly
        # followed by a {@LIM2 or {@LIM3 record carrying at least two numeric
        # fields; group 0 of each match is the TAG.
        # Define the pattern to match measurement data
        pattern = r"\{@([\w-]+)\|([^\|]*)\|([^\|]*)\|?([^\|{]*)\{@(LIM2|LIM3)\|([+-]?\d*\.?\d+(?:E[+-]?\d+)?)(?:\|([+-]?\d*\.?\d+(?:E[+-]?\d+)?))?(?:\|([+-]?\d*\.?\d+(?:E[+-]?\d+)?))"
        matches = re.findall(pattern, block)
        a = []

        # Process each match
        for match in matches:
            prefix = match[0]
            if prefix not in {"BLOCK", "BTEST", "BATCH"}:  # Ignore these prefixes
            # NOTE(review): the body of the `if` above is not present. `a` is
            # indexed as a per-measurement type tag below (typeValue = a[num]),
            # so presumably the missing statement appends `prefix` to `a` —
            # TODO confirm.

        # `serial` and `datatime` are assigned only inside this branch. A block
        # without "{@BTEST" reuses the values from an earlier iteration, or
        # raises NameError if none was ever set.
        # Process `@BTEST` if present
        if "{@BTEST" in block:
            # Fields of the first @BTEST record, up to the next "{".
            dataBlockSplit = str(block).split('@BTEST')[1].split("{")[0].split("|")

            # `runs` is not read anywhere in the visible code.
            runs = dataBlockSplit[-1].strip()
            # Index 1 has no length guard (index 3 below does), so a record
            # with no "|" raises IndexError here.
            serial = dataBlockSplit[1].strip()
            datatime = dataBlockSplit[3].strip() if len(dataBlockSplit) > 3 else ""

        # Extract test details from `@BLOCK`
        block_matches = re.findall(r"@BLOCK\|([^|]+)\|(\d{2})", block)
        for test_name, pass_fail_code in block_matches:
            # Strips a leading "<digits>%" prefix from the test name.
            # Clean test name
            test_name = re.sub(r'^\d+%', '', test_name)

            # Block-level result; in the visible code it is overwritten by the
            # per-measurement result before being used.
            pass_fail = "Pass" if pass_fail_code == "00" else "Fail"
            # Tuples from the category pattern are read below as:
            # [1] single-digit status, [2] measurement, [3] optional subtest.
            # NOTE(review): the three `re.findall(` calls below are never
            # closed, and the pattern/arguments for `lim3_match` are absent.
            # Judging from its use below, the LIM3 pattern yields three numbers
            # (nominal, upper, lower) — TODO restore from the complete file.
            category_matches = re.findall(
                r"@([A-Z-]+)\|(\d)\|([+-]?\d*\.?\d+(?:E[+-]?\d+)?)(?:\|(.+?))?\{", block

            # Extract limits
            lim2_match = re.findall(
                r"@LIM2\|([+-]?\d*\.?\d+(?:E[+-]?\d+)?)\|([+-]?\d*\.?\d+(?:E[+-]?\d+)?)", block
            lim3_match = re.findall(

            if lim3_match:
                for num in range(len(lim3_match)):
                    # No bounds check here (the LIM2 branch has one): if there
                    # are fewer category matches, or fewer entries in `a`, than
                    # LIM3 matches, these lookups raise IndexError.
                    category_match = category_matches[num]
                    typeValue = a[num]

                    subtest = category_match[3] if category_match[3] else "None"
                    pass_fail_code = category_match[1]  # Extract pass/fail code (0 or 1)
                    measurement = float(category_match[2])  # Extract measurement

                    pass_fail = "Pass" if pass_fail_code == "0" else "Fail"

                    # These NaN defaults are replaced unconditionally by the
                    # unpacking on the next statement.
                    upper_limit = lower_limit = nominal_value = np.nan

                    nominal_value, upper_limit, lower_limit = map(float, lim3_match[num])
                    # Tolerances are percentages relative to the nominal value.
                    # NOTE(review): a nominal value of 0.0 raises
                    # ZeroDivisionError here; only NaN is guarded against.
                    # Assuming nominal_value, upper_limit, and lower_limit are defined
                    tol_plus = (upper_limit - nominal_value) / nominal_value * 100 if not np.isnan(
                        nominal_value) else np.nan
                    tol_minus = (lower_limit - nominal_value) / nominal_value * 100 if not np.isnan(
                        nominal_value) else np.nan

                    # Format tol_plus and tol_minus to 1 decimal place
                    tol_plus = round(tol_plus, 1) if not np.isnan(tol_plus) else np.nan
                    tol_minus = round(tol_minus, 1) if not np.isnan(tol_minus) else np.nan

                    # Skip logic: Skip if typeValue is "A-JUM" and measurement is negative
                    if typeValue == "A-JUM" and measurement < 0:
                        skipped_count += 1  # Increment the skip counter
                        continue  # Skip this iteration

                    # Ensure tol_minus is positive (absolute value)
                    tol_minus = abs(tol_minus)

                    # NOTE(review): the lines below are fragments of a record
                    # literal; the opening statement and closing brackets are
                    # missing, and further keys may be missing too — TODO
                    # restore.
                        "Test Name": test_name,
                        "DateTime": datatime,
                        "Subtest": subtest,
                        "Serial#": serial,
                        "Measure": measurement,


            elif lim2_match:

                for num in range(len(lim2_match)):
                    # NOTE(review): an `else:` appears to be missing before the
                    # warning below. As written, the warning and `continue` run
                    # whenever the index IS valid, so no LIM2 record gets past
                    # this point, and an out-of-range index would fall through
                    # with a stale or undefined `category_match`.
                    if num < len(category_matches):
                        category_match = category_matches[num]
                        # Continue processing
                        # Handle mismatch, e.g., skip or log an error
                        print(f"Warning: No matching category for index {num} in block.")
                        continue  # Skip the current iteration
                    typeValue = a[num]
                    subtest = category_match[3] if category_match[3] else "None"
                    pass_fail_code = category_match[1]
                    measurement = float(category_match[2])

                    pass_fail = "Pass" if pass_fail_code == "0" else "Fail"

                    upper_limit = lower_limit = nominal_value = np.nan

                    # LIM2 supplies only upper and lower limits, so
                    # nominal_value stays NaN and both tolerances below always
                    # evaluate to NaN.
                    upper_limit, lower_limit = map(float, lim2_match[num])
                    tol_plus = (upper_limit - nominal_value) / nominal_value * 100 if not np.isnan(
                        nominal_value) else np.nan
                    tol_minus = (lower_limit - nominal_value) / nominal_value * 100 if not np.isnan(
                        nominal_value) else np.nan

                    # Skip logic: Skip if typeValue is "A-JUM" and measurement is negative
                    if typeValue == "A-JUM" and measurement < 0:
                        skipped_count += 1  # Increment the skip counter
                        continue  # Skip this iteration

                    # NOTE(review): same gap as in the LIM3 branch — these are
                    # fragments of a record literal whose opener and closer are
                    # missing.
                        "Test Name": test_name,
                        "DateTime": datatime,
                        "Subtest": subtest,
                        "Serial#": serial,
                        "Measure": measurement,


    # The DataFrame is built from whatever was collected in `measurements`.
    return pd.DataFrame(measurements), skipped_count

# Simple input and print example
def input_and_parse_data():
    # Feed an embedded sample string to parse_data and, per the lines visible
    # below, write the resulting table and the skipped count to
    # "parsed_output.txt", then print a confirmation.
    # NOTE(review): the triple-quoted literal opened below has no visible
    # contents or closing quotes here, so everything after it is left exactly
    # as found — TODO restore the sample data and the closing quotes.
    # The input is a string literal assigned in place; no user prompt is involved.
    data = """



    # Parse the data using the existing function
    df, skipped = parse_data(data)

    # Redirect output to a text file
    output_file = "parsed_output.txt"
    with open(output_file, "w") as f:
        # Write parsed details to the file
        f.write("\nParsed Data:\n")
        f.write(df.to_string(index=False))  # Ensure readable DataFrame output
        f.write(f"\nSkipped Count: {skipped}\n")

    print(f"Output written to {output_file}")

# Call the function to input and parse data
Editor is loading...
Leave a Comment