Untitled

 avatar
unknown
plain_text
6 days ago
7.5 kB
3
Indexable
import pandas as pd
import openpyxl
import glob
import os
from excel_formatter import apply_formatting

input_folder = "workdir"  
output_file = "combined_output.xlsx"

def process_excel_files():
    file_pattern = "*.xlsx"
    file_paths = glob.glob(os.path.join(input_folder, file_pattern))

    rows_data_exported_device = []
    rows_import_config = []

    systemnetwork_data = []
    systemnetwork_data_added = False

    io_data = []
    io_data_added = False

    # List to collect all Advanced DataFrames for merging
    df_list_advanced = []

    for file_path in file_paths:
        wb = openpyxl.load_workbook(file_path, data_only=True)

        # 1. Process "Exported device info" sheet
        sheet_exported_device = wb["Exported device info"]
        device_name = sheet_exported_device["B4"].value
        row_dict_exported_device = {
            "Device name": device_name,
            "Model Name": sheet_exported_device["B1"].value,
            "Serial Number": sheet_exported_device["B2"].value,
            "Slot Name": sheet_exported_device["B3"].value
        }   
        rows_data_exported_device.append(row_dict_exported_device)

        # 2. Process "Import config" sheet
        sheet_import_config = wb["Import config"]
        row_dict_import_config = {
            "Device name": device_name,
            "Category": sheet_import_config["B1"].value,
            "Advanced": sheet_import_config["B2"].value,
            "System, Network": sheet_import_config["B3"].value,
            "I/O": sheet_import_config["B4"].value
        }
        rows_import_config.append(row_dict_import_config)

        # 3. Process "Advanced" sheet with new merging logic
        df_adv = pd.read_excel(file_path, sheet_name="Advanced")
        
        # Forward fill Category, SubCategory, item
        for col in ["Category", "SubCategory", "item"]:
            df_adv[col] = df_adv[col].ffill()
        
        # Drop rows where all of Category, SubCategory, item are NaN
        df_adv.dropna(how='all', subset=["Category", "SubCategory", "item"], inplace=True)
        
        # Rename 'value' column to include FILENAME (not device name) for uniqueness
        file_stub = os.path.splitext(os.path.basename(file_path))[0]  # Get filename without extension
        if 'value' in df_adv.columns:
            df_adv.rename(columns={'value': f'value_{file_stub}'}, inplace=True)
        
        df_list_advanced.append(df_adv)

        # 4. Process "System,Network" sheet (existing logic)
        df_sn = pd.read_excel(file_path, sheet_name="System,Network", header=None, index_col=None)
        sn_cols = df_sn.iloc[:, :3].T
        sn_vals = df_sn.iloc[:, 3:4].T

        if not systemnetwork_data_added:
            sn_cols.insert(0, "Device name", ["Device name", "", ""])
            systemnetwork_data.append(sn_cols)
            systemnetwork_data_added = True

        sn_vals.insert(0, "Device name", device_name)
        systemnetwork_data.append(sn_vals)

        # 5. Process "IO" sheet (existing logic)
        df_io = pd.read_excel(file_path, sheet_name="IO", header=None, index_col=None)
        io_cols = df_io.iloc[:, :3].T
        io_vals = df_io.iloc[:, 3:4].T

        if not io_data_added:
            io_cols.insert(0, "Device name", ["Device name", "", ""])
            io_data.append(io_cols)
            io_data_added = True

        io_vals.insert(0, "Device name", device_name)
        io_data.append(io_vals)

    # Create final DataFrames for the first two sheets
    df_exported_device = pd.DataFrame(
        rows_data_exported_device, 
        columns=["Device name", "Model Name", "Serial Number", "Slot Name"]
    )

    df_import_config = pd.DataFrame(
        rows_import_config, 
        columns=["Device name", "Category", "Advanced", "System, Network", "I/O"]
    )

    # Merge Advanced sheets
    merged_advanced = pd.DataFrame()
    if df_list_advanced:
        if len(df_list_advanced) == 1:
            merged_advanced = df_list_advanced[0]
        else:
            # Find the largest DataFrame
            largest_idx = max(range(len(df_list_advanced)), key=lambda i: len(df_list_advanced[i]))
            largest_df = df_list_advanced.pop(largest_idx)
            
            merged_advanced = largest_df.copy()
            for df in df_list_advanced:
                # Merge without allowing suffixes (ensure columns are uniquely named)
                merged_advanced = pd.merge(
                    merged_advanced,
                    df,
                    on=["Category", "SubCategory", "item"],
                    how="outer",
                    suffixes=('', '_DROP')  # Explicitly handle suffixes
                )
                # Drop columns ending with '_DROP'
                merged_advanced = merged_advanced.loc[:, ~merged_advanced.columns.str.endswith('_DROP')]
        
            # Generate sorting keys based on largest_df's structure
            cat_seq = []
            subcat_seq = {}
            item_seq = {}

            for row in largest_df.itertuples(index=False):
                c = row.Category
                s = row.SubCategory
                i = row.item
                if c not in cat_seq:
                    cat_seq.append(c)
                    subcat_seq[c] = []
                if s not in subcat_seq[c]:
                    subcat_seq[c].append(s)
                if (c, s) not in item_seq:
                    item_seq[(c, s)] = []
                if i not in item_seq[(c, s)]:
                    item_seq[(c, s)].append(i)

            def get_sort_key(row):
                c = row['Category']
                s = row['SubCategory']
                i = row['item']
                cat_rank = cat_seq.index(c) if c in cat_seq else len(cat_seq)
                sub_rank = subcat_seq[c].index(s) if c in subcat_seq and s in subcat_seq[c] else 999999
                item_rank = item_seq.get((c, s), []).index(i) if (c, s) in item_seq and i in item_seq[(c, s)] else 999999
                return (cat_rank, sub_rank, item_rank)

            if not merged_advanced.empty:
                merged_advanced['__sort_key'] = merged_advanced.apply(get_sort_key, axis=1)
                merged_advanced.sort_values('__sort_key', inplace=True)
                merged_advanced.drop('__sort_key', axis=1, inplace=True)
        
        # Fill NaN with empty string
        merged_advanced.fillna('', inplace=True)

    # Concatenate other sheets
    combined_df_systemnetwork = pd.concat(systemnetwork_data, ignore_index=True)
    combined_df_io = pd.concat(io_data, ignore_index=True)

    # Write to Excel
    try:
        with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
            df_import_config.to_excel(writer, sheet_name="Import config", index=False)
            df_exported_device.to_excel(writer, sheet_name="Exported device info", index=False)

            # Write merged Advanced sheet if not empty
            if not merged_advanced.empty:
                merged_advanced.to_excel(writer, sheet_name="Advanced", index=False)

            # Existing sheets
            combined_df_systemnetwork.to_excel(writer, sheet_name="System,Network", index=False, header=False)
            combined_df_io.to_excel(writer, sheet_name="IO", index=False, header=False)

        print("Data extracted and merged successfully!")

        apply_formatting(output_file)
    except PermissionError as e:
        print(f"Error: {e}")
        print("The file may be open in another program. Please close the file and try again.")
Leave a Comment