Untitled
import pandas as pd import openpyxl import glob import os from excel_formatter import apply_formatting input_folder = "workdir" output_file = "combined_output.xlsx" def process_excel_files(): file_pattern = "*.xlsx" file_paths = glob.glob(os.path.join(input_folder, file_pattern)) rows_data_exported_device = [] rows_import_config = [] systemnetwork_data = [] systemnetwork_data_added = False io_data = [] io_data_added = False # List to collect all Advanced DataFrames for merging df_list_advanced = [] for file_path in file_paths: wb = openpyxl.load_workbook(file_path, data_only=True) # 1. Process "Exported device info" sheet sheet_exported_device = wb["Exported device info"] device_name = sheet_exported_device["B4"].value row_dict_exported_device = { "Device name": device_name, "Model Name": sheet_exported_device["B1"].value, "Serial Number": sheet_exported_device["B2"].value, "Slot Name": sheet_exported_device["B3"].value } rows_data_exported_device.append(row_dict_exported_device) # 2. Process "Import config" sheet sheet_import_config = wb["Import config"] row_dict_import_config = { "Device name": device_name, "Category": sheet_import_config["B1"].value, "Advanced": sheet_import_config["B2"].value, "System, Network": sheet_import_config["B3"].value, "I/O": sheet_import_config["B4"].value } rows_import_config.append(row_dict_import_config) # 3. Process "Advanced" sheet with new merging logic df_adv = pd.read_excel(file_path, sheet_name="Advanced") # Forward fill Category, SubCategory, item for col in ["Category", "SubCategory", "item"]: df_adv[col] = df_adv[col].ffill() # Drop rows where all of Category, SubCategory, item are NaN df_adv.dropna(how='all', subset=["Category", "SubCategory", "item"], inplace=True) # Rename 'value' column to include FILENAME (not device name) for uniqueness file_stub = os.path.splitext(os.path.basename(file_path))[0] # Get filename without extension if 'value' in df_adv.columns: df_adv.rename(columns={'value': f'value_{file_stub}'}, inplace=True) df_list_advanced.append(df_adv) # 4. Process "System,Network" sheet (existing logic) df_sn = pd.read_excel(file_path, sheet_name="System,Network", header=None, index_col=None) sn_cols = df_sn.iloc[:, :3].T sn_vals = df_sn.iloc[:, 3:4].T if not systemnetwork_data_added: sn_cols.insert(0, "Device name", ["Device name", "", ""]) systemnetwork_data.append(sn_cols) systemnetwork_data_added = True sn_vals.insert(0, "Device name", device_name) systemnetwork_data.append(sn_vals) # 5. Process "IO" sheet (existing logic) df_io = pd.read_excel(file_path, sheet_name="IO", header=None, index_col=None) io_cols = df_io.iloc[:, :3].T io_vals = df_io.iloc[:, 3:4].T if not io_data_added: io_cols.insert(0, "Device name", ["Device name", "", ""]) io_data.append(io_cols) io_data_added = True io_vals.insert(0, "Device name", device_name) io_data.append(io_vals) # Create final DataFrames for the first two sheets df_exported_device = pd.DataFrame( rows_data_exported_device, columns=["Device name", "Model Name", "Serial Number", "Slot Name"] ) df_import_config = pd.DataFrame( rows_import_config, columns=["Device name", "Category", "Advanced", "System, Network", "I/O"] ) # Merge Advanced sheets merged_advanced = pd.DataFrame() if df_list_advanced: if len(df_list_advanced) == 1: merged_advanced = df_list_advanced[0] else: # Find the largest DataFrame largest_idx = max(range(len(df_list_advanced)), key=lambda i: len(df_list_advanced[i])) largest_df = df_list_advanced.pop(largest_idx) merged_advanced = largest_df.copy() for df in df_list_advanced: # Merge without allowing suffixes (ensure columns are uniquely named) merged_advanced = pd.merge( merged_advanced, df, on=["Category", "SubCategory", "item"], how="outer", suffixes=('', '_DROP') # Explicitly handle suffixes ) # Drop columns ending with '_DROP' merged_advanced = merged_advanced.loc[:, ~merged_advanced.columns.str.endswith('_DROP')] # Generate sorting keys based on largest_df's structure cat_seq = [] subcat_seq = {} item_seq = {} for row in largest_df.itertuples(index=False): c = row.Category s = row.SubCategory i = row.item if c not in cat_seq: cat_seq.append(c) subcat_seq[c] = [] if s not in subcat_seq[c]: subcat_seq[c].append(s) if (c, s) not in item_seq: item_seq[(c, s)] = [] if i not in item_seq[(c, s)]: item_seq[(c, s)].append(i) def get_sort_key(row): c = row['Category'] s = row['SubCategory'] i = row['item'] cat_rank = cat_seq.index(c) if c in cat_seq else len(cat_seq) sub_rank = subcat_seq[c].index(s) if c in subcat_seq and s in subcat_seq[c] else 999999 item_rank = item_seq.get((c, s), []).index(i) if (c, s) in item_seq and i in item_seq[(c, s)] else 999999 return (cat_rank, sub_rank, item_rank) if not merged_advanced.empty: merged_advanced['__sort_key'] = merged_advanced.apply(get_sort_key, axis=1) merged_advanced.sort_values('__sort_key', inplace=True) merged_advanced.drop('__sort_key', axis=1, inplace=True) # Fill NaN with empty string merged_advanced.fillna('', inplace=True) # Concatenate other sheets combined_df_systemnetwork = pd.concat(systemnetwork_data, ignore_index=True) combined_df_io = pd.concat(io_data, ignore_index=True) # Write to Excel try: with pd.ExcelWriter(output_file, engine="openpyxl") as writer: df_import_config.to_excel(writer, sheet_name="Import config", index=False) df_exported_device.to_excel(writer, sheet_name="Exported device info", index=False) # Write merged Advanced sheet if not empty if not merged_advanced.empty: merged_advanced.to_excel(writer, sheet_name="Advanced", index=False) # Existing sheets combined_df_systemnetwork.to_excel(writer, sheet_name="System,Network", index=False, header=False) combined_df_io.to_excel(writer, sheet_name="IO", index=False, header=False) print("Data extracted and merged successfully!") apply_formatting(output_file) except PermissionError as e: print(f"Error: {e}") print("The file may be open in another program. Please close the file and try again.")
Leave a Comment