Untitled
unknown
plain_text
9 months ago
7.0 kB
5
Indexable
import pandas as pd
import openpyxl
import glob
import os
from excel_formatter import apply_formatting
input_folder = "workdir"
output_file = "combined_output.xlsx"
def process_excel_files():
file_pattern = "*.xlsx"
file_paths = glob.glob(os.path.join(input_folder, file_pattern))
rows_data_exported_device = []
rows_import_config = []
systemnetwork_data = []
systemnetwork_data_added = False
io_data = []
io_data_added = False
# List to collect all Advanced DataFrames for merging
df_list_advanced = []
for file_path in file_paths:
wb = openpyxl.load_workbook(file_path, data_only=True)
# 1. Process "Exported device info" sheet
sheet_exported_device = wb["Exported device info"]
device_name = sheet_exported_device["B4"].value
row_dict_exported_device = {
"Device name": device_name,
"Model Name": sheet_exported_device["B1"].value,
"Serial Number": sheet_exported_device["B2"].value,
"Slot Name": sheet_exported_device["B3"].value
}
rows_data_exported_device.append(row_dict_exported_device)
# 2. Process "Import config" sheet
sheet_import_config = wb["Import config"]
row_dict_import_config = {
"Device name": device_name,
"Category": sheet_import_config["B1"].value,
"Advanced": sheet_import_config["B2"].value,
"System, Network": sheet_import_config["B3"].value,
"I/O": sheet_import_config["B4"].value
}
rows_import_config.append(row_dict_import_config)
# 3. Process "Advanced" sheet with new merging logic
df_adv = pd.read_excel(file_path, sheet_name="Advanced")
# Forward fill Category, SubCategory, item
for col in ["Category", "SubCategory", "item"]:
df_adv[col] = df_adv[col].ffill()
# Drop rows where all of Category, SubCategory, item are NaN
df_adv.dropna(how='all', subset=["Category", "SubCategory", "item"], inplace=True)
# Rename 'value' column to include device name
if 'value' in df_adv.columns:
df_adv.rename(columns={'value': f'value_{device_name}'}, inplace=True)
df_list_advanced.append(df_adv)
# 4. Process "System,Network" sheet (existing logic)
df_sn = pd.read_excel(file_path, sheet_name="System,Network", header=None, index_col=None)
sn_cols = df_sn.iloc[:, :3].T
sn_vals = df_sn.iloc[:, 3:4].T
if not systemnetwork_data_added:
sn_cols.insert(0, "Device name", ["Device name", "", ""])
systemnetwork_data.append(sn_cols)
systemnetwork_data_added = True
sn_vals.insert(0, "Device name", device_name)
systemnetwork_data.append(sn_vals)
# 5. Process "IO" sheet (existing logic)
df_io = pd.read_excel(file_path, sheet_name="IO", header=None, index_col=None)
io_cols = df_io.iloc[:, :3].T
io_vals = df_io.iloc[:, 3:4].T
if not io_data_added:
io_cols.insert(0, "Device name", ["Device name", "", ""])
io_data.append(io_cols)
io_data_added = True
io_vals.insert(0, "Device name", device_name)
io_data.append(io_vals)
# Create final DataFrames for the first two sheets
df_exported_device = pd.DataFrame(
rows_data_exported_device,
columns=["Device name", "Model Name", "Serial Number", "Slot Name"]
)
df_import_config = pd.DataFrame(
rows_import_config,
columns=["Device name", "Category", "Advanced", "System, Network", "I/O"]
)
# Merge Advanced sheets
merged_advanced = pd.DataFrame()
if df_list_advanced:
if len(df_list_advanced) == 1:
merged_advanced = df_list_advanced[0]
else:
# Find the largest DataFrame
largest_idx = max(range(len(df_list_advanced)), key=lambda i: len(df_list_advanced[i]))
largest_df = df_list_advanced.pop(largest_idx)
merged_advanced = largest_df.copy()
for df in df_list_advanced:
merged_advanced = pd.merge(
merged_advanced,
df,
on=["Category", "SubCategory", "item"],
how="outer"
)
# Generate sorting keys based on largest_df's structure
cat_seq = []
subcat_seq = {}
item_seq = {}
for row in largest_df.itertuples(index=False):
c = row.Category
s = row.SubCategory
i = row.item
if c not in cat_seq:
cat_seq.append(c)
subcat_seq[c] = []
if s not in subcat_seq[c]:
subcat_seq[c].append(s)
if (c, s) not in item_seq:
item_seq[(c, s)] = []
if i not in item_seq[(c, s)]:
item_seq[(c, s)].append(i)
def get_sort_key(row):
c = row['Category']
s = row['SubCategory']
i = row['item']
cat_rank = cat_seq.index(c) if c in cat_seq else len(cat_seq)
sub_rank = subcat_seq[c].index(s) if c in subcat_seq and s in subcat_seq[c] else 999999
item_rank = item_seq.get((c, s), []).index(i) if (c, s) in item_seq and i in item_seq[(c, s)] else 999999
return (cat_rank, sub_rank, item_rank)
merged_advanced['__sort_key'] = merged_advanced.apply(get_sort_key, axis=1)
merged_advanced.sort_values('__sort_key', inplace=True)
merged_advanced.drop('__sort_key', axis=1, inplace=True)
# Fill NaN with empty string
merged_advanced.fillna('', inplace=True)
# Concatenate other sheets
combined_df_systemnetwork = pd.concat(systemnetwork_data, ignore_index=True)
combined_df_io = pd.concat(io_data, ignore_index=True)
# Write to Excel
try:
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
df_import_config.to_excel(writer, sheet_name="Import config", index=False)
df_exported_device.to_excel(writer, sheet_name="Exported device info", index=False)
# Write merged Advanced sheet if not empty
if not merged_advanced.empty:
merged_advanced.to_excel(writer, sheet_name="Advanced", index=False)
# Existing sheets
combined_df_systemnetwork.to_excel(writer, sheet_name="System,Network", index=False, header=False)
combined_df_io.to_excel(writer, sheet_name="IO", index=False, header=False)
print("Data extracted and merged successfully!")
apply_formatting(output_file)
except PermissionError as e:
print(f"Error: {e}")
print("The file may be open in another program. Please close the file and try again.")Editor is loading...
Leave a Comment