Untitled
unknown
plain_text
7 months ago
3.8 kB
1
Indexable
Never
import pandas as pd import tkinter as tk from tkinter import filedialog, messagebox from openpyxl import Workbook from openpyxl.styles import PatternFill def browse_files(): file_paths = filedialog.askopenfilenames(filetypes=[("Excel Files", "*.xlsx")]) if len(file_paths) != 2: messagebox.showerror("Error", "Please select exactly two Excel files.") return process_files(file_paths) def find_unique_values(row): try: set1 = sorted(set(str(row['Column1']).split(', '))) except KeyError: set1 = [] # Handle the case where 'Column1' is missing except Exception as e: print(f"Error occurred with 'Column1': {e}") set1 = [] # Handle other exceptions try: set2 = sorted(set(str(row['Column2']).split(', '))) except KeyError: set2 = [] # Handle the case where 'Column2' is missing except Exception as e: print(f"Error occurred with 'Column2': {e}") set2 = [] # Handle other exceptions unique_to_col1 = ', '.join(set1) unique_to_col2 = ', '.join(set2) return pd.Series([unique_to_col1, unique_to_col2], index=['Column3', 'Column4']) def process_files(file_paths): try: dfs = [pd.read_excel(file_path) for file_path in file_paths] unique_identifier = ['Requirement(s)', 'Specification(s)'] # Compare files and find discrepancies merged_df = pd.merge(*dfs, on=unique_identifier, how='outer', suffixes=('_file1', '_file2')) discrepancy_df = merged_df[merged_df.apply(lambda x: not x[::2].equals(x[1::2]), axis=1)] # Highlight differing cells with pd.ExcelWriter('discrepancies.xlsx', engine='openpyxl') as writer: discrepancy_df.to_excel(writer, sheet_name='Discrepancies', index=False) # Highlight differing cells wb = writer.book ws = wb['Discrepancies'] red_fill = PatternFill(start_color='FFFF0000', end_color='FFFF0000', fill_type='solid') for row in ws.iter_rows(min_row=2, max_row=ws.max_row, min_col=1, max_col=ws.max_column): for cell in row: if cell.column_letter.endswith('_file1') and cell.value != cell.offset(column=1).value: cell.fill = red_fill elif cell.column_letter.endswith('_file2') and cell.value != cell.offset(column=-1).value: cell.fill = red_fill # Read data from input Excel file df = pd.read_excel(file_paths[0]) # Apply find_unique_values function to each row df[['Column3', 'Column4']] = df.apply(find_unique_values, axis=1) # Create Column5 by combining unique values from both columns def combine_unique_values(row): set1 = sorted(set(str(row['Column1']).split(', '))) set2 = sorted(set(str(row['Column2']).split(', '))) unique_values = sorted(set1.union(set2)) return ', '.join(unique_values) df['Column5'] = df.apply(combine_unique_values, axis=1) # Reorder columns df = df[['Column1', 'Column2', 'Column3', 'Column4', 'Column5']] # Save output to Excel file df.to_excel("output_file.xlsx", index=False) messagebox.showinfo("Success", "Output saved to 'output_file.xlsx'. Discrepancies exported to 'discrepancies.xlsx'.") except Exception as e: messagebox.showerror("Error", str(e)) # Create the main application window app = tk.Tk() app.title("Excel Discrepancy Checker") # Create a button to browse files browse_button = tk.Button(app, text="Browse Files", command=browse_files) browse_button.pack(padx=20, pady=10) # Run the Tkinter event loop app.mainloop()
Leave a Comment