Untitled

import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox
from openpyxl import Workbook
from openpyxl.styles import PatternFill

def browse_files():
    file_paths = filedialog.askopenfilenames(filetypes=[("Excel Files", "*.xlsx")])
    if len(file_paths) != 2:
        messagebox.showerror("Error", "Please select exactly two Excel files.")
        return
    process_files(file_paths)

def find_unique_values(row):
    try:
        set1 = sorted(set(str(row['Column1']).split(', ')))
    except KeyError:
        set1 = []  # Handle the case where 'Column1' is missing
    except Exception as e:
        print(f"Error occurred with 'Column1': {e}")
        set1 = []  # Handle other exceptions
        
    try:
        set2 = sorted(set(str(row['Column2']).split(', ')))
    except KeyError:
        set2 = []  # Handle the case where 'Column2' is missing
    except Exception as e:
        print(f"Error occurred with 'Column2': {e}")
        set2 = []  # Handle other exceptions
    
    unique_to_col1 = ', '.join(set1)
    unique_to_col2 = ', '.join(set2)
    return pd.Series([unique_to_col1, unique_to_col2], index=['Column3', 'Column4'])

def process_files(file_paths):
    try:
        dfs = [pd.read_excel(file_path) for file_path in file_paths]
        unique_identifier = ['Requirement(s)', 'Specification(s)']  

        # Compare files and find discrepancies
        merged_df = pd.merge(*dfs, on=unique_identifier, how='outer', suffixes=('_file1', '_file2'))
        discrepancy_df = merged_df[merged_df.apply(lambda x: not x[::2].equals(x[1::2]), axis=1)]

        # Highlight differing cells
        with pd.ExcelWriter('discrepancies.xlsx', engine='openpyxl') as writer:
            discrepancy_df.to_excel(writer, sheet_name='Discrepancies', index=False)

            # Highlight differing cells
            wb = writer.book
            ws = wb['Discrepancies']
            red_fill = PatternFill(start_color='FFFF0000', end_color='FFFF0000', fill_type='solid')

            for row in ws.iter_rows(min_row=2, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
                for cell in row:
                    if cell.column_letter.endswith('_file1') and cell.value != cell.offset(column=1).value:
                        cell.fill = red_fill
                    elif cell.column_letter.endswith('_file2') and cell.value != cell.offset(column=-1).value:
                        cell.fill = red_fill

        # Read data from input Excel file
        df = pd.read_excel(file_paths[0])

        # Apply find_unique_values function to each row
        df[['Column3', 'Column4']] = df.apply(find_unique_values, axis=1)

        # Create Column5 by combining unique values from both columns
        def combine_unique_values(row):
            set1 = sorted(set(str(row['Column1']).split(', ')))
            set2 = sorted(set(str(row['Column2']).split(', ')))
            unique_values = sorted(set1.union(set2))
            return ', '.join(unique_values)

        df['Column5'] = df.apply(combine_unique_values, axis=1)

        # Reorder columns
        df = df[['Column1', 'Column2', 'Column3', 'Column4', 'Column5']]

        # Save output to Excel file
        df.to_excel("output_file.xlsx", index=False)

        messagebox.showinfo("Success", "Output saved to 'output_file.xlsx'. Discrepancies exported to 'discrepancies.xlsx'.")
    except Exception as e:
        messagebox.showerror("Error", str(e))

# Create the main application window
app = tk.Tk()
app.title("Excel Discrepancy Checker")

# Create a button to browse files
browse_button = tk.Button(app, text="Browse Files", command=browse_files)
browse_button.pack(padx=20, pady=10)

# Run the Tkinter event loop
app.mainloop()
Editor is loading...