Untitled
unknown
plain_text
a month ago
18 kB
1
Indexable
Never
import logging import os import sys import datetime import openpyxl from concurrent.futures import ThreadPoolExecutor, as_completed import xlwings as xw from openpyxl.drawing.image import Image from openpyxl.cell.rich_text import CellRichText from Checksum import Checksum class ExcelComparator: def __init__(self, test_env_file, base_line_file): self.test_env_file = os.path.abspath(test_env_file) self.base_line_file = os.path.abspath(base_line_file) # Always enable editing and save the files first self.preprocess_files(self.test_env_file, self.base_line_file) # Check if the files are different using their checksums if Checksum.md5(self.test_env_file) != Checksum.md5(self.base_line_file): try: self.wb_new = openpyxl.load_workbook(self.test_env_file, data_only=False, rich_text=True) self.wb_old = openpyxl.load_workbook(self.base_line_file, data_only=False, rich_text=True) except Exception as e: print(f"Error loading workbooks: {e}", file=sys.stderr) sys.exit(1) else: self.wb_new = None self.wb_old = None @staticmethod def serialize_value(value): if isinstance(value, datetime.datetime): return value.isoformat() return str(value) @staticmethod def get_cell_details(cell): cell_details = { 'value': ExcelComparator.serialize_value(cell.value), 'data_type': str(cell.data_type), 'font': cell.font.__dict__, 'border': cell.border.__dict__, 'fill': cell.fill.__dict__, 'number_format': str(cell.number_format), 'alignment': cell.alignment.__dict__, 'protection': cell.protection.__dict__, 'hyperlink': str(cell.hyperlink), 'comment': str(cell.comment), 'rich_text': ExcelComparator.serialize_value(cell.value) if isinstance(cell.value, CellRichText) else None, 'formula': ExcelComparator.serialize_value(cell.value) if cell.data_type == 'f' else None } return cell_details @staticmethod def get_image_details(sheet): images = [] for drawing in sheet._images: if isinstance(drawing, Image): try: width = getattr(drawing, 'width', None) height = getattr(drawing, 'height', None) anchor = getattr(drawing, 'anchor', None) # If anchor is a complex object with _from attribute if hasattr(anchor, '_from'): position = (anchor._from.col, anchor._from.row) else: position = anchor # Assuming anchor is a simple attribute like 'A1' image_details = { 'name': drawing.path, 'size': (width, height), 'position': position } images.append(image_details) except Exception as e: print(f"Error processing image: {e}") return images @staticmethod def compare_images(images_old, images_new, sheet_name): mismatches = [] old_image_names_positions = {(img['name'], img['position']) for img in images_old} new_image_names_positions = {(img['name'], img['position']) for img in images_new} added_images = new_image_names_positions - old_image_names_positions removed_images = old_image_names_positions - new_image_names_positions for img_name, img_position in added_images: img_new = next(img for img in images_new if img['name'] == img_name and img['position'] == img_position) mismatches.append({ 'sheet_name': sheet_name, 'cell_reference': f"Image at {img_new['position']}", 'difference_type': 'Image Added', 'base_line_value': 'N/A', 'test_env_value': img_new['name'], 'details': f"New size: {img_new['size']}\nPosition: {img_new['position']}" }) for img_name, img_position in removed_images: img_old = next(img for img in images_old if img['name'] == img_name and img['position'] == img_position) mismatches.append({ 'sheet_name': sheet_name, 'cell_reference': f"Image at {img_old['position']}", 'difference_type': 'Image Removed', 'base_line_value': img_old['name'], 'test_env_value': 'N/A', 'details': f"Old size: {img_old['size']}\nPosition: {img_old['position']}" }) for img_name, img_position in old_image_names_positions & new_image_names_positions: img_old = next(img for img in images_old if img['name'] == img_name and img['position'] == img_position) img_new = next(img for img in images_new if img['name'] == img_name and img['position'] == img_position) if img_old['position'] != img_new['position']: mismatches.append({ 'sheet_name': sheet_name, 'cell_reference': f"Image at {img_old['position']} to {img_new['position']}", 'difference_type': 'Image Position Changed', 'base_line_value': img_old['position'], 'test_env_value': img_new['position'], 'details': f"Image: {img_old['name']}\nOld position: {img_old['position']}\nNew position: {img_new['position']}" }) if img_old['size'] != img_new['size']: mismatches.append({ 'sheet_name': sheet_name, 'cell_reference': f"Image at {img_old['position']}", 'difference_type': 'Image Size Changed', 'base_line_value': img_old['size'], 'test_env_value': img_new['size'], 'details': f"Image: {img_old['name']}\nOld size: {img_old['size']}\nNew size: {img_new['size']}" }) return mismatches @staticmethod def compare_rich_text_objects(old_rich_text, new_rich_text): def extract_text(rich_text): if hasattr(rich_text, 'text'): return rich_text.text elif isinstance(rich_text, str): return rich_text return "" old_text = extract_text(old_rich_text) new_text = extract_text(new_rich_text) return old_text != new_text @staticmethod def extract_and_compare_formats(old_format, new_format): def compare_dicts(dict1, dict2): differences = [] for key in set(dict1.keys()).union(set(dict2.keys())): value1 = dict1.get(key) value2 = dict2.get(key) if value1 != value2: differences.append((key, 'diff')) return differences differences = compare_dicts(old_format, new_format) return differences @staticmethod def compare_values(old_value, new_value): if isinstance(old_value, datetime.datetime) and isinstance(new_value, datetime.datetime): return old_value != new_value return str(old_value) != str(new_value) @staticmethod def check_content(sheet_name, ws_new, ws_old): output_list = [] max_row = max(ws_new.max_row, ws_old.max_row) max_column = max(ws_new.max_column, ws_old.max_column) for row in range(1, max_row + 1): for col in range(1, max_column + 1): cell_new = ws_new.cell(row=row, column=col) cell_old = ws_old.cell(row=row, column=col) if ExcelComparator.compare_values(cell_old.value, cell_new.value): output_list.append({ 'sheet_name': sheet_name, 'cell_reference': cell_old.coordinate, 'difference_type': 'Value Mismatch', 'base_line_value': str(cell_old.value), 'test_env_value': str(cell_new.value), 'details': 'Cell value mismatch' }) if cell_old.data_type == 'f' or cell_new.data_type == 'f': if cell_old.value != cell_new.value: output_list.append({ 'sheet_name': sheet_name, 'cell_reference': cell_old.coordinate, 'difference_type': 'Formula Mismatch', 'base_line_value': str(cell_old.value), 'test_env_value': str(cell_new.value), 'details': 'Formula mismatch' }) return output_list @staticmethod def check_format(sheet_name, ws_new, ws_old): output_list = [] max_row = max(ws_new.max_row, ws_old.max_row) max_column = max(ws_new.max_column, ws_old.max_column) for row in range(1, max_row + 1): for col in range(1, max_column + 1): cell_new = ws_new.cell(row=row, column=col) cell_old = ws_old.cell(row=row, column=col) old_details = ExcelComparator.get_cell_details(cell_old) new_details = ExcelComparator.get_cell_details(cell_new) # Exclude value and formula mismatches from format checks if ExcelComparator.compare_values(cell_old.value, cell_new.value) or \ (cell_old.data_type == 'f' or cell_new.data_type == 'f') and cell_old.value != cell_new.value: continue mismatches = { 'rich_text': ExcelComparator.compare_rich_text_objects(old_details['rich_text'], new_details['rich_text']) or isinstance(old_details['rich_text'], CellRichText) != isinstance( new_details['rich_text'], CellRichText), 'font': old_details['font'] != new_details['font'], 'border': old_details['border'] != new_details['border'], 'fill': old_details['fill'] != new_details['fill'], 'number_format': old_details['number_format'] != new_details['number_format'], 'alignment': old_details['alignment'] != new_details['alignment'] } if any(mismatches.values()): details = "\n".join( [f"{key.replace('_', ' ').title()} differences" for key, mismatch in mismatches.items() if mismatch]) output_list.append({ 'sheet_name': sheet_name, 'cell_reference': str(cell_old.coordinate), 'difference_type': 'Format Mismatch', 'base_line_value': str(old_details['value']), 'test_env_value': str(new_details['value']), 'details': details }) images_old = ExcelComparator.get_image_details(ws_old) images_new = ExcelComparator.get_image_details(ws_new) image_mismatches = ExcelComparator.compare_images(images_old, images_new, sheet_name) output_list.extend(image_mismatches) return output_list @staticmethod def check_sheet_order_and_missing(wb_new, wb_old): output_list = [] sheet_order_mismatch = { "status": True, "details": "" } new_sheets = wb_new.sheetnames old_sheets = wb_old.sheetnames common_sheets = set(new_sheets).intersection(set(old_sheets)) missing_in_new = set(old_sheets) - set(new_sheets) missing_in_old = set(new_sheets) - set(old_sheets) if missing_in_new: output_list.append({ 'sheet_name': 'N/A', 'cell_reference': 'N/A', 'difference_type': 'Missing Sheets in New File', 'base_line_value': ', '.join(missing_in_new), 'test_env_value': 'N/A', 'details': 'Sheets are present in old file but missing in new file.' }) if missing_in_old: output_list.append({ 'sheet_name': 'N/A', 'cell_reference': 'N/A', 'difference_type': 'Missing Sheets in Old File', 'base_line_value': 'N/A', 'test_env_value': ', '.join(missing_in_old), 'details': 'Sheets are present in new file but missing in old file.' }) common_sheets_list = [sheet for sheet in old_sheets if sheet in common_sheets] new_sheets_list = [sheet for sheet in new_sheets if sheet in common_sheets] if common_sheets_list != new_sheets_list: sheet_order_mismatch["status"] = False mismatch_details = [] min_length = min(len(common_sheets_list), len(new_sheets_list)) for i in range(min_length): if common_sheets_list[i] != new_sheets_list[i]: mismatch_details.append(f"Position {i + 1}: {common_sheets_list[i]} -> {new_sheets_list[i]}") if len(new_sheets_list) > min_length: mismatch_details.append(f"Additional sheets in new file: {new_sheets_list[min_length:]}") elif len(common_sheets_list) > min_length: mismatch_details.append(f"Additional sheets in old file: {common_sheets_list[min_length:]}") sheet_order_mismatch["details"] = "\n".join(mismatch_details) output_list.append({ 'sheet_name': 'N/A', 'cell_reference': 'N/A', 'difference_type': 'Sheet Order Mismatch', 'base_line_value': str(common_sheets_list), 'test_env_value': str(new_sheets_list), 'details': "\n".join(mismatch_details) }) return output_list, sheet_order_mismatch @staticmethod def enable_editing_and_save(file_path): try: app = xw.App(visible=False) workbook = app.books.open(file_path) workbook.save() workbook.close() app.quit() logging.info(f"Enabled editing and saved the file {file_path}") except Exception as e: logging.error(f"Failed to enable editing and save the file {file_path}: {e}") sys.exit(1) def preprocess_files(self, *file_paths): """Enable editing and save all given file paths.""" for file_path in file_paths: abs_path = os.path.abspath(file_path) ExcelComparator.enable_editing_and_save(abs_path) def compare_files(self, compare_type): if self.wb_new is None or self.wb_old is None: return { "overall_status": True, "sheet_order_comparison": {"status": True, "details": ""} if compare_type == "content" else None, "sheet_results": {}, "sheet_mismatches": {} } # Check for sheet order and missing sheets sheet_order_mismatches, sheet_order_mismatch = self.check_sheet_order_and_missing(self.wb_new, self.wb_old) sheet_results = {} new_sheets = set(self.wb_new.sheetnames) old_sheets = set(self.wb_old.sheetnames) common_sheets = new_sheets.intersection(old_sheets) with ThreadPoolExecutor() as executor: futures = {} for sheet_name in common_sheets: if compare_type == "content": futures[sheet_name] = executor.submit(self.check_content, sheet_name, self.wb_new[sheet_name], self.wb_old[sheet_name]) elif compare_type == "format": futures[sheet_name] = executor.submit(self.check_format, sheet_name, self.wb_new[sheet_name], self.wb_old[sheet_name]) for sheet_name, future in futures.items(): result = future.result() if result: sheet_results[sheet_name] = { "status": False, "mismatches": result } else: sheet_results[sheet_name] = {"status": True} # Integrate sheet order and missing sheets comparison in content comparison if compare_type == "content": overall_status = all(sheet["status"] for sheet in sheet_results.values()) and sheet_order_mismatch["status"] sheet_results["sheet_order"] = {"status": sheet_order_mismatch["status"], "details": sheet_order_mismatch["details"]} combined_result = { "overall_status": overall_status, "sheet_order_comparison": sheet_order_mismatch, "sheet_results": sheet_results } else: combined_result = { "overall_status": all(sheet["status"] for sheet in sheet_results.values()), "sheet_results": sheet_results } return combined_result def do_content_compare(self): return self.compare_files("content") def do_format_compare(self): return self.compare_files("format")
Leave a Comment