Untitled

mail@pastecode.io avatar
unknown
plain_text
a month ago
18 kB
1
Indexable
Never
import logging
import os
import sys
import datetime

import openpyxl
from concurrent.futures import ThreadPoolExecutor, as_completed
import xlwings as xw
from openpyxl.drawing.image import Image
from openpyxl.cell.rich_text import CellRichText

from Checksum import Checksum


class ExcelComparator:
    def __init__(self, test_env_file, base_line_file):
        self.test_env_file = os.path.abspath(test_env_file)
        self.base_line_file = os.path.abspath(base_line_file)

        # Always enable editing and save the files first
        self.preprocess_files(self.test_env_file, self.base_line_file)

        # Check if the files are different using their checksums
        if Checksum.md5(self.test_env_file) != Checksum.md5(self.base_line_file):
            try:
                self.wb_new = openpyxl.load_workbook(self.test_env_file, data_only=False, rich_text=True)
                self.wb_old = openpyxl.load_workbook(self.base_line_file, data_only=False, rich_text=True)
            except Exception as e:
                print(f"Error loading workbooks: {e}", file=sys.stderr)
                sys.exit(1)
        else:
            self.wb_new = None
            self.wb_old = None

    @staticmethod
    def serialize_value(value):
        if isinstance(value, datetime.datetime):
            return value.isoformat()
        return str(value)

    @staticmethod
    def get_cell_details(cell):
        cell_details = {
            'value': ExcelComparator.serialize_value(cell.value),
            'data_type': str(cell.data_type),
            'font': cell.font.__dict__,
            'border': cell.border.__dict__,
            'fill': cell.fill.__dict__,
            'number_format': str(cell.number_format),
            'alignment': cell.alignment.__dict__,
            'protection': cell.protection.__dict__,
            'hyperlink': str(cell.hyperlink),
            'comment': str(cell.comment),
            'rich_text': ExcelComparator.serialize_value(cell.value) if isinstance(cell.value, CellRichText) else None,
            'formula': ExcelComparator.serialize_value(cell.value) if cell.data_type == 'f' else None
        }
        return cell_details

    @staticmethod
    def get_image_details(sheet):
        images = []
        for drawing in sheet._images:
            if isinstance(drawing, Image):
                try:
                    width = getattr(drawing, 'width', None)
                    height = getattr(drawing, 'height', None)
                    anchor = getattr(drawing, 'anchor', None)

                    # If anchor is a complex object with _from attribute
                    if hasattr(anchor, '_from'):
                        position = (anchor._from.col, anchor._from.row)
                    else:
                        position = anchor  # Assuming anchor is a simple attribute like 'A1'

                    image_details = {
                        'name': drawing.path,
                        'size': (width, height),
                        'position': position
                    }
                    images.append(image_details)
                except Exception as e:
                    print(f"Error processing image: {e}")
        return images

    @staticmethod
    def compare_images(images_old, images_new, sheet_name):
        mismatches = []

        old_image_names_positions = {(img['name'], img['position']) for img in images_old}
        new_image_names_positions = {(img['name'], img['position']) for img in images_new}

        added_images = new_image_names_positions - old_image_names_positions
        removed_images = old_image_names_positions - new_image_names_positions

        for img_name, img_position in added_images:
            img_new = next(img for img in images_new if img['name'] == img_name and img['position'] == img_position)
            mismatches.append({
                'sheet_name': sheet_name,
                'cell_reference': f"Image at {img_new['position']}",
                'difference_type': 'Image Added',
                'base_line_value': 'N/A',
                'test_env_value': img_new['name'],
                'details': f"New size: {img_new['size']}\nPosition: {img_new['position']}"
            })

        for img_name, img_position in removed_images:
            img_old = next(img for img in images_old if img['name'] == img_name and img['position'] == img_position)
            mismatches.append({
                'sheet_name': sheet_name,
                'cell_reference': f"Image at {img_old['position']}",
                'difference_type': 'Image Removed',
                'base_line_value': img_old['name'],
                'test_env_value': 'N/A',
                'details': f"Old size: {img_old['size']}\nPosition: {img_old['position']}"
            })

        for img_name, img_position in old_image_names_positions & new_image_names_positions:
            img_old = next(img for img in images_old if img['name'] == img_name and img['position'] == img_position)
            img_new = next(img for img in images_new if img['name'] == img_name and img['position'] == img_position)
            if img_old['position'] != img_new['position']:
                mismatches.append({
                    'sheet_name': sheet_name,
                    'cell_reference': f"Image at {img_old['position']} to {img_new['position']}",
                    'difference_type': 'Image Position Changed',
                    'base_line_value': img_old['position'],
                    'test_env_value': img_new['position'],
                    'details': f"Image: {img_old['name']}\nOld position: {img_old['position']}\nNew position: {img_new['position']}"
                })
            if img_old['size'] != img_new['size']:
                mismatches.append({
                    'sheet_name': sheet_name,
                    'cell_reference': f"Image at {img_old['position']}",
                    'difference_type': 'Image Size Changed',
                    'base_line_value': img_old['size'],
                    'test_env_value': img_new['size'],
                    'details': f"Image: {img_old['name']}\nOld size: {img_old['size']}\nNew size: {img_new['size']}"
                })

        return mismatches

    @staticmethod
    def compare_rich_text_objects(old_rich_text, new_rich_text):
        def extract_text(rich_text):
            if hasattr(rich_text, 'text'):
                return rich_text.text
            elif isinstance(rich_text, str):
                return rich_text
            return ""

        old_text = extract_text(old_rich_text)
        new_text = extract_text(new_rich_text)

        return old_text != new_text

    @staticmethod
    def extract_and_compare_formats(old_format, new_format):
        def compare_dicts(dict1, dict2):
            differences = []
            for key in set(dict1.keys()).union(set(dict2.keys())):
                value1 = dict1.get(key)
                value2 = dict2.get(key)
                if value1 != value2:
                    differences.append((key, 'diff'))
            return differences

        differences = compare_dicts(old_format, new_format)
        return differences

    @staticmethod
    def compare_values(old_value, new_value):
        if isinstance(old_value, datetime.datetime) and isinstance(new_value, datetime.datetime):
            return old_value != new_value
        return str(old_value) != str(new_value)

    @staticmethod
    def check_content(sheet_name, ws_new, ws_old):
        output_list = []

        max_row = max(ws_new.max_row, ws_old.max_row)
        max_column = max(ws_new.max_column, ws_old.max_column)

        for row in range(1, max_row + 1):
            for col in range(1, max_column + 1):
                cell_new = ws_new.cell(row=row, column=col)
                cell_old = ws_old.cell(row=row, column=col)

                if ExcelComparator.compare_values(cell_old.value, cell_new.value):
                    output_list.append({
                        'sheet_name': sheet_name,
                        'cell_reference': cell_old.coordinate,
                        'difference_type': 'Value Mismatch',
                        'base_line_value': str(cell_old.value),
                        'test_env_value': str(cell_new.value),
                        'details': 'Cell value mismatch'
                    })

                if cell_old.data_type == 'f' or cell_new.data_type == 'f':
                    if cell_old.value != cell_new.value:
                        output_list.append({
                            'sheet_name': sheet_name,
                            'cell_reference': cell_old.coordinate,
                            'difference_type': 'Formula Mismatch',
                            'base_line_value': str(cell_old.value),
                            'test_env_value': str(cell_new.value),
                            'details': 'Formula mismatch'
                        })

        return output_list

    @staticmethod
    def check_format(sheet_name, ws_new, ws_old):
        output_list = []

        max_row = max(ws_new.max_row, ws_old.max_row)
        max_column = max(ws_new.max_column, ws_old.max_column)

        for row in range(1, max_row + 1):
            for col in range(1, max_column + 1):
                cell_new = ws_new.cell(row=row, column=col)
                cell_old = ws_old.cell(row=row, column=col)

                old_details = ExcelComparator.get_cell_details(cell_old)
                new_details = ExcelComparator.get_cell_details(cell_new)

                # Exclude value and formula mismatches from format checks
                if ExcelComparator.compare_values(cell_old.value, cell_new.value) or \
                        (cell_old.data_type == 'f' or cell_new.data_type == 'f') and cell_old.value != cell_new.value:
                    continue

                mismatches = {
                    'rich_text': ExcelComparator.compare_rich_text_objects(old_details['rich_text'],
                                                                           new_details['rich_text']) or
                                 isinstance(old_details['rich_text'], CellRichText) != isinstance(
                        new_details['rich_text'], CellRichText),
                    'font': old_details['font'] != new_details['font'],
                    'border': old_details['border'] != new_details['border'],
                    'fill': old_details['fill'] != new_details['fill'],
                    'number_format': old_details['number_format'] != new_details['number_format'],
                    'alignment': old_details['alignment'] != new_details['alignment']
                }

                if any(mismatches.values()):
                    details = "\n".join(
                        [f"{key.replace('_', ' ').title()} differences" for key, mismatch in mismatches.items() if
                         mismatch])

                    output_list.append({
                        'sheet_name': sheet_name,
                        'cell_reference': str(cell_old.coordinate),
                        'difference_type': 'Format Mismatch',
                        'base_line_value': str(old_details['value']),
                        'test_env_value': str(new_details['value']),
                        'details': details
                    })

        images_old = ExcelComparator.get_image_details(ws_old)
        images_new = ExcelComparator.get_image_details(ws_new)

        image_mismatches = ExcelComparator.compare_images(images_old, images_new, sheet_name)
        output_list.extend(image_mismatches)

        return output_list

    @staticmethod
    def check_sheet_order_and_missing(wb_new, wb_old):
        output_list = []
        sheet_order_mismatch = {
            "status": True,
            "details": ""
        }

        new_sheets = wb_new.sheetnames
        old_sheets = wb_old.sheetnames

        common_sheets = set(new_sheets).intersection(set(old_sheets))
        missing_in_new = set(old_sheets) - set(new_sheets)
        missing_in_old = set(new_sheets) - set(old_sheets)

        if missing_in_new:
            output_list.append({
                'sheet_name': 'N/A',
                'cell_reference': 'N/A',
                'difference_type': 'Missing Sheets in New File',
                'base_line_value': ', '.join(missing_in_new),
                'test_env_value': 'N/A',
                'details': 'Sheets are present in old file but missing in new file.'
            })
        if missing_in_old:
            output_list.append({
                'sheet_name': 'N/A',
                'cell_reference': 'N/A',
                'difference_type': 'Missing Sheets in Old File',
                'base_line_value': 'N/A',
                'test_env_value': ', '.join(missing_in_old),
                'details': 'Sheets are present in new file but missing in old file.'
            })

        common_sheets_list = [sheet for sheet in old_sheets if sheet in common_sheets]
        new_sheets_list = [sheet for sheet in new_sheets if sheet in common_sheets]

        if common_sheets_list != new_sheets_list:
            sheet_order_mismatch["status"] = False
            mismatch_details = []
            min_length = min(len(common_sheets_list), len(new_sheets_list))

            for i in range(min_length):
                if common_sheets_list[i] != new_sheets_list[i]:
                    mismatch_details.append(f"Position {i + 1}: {common_sheets_list[i]} -> {new_sheets_list[i]}")

            if len(new_sheets_list) > min_length:
                mismatch_details.append(f"Additional sheets in new file: {new_sheets_list[min_length:]}")
            elif len(common_sheets_list) > min_length:
                mismatch_details.append(f"Additional sheets in old file: {common_sheets_list[min_length:]}")

            sheet_order_mismatch["details"] = "\n".join(mismatch_details)

            output_list.append({
                'sheet_name': 'N/A',
                'cell_reference': 'N/A',
                'difference_type': 'Sheet Order Mismatch',
                'base_line_value': str(common_sheets_list),
                'test_env_value': str(new_sheets_list),
                'details': "\n".join(mismatch_details)
            })

        return output_list, sheet_order_mismatch

    @staticmethod
    def enable_editing_and_save(file_path):
        try:
            app = xw.App(visible=False)
            workbook = app.books.open(file_path)
            workbook.save()
            workbook.close()
            app.quit()
            logging.info(f"Enabled editing and saved the file {file_path}")
        except Exception as e:
            logging.error(f"Failed to enable editing and save the file {file_path}: {e}")
            sys.exit(1)

    def preprocess_files(self, *file_paths):
        """Enable editing and save all given file paths."""
        for file_path in file_paths:
            abs_path = os.path.abspath(file_path)
            ExcelComparator.enable_editing_and_save(abs_path)

    def compare_files(self, compare_type):
        if self.wb_new is None or self.wb_old is None:
            return {
                "overall_status": True,
                "sheet_order_comparison": {"status": True, "details": ""} if compare_type == "content" else None,
                "sheet_results": {},
                "sheet_mismatches": {}
            }

        # Check for sheet order and missing sheets
        sheet_order_mismatches, sheet_order_mismatch = self.check_sheet_order_and_missing(self.wb_new, self.wb_old)

        sheet_results = {}
        new_sheets = set(self.wb_new.sheetnames)
        old_sheets = set(self.wb_old.sheetnames)
        common_sheets = new_sheets.intersection(old_sheets)

        with ThreadPoolExecutor() as executor:
            futures = {}
            for sheet_name in common_sheets:
                if compare_type == "content":
                    futures[sheet_name] = executor.submit(self.check_content, sheet_name, self.wb_new[sheet_name],
                                                          self.wb_old[sheet_name])
                elif compare_type == "format":
                    futures[sheet_name] = executor.submit(self.check_format, sheet_name, self.wb_new[sheet_name],
                                                          self.wb_old[sheet_name])

            for sheet_name, future in futures.items():
                result = future.result()
                if result:
                    sheet_results[sheet_name] = {
                        "status": False,
                        "mismatches": result
                    }
                else:
                    sheet_results[sheet_name] = {"status": True}

        # Integrate sheet order and missing sheets comparison in content comparison
        if compare_type == "content":
            overall_status = all(sheet["status"] for sheet in sheet_results.values()) and sheet_order_mismatch["status"]
            sheet_results["sheet_order"] = {"status": sheet_order_mismatch["status"],
                                            "details": sheet_order_mismatch["details"]}
            combined_result = {
                "overall_status": overall_status,
                "sheet_order_comparison": sheet_order_mismatch,
                "sheet_results": sheet_results
            }
        else:
            combined_result = {
                "overall_status": all(sheet["status"] for sheet in sheet_results.values()),
                "sheet_results": sheet_results
            }

        return combined_result

    def do_content_compare(self):
        return self.compare_files("content")

    def do_format_compare(self):
        return self.compare_files("format")
Leave a Comment