Untitled
unknown
plain_text
a year ago
18 kB
11
Indexable
import logging
import os
import sys
import datetime
import openpyxl
from concurrent.futures import ThreadPoolExecutor, as_completed
import xlwings as xw
from openpyxl.drawing.image import Image
from openpyxl.cell.rich_text import CellRichText
from Checksum import Checksum
class ExcelComparator:
def __init__(self, test_env_file, base_line_file):
self.test_env_file = os.path.abspath(test_env_file)
self.base_line_file = os.path.abspath(base_line_file)
# Always enable editing and save the files first
self.preprocess_files(self.test_env_file, self.base_line_file)
# Check if the files are different using their checksums
if Checksum.md5(self.test_env_file) != Checksum.md5(self.base_line_file):
try:
self.wb_new = openpyxl.load_workbook(self.test_env_file, data_only=False, rich_text=True)
self.wb_old = openpyxl.load_workbook(self.base_line_file, data_only=False, rich_text=True)
except Exception as e:
print(f"Error loading workbooks: {e}", file=sys.stderr)
sys.exit(1)
else:
self.wb_new = None
self.wb_old = None
@staticmethod
def serialize_value(value):
if isinstance(value, datetime.datetime):
return value.isoformat()
return str(value)
@staticmethod
def get_cell_details(cell):
cell_details = {
'value': ExcelComparator.serialize_value(cell.value),
'data_type': str(cell.data_type),
'font': cell.font.__dict__,
'border': cell.border.__dict__,
'fill': cell.fill.__dict__,
'number_format': str(cell.number_format),
'alignment': cell.alignment.__dict__,
'protection': cell.protection.__dict__,
'hyperlink': str(cell.hyperlink),
'comment': str(cell.comment),
'rich_text': ExcelComparator.serialize_value(cell.value) if isinstance(cell.value, CellRichText) else None,
'formula': ExcelComparator.serialize_value(cell.value) if cell.data_type == 'f' else None
}
return cell_details
@staticmethod
def get_image_details(sheet):
images = []
for drawing in sheet._images:
if isinstance(drawing, Image):
try:
width = getattr(drawing, 'width', None)
height = getattr(drawing, 'height', None)
anchor = getattr(drawing, 'anchor', None)
# If anchor is a complex object with _from attribute
if hasattr(anchor, '_from'):
position = (anchor._from.col, anchor._from.row)
else:
position = anchor # Assuming anchor is a simple attribute like 'A1'
image_details = {
'name': drawing.path,
'size': (width, height),
'position': position
}
images.append(image_details)
except Exception as e:
print(f"Error processing image: {e}")
return images
@staticmethod
def compare_images(images_old, images_new, sheet_name):
mismatches = []
old_image_names_positions = {(img['name'], img['position']) for img in images_old}
new_image_names_positions = {(img['name'], img['position']) for img in images_new}
added_images = new_image_names_positions - old_image_names_positions
removed_images = old_image_names_positions - new_image_names_positions
for img_name, img_position in added_images:
img_new = next(img for img in images_new if img['name'] == img_name and img['position'] == img_position)
mismatches.append({
'sheet_name': sheet_name,
'cell_reference': f"Image at {img_new['position']}",
'difference_type': 'Image Added',
'base_line_value': 'N/A',
'test_env_value': img_new['name'],
'details': f"New size: {img_new['size']}\nPosition: {img_new['position']}"
})
for img_name, img_position in removed_images:
img_old = next(img for img in images_old if img['name'] == img_name and img['position'] == img_position)
mismatches.append({
'sheet_name': sheet_name,
'cell_reference': f"Image at {img_old['position']}",
'difference_type': 'Image Removed',
'base_line_value': img_old['name'],
'test_env_value': 'N/A',
'details': f"Old size: {img_old['size']}\nPosition: {img_old['position']}"
})
for img_name, img_position in old_image_names_positions & new_image_names_positions:
img_old = next(img for img in images_old if img['name'] == img_name and img['position'] == img_position)
img_new = next(img for img in images_new if img['name'] == img_name and img['position'] == img_position)
if img_old['position'] != img_new['position']:
mismatches.append({
'sheet_name': sheet_name,
'cell_reference': f"Image at {img_old['position']} to {img_new['position']}",
'difference_type': 'Image Position Changed',
'base_line_value': img_old['position'],
'test_env_value': img_new['position'],
'details': f"Image: {img_old['name']}\nOld position: {img_old['position']}\nNew position: {img_new['position']}"
})
if img_old['size'] != img_new['size']:
mismatches.append({
'sheet_name': sheet_name,
'cell_reference': f"Image at {img_old['position']}",
'difference_type': 'Image Size Changed',
'base_line_value': img_old['size'],
'test_env_value': img_new['size'],
'details': f"Image: {img_old['name']}\nOld size: {img_old['size']}\nNew size: {img_new['size']}"
})
return mismatches
@staticmethod
def compare_rich_text_objects(old_rich_text, new_rich_text):
def extract_text(rich_text):
if hasattr(rich_text, 'text'):
return rich_text.text
elif isinstance(rich_text, str):
return rich_text
return ""
old_text = extract_text(old_rich_text)
new_text = extract_text(new_rich_text)
return old_text != new_text
@staticmethod
def extract_and_compare_formats(old_format, new_format):
def compare_dicts(dict1, dict2):
differences = []
for key in set(dict1.keys()).union(set(dict2.keys())):
value1 = dict1.get(key)
value2 = dict2.get(key)
if value1 != value2:
differences.append((key, 'diff'))
return differences
differences = compare_dicts(old_format, new_format)
return differences
@staticmethod
def compare_values(old_value, new_value):
if isinstance(old_value, datetime.datetime) and isinstance(new_value, datetime.datetime):
return old_value != new_value
return str(old_value) != str(new_value)
@staticmethod
def check_content(sheet_name, ws_new, ws_old):
output_list = []
max_row = max(ws_new.max_row, ws_old.max_row)
max_column = max(ws_new.max_column, ws_old.max_column)
for row in range(1, max_row + 1):
for col in range(1, max_column + 1):
cell_new = ws_new.cell(row=row, column=col)
cell_old = ws_old.cell(row=row, column=col)
if ExcelComparator.compare_values(cell_old.value, cell_new.value):
output_list.append({
'sheet_name': sheet_name,
'cell_reference': cell_old.coordinate,
'difference_type': 'Value Mismatch',
'base_line_value': str(cell_old.value),
'test_env_value': str(cell_new.value),
'details': 'Cell value mismatch'
})
if cell_old.data_type == 'f' or cell_new.data_type == 'f':
if cell_old.value != cell_new.value:
output_list.append({
'sheet_name': sheet_name,
'cell_reference': cell_old.coordinate,
'difference_type': 'Formula Mismatch',
'base_line_value': str(cell_old.value),
'test_env_value': str(cell_new.value),
'details': 'Formula mismatch'
})
return output_list
@staticmethod
def check_format(sheet_name, ws_new, ws_old):
output_list = []
max_row = max(ws_new.max_row, ws_old.max_row)
max_column = max(ws_new.max_column, ws_old.max_column)
for row in range(1, max_row + 1):
for col in range(1, max_column + 1):
cell_new = ws_new.cell(row=row, column=col)
cell_old = ws_old.cell(row=row, column=col)
old_details = ExcelComparator.get_cell_details(cell_old)
new_details = ExcelComparator.get_cell_details(cell_new)
# Exclude value and formula mismatches from format checks
if ExcelComparator.compare_values(cell_old.value, cell_new.value) or \
(cell_old.data_type == 'f' or cell_new.data_type == 'f') and cell_old.value != cell_new.value:
continue
mismatches = {
'rich_text': ExcelComparator.compare_rich_text_objects(old_details['rich_text'],
new_details['rich_text']) or
isinstance(old_details['rich_text'], CellRichText) != isinstance(
new_details['rich_text'], CellRichText),
'font': old_details['font'] != new_details['font'],
'border': old_details['border'] != new_details['border'],
'fill': old_details['fill'] != new_details['fill'],
'number_format': old_details['number_format'] != new_details['number_format'],
'alignment': old_details['alignment'] != new_details['alignment']
}
if any(mismatches.values()):
details = "\n".join(
[f"{key.replace('_', ' ').title()} differences" for key, mismatch in mismatches.items() if
mismatch])
output_list.append({
'sheet_name': sheet_name,
'cell_reference': str(cell_old.coordinate),
'difference_type': 'Format Mismatch',
'base_line_value': str(old_details['value']),
'test_env_value': str(new_details['value']),
'details': details
})
images_old = ExcelComparator.get_image_details(ws_old)
images_new = ExcelComparator.get_image_details(ws_new)
image_mismatches = ExcelComparator.compare_images(images_old, images_new, sheet_name)
output_list.extend(image_mismatches)
return output_list
@staticmethod
def check_sheet_order_and_missing(wb_new, wb_old):
output_list = []
sheet_order_mismatch = {
"status": True,
"details": ""
}
new_sheets = wb_new.sheetnames
old_sheets = wb_old.sheetnames
common_sheets = set(new_sheets).intersection(set(old_sheets))
missing_in_new = set(old_sheets) - set(new_sheets)
missing_in_old = set(new_sheets) - set(old_sheets)
if missing_in_new:
output_list.append({
'sheet_name': 'N/A',
'cell_reference': 'N/A',
'difference_type': 'Missing Sheets in New File',
'base_line_value': ', '.join(missing_in_new),
'test_env_value': 'N/A',
'details': 'Sheets are present in old file but missing in new file.'
})
if missing_in_old:
output_list.append({
'sheet_name': 'N/A',
'cell_reference': 'N/A',
'difference_type': 'Missing Sheets in Old File',
'base_line_value': 'N/A',
'test_env_value': ', '.join(missing_in_old),
'details': 'Sheets are present in new file but missing in old file.'
})
common_sheets_list = [sheet for sheet in old_sheets if sheet in common_sheets]
new_sheets_list = [sheet for sheet in new_sheets if sheet in common_sheets]
if common_sheets_list != new_sheets_list:
sheet_order_mismatch["status"] = False
mismatch_details = []
min_length = min(len(common_sheets_list), len(new_sheets_list))
for i in range(min_length):
if common_sheets_list[i] != new_sheets_list[i]:
mismatch_details.append(f"Position {i + 1}: {common_sheets_list[i]} -> {new_sheets_list[i]}")
if len(new_sheets_list) > min_length:
mismatch_details.append(f"Additional sheets in new file: {new_sheets_list[min_length:]}")
elif len(common_sheets_list) > min_length:
mismatch_details.append(f"Additional sheets in old file: {common_sheets_list[min_length:]}")
sheet_order_mismatch["details"] = "\n".join(mismatch_details)
output_list.append({
'sheet_name': 'N/A',
'cell_reference': 'N/A',
'difference_type': 'Sheet Order Mismatch',
'base_line_value': str(common_sheets_list),
'test_env_value': str(new_sheets_list),
'details': "\n".join(mismatch_details)
})
return output_list, sheet_order_mismatch
@staticmethod
def enable_editing_and_save(file_path):
try:
app = xw.App(visible=False)
workbook = app.books.open(file_path)
workbook.save()
workbook.close()
app.quit()
logging.info(f"Enabled editing and saved the file {file_path}")
except Exception as e:
logging.error(f"Failed to enable editing and save the file {file_path}: {e}")
sys.exit(1)
def preprocess_files(self, *file_paths):
"""Enable editing and save all given file paths."""
for file_path in file_paths:
abs_path = os.path.abspath(file_path)
ExcelComparator.enable_editing_and_save(abs_path)
def compare_files(self, compare_type):
if self.wb_new is None or self.wb_old is None:
return {
"overall_status": True,
"sheet_order_comparison": {"status": True, "details": ""} if compare_type == "content" else None,
"sheet_results": {},
"sheet_mismatches": {}
}
# Check for sheet order and missing sheets
sheet_order_mismatches, sheet_order_mismatch = self.check_sheet_order_and_missing(self.wb_new, self.wb_old)
sheet_results = {}
new_sheets = set(self.wb_new.sheetnames)
old_sheets = set(self.wb_old.sheetnames)
common_sheets = new_sheets.intersection(old_sheets)
with ThreadPoolExecutor() as executor:
futures = {}
for sheet_name in common_sheets:
if compare_type == "content":
futures[sheet_name] = executor.submit(self.check_content, sheet_name, self.wb_new[sheet_name],
self.wb_old[sheet_name])
elif compare_type == "format":
futures[sheet_name] = executor.submit(self.check_format, sheet_name, self.wb_new[sheet_name],
self.wb_old[sheet_name])
for sheet_name, future in futures.items():
result = future.result()
if result:
sheet_results[sheet_name] = {
"status": False,
"mismatches": result
}
else:
sheet_results[sheet_name] = {"status": True}
# Integrate sheet order and missing sheets comparison in content comparison
if compare_type == "content":
overall_status = all(sheet["status"] for sheet in sheet_results.values()) and sheet_order_mismatch["status"]
sheet_results["sheet_order"] = {"status": sheet_order_mismatch["status"],
"details": sheet_order_mismatch["details"]}
combined_result = {
"overall_status": overall_status,
"sheet_order_comparison": sheet_order_mismatch,
"sheet_results": sheet_results
}
else:
combined_result = {
"overall_status": all(sheet["status"] for sheet in sheet_results.values()),
"sheet_results": sheet_results
}
return combined_result
def do_content_compare(self):
return self.compare_files("content")
def do_format_compare(self):
return self.compare_files("format")
Editor is loading...
Leave a Comment