# Untitled — Python paste, author unknown, 15 days ago, 2.9 kB.
import os

import PyPDF2
import pydeep
from pdf2image import convert_from_path
from PIL import Image, ImageChops


def remove_metadata(pdf_file):
    """Write a copy of a PDF that carries over only its pages.

    When the reader reports metadata for ``pdf_file``, every page is copied
    into a fresh ``PyPDF2.PdfWriter`` and saved alongside the original as
    ``<path without extension>_no_metadata.pdf``. When no metadata is
    reported, no file is written. The outcome is reported with ``print``.

    Args:
        pdf_file: Path to the source PDF.

    Returns:
        None.
    """
    with open(pdf_file, 'rb') as file:
        reader = PyPDF2.PdfReader(file)

        if reader.metadata is None:
            print("No metadata found in the PDF file.")
            return

        print("Metadata found in the PDF file.")

        # Copy only the pages into a new writer; the source document's
        # metadata is not passed along to it.
        writer = PyPDF2.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # Strip just the final extension. Splitting on the first '.' would
        # mangle paths such as "../docs/a.pdf" or "report.v2.pdf".
        base, _ = os.path.splitext(pdf_file)
        new_pdf_file = f"{base}_no_metadata.pdf"

        # Write while the source file is still open, since the pages were
        # taken from the reader bound to it.
        with open(new_pdf_file, 'wb') as output_file:
            writer.write(output_file)

        print(f"PDF file without metadata saved as '{new_pdf_file}'.")

# # Specify the path to the first PDF file.
# pdf_file_path = "grab.pdf"
# # Call the function to remove metadata, then hash the stripped copy.
# remove_metadata(pdf_file_path)
# hash1 = pydeep.hash_file("grab_no_metadata.pdf")
#
# # Specify the path to the second PDF file.
# pdf_file_path = "new.pdf"
# # Call the function to remove metadata, then hash the stripped copy.
# remove_metadata(pdf_file_path)
# hash2 = pydeep.hash_file("new_no_metadata.pdf")
#
# # Compare the two hashes.
# similarity = pydeep.compare(hash1, hash2)
# print(f"Similarity Score: {similarity}")


def _pdf_to_stacked_jpeg(pdf_path, jpeg_path, dpi=300):
    """Render a PDF's pages and save them stacked vertically as one JPEG.

    Each page is rendered with ``convert_from_path`` and pasted below the
    previous one, left-aligned, onto a single RGB canvas that is as wide as
    the widest page and as tall as all pages combined.

    Args:
        pdf_path: Path of the PDF to render.
        jpeg_path: Path of the JPEG file to write.
        dpi: Rendering resolution passed to ``convert_from_path``.

    Raises:
        ValueError: If no pages were rendered from ``pdf_path``.
    """
    pages = convert_from_path(pdf_path, dpi=dpi)
    if not pages:
        raise ValueError(f"No pages were rendered from '{pdf_path}'.")

    canvas_width = max(page.width for page in pages)
    canvas_height = sum(page.height for page in pages)

    # Pages narrower than the widest one leave the canvas's default fill
    # visible to their right.
    # NOTE(review): JPEG limits image dimensions (believed to be 65,500 px);
    # a long PDF at 300 dpi may exceed that on save — confirm for real inputs.
    canvas = Image.new("RGB", (canvas_width, canvas_height))

    y_offset = 0
    for page in pages:
        canvas.paste(page, (0, y_offset))
        y_offset += page.height

    canvas.save(jpeg_path, "JPEG")


# Render both documents to single long JPEGs so they can be compared below.
_pdf_to_stacked_jpeg("old.pdf", "old.jpg")
_pdf_to_stacked_jpeg("new.pdf", "new.jpg")


def are_images_identical(image1_path, image2_path):
    """Return whether two image files hold the same pixel data.

    The images are considered identical when they share the same size and
    mode and their per-pixel difference has no non-zero bounding box.

    Args:
        image1_path: Path of the first image file.
        image2_path: Path of the second image file.

    Returns:
        True if the images match, False otherwise.
    """
    # Context managers make sure both files are closed again.
    with Image.open(image1_path) as img1, Image.open(image2_path) as img2:
        # Compare geometry before pixels: a pixel difference is only
        # meaningful for images of the same size and mode.
        if img1.size != img2.size or img1.mode != img2.mode:
            return False
        return ImageChops.difference(img1, img2).getbbox() is None

# Example usage: report whether old.jpg and new.jpg compare as identical.
images_match = are_images_identical("old.jpg", "new.jpg")
print(images_match)
# End of pasted script.