Untitled
unknown
python
15 days ago
2.9 kB
3
Indexable
import os

import PyPDF2
import pydeep
from pdf2image import convert_from_path
from PIL import Image, ImageChops


def remove_metadata(pdf_file):
    """Save a copy of a PDF that contains only the pages of the original.

    When the reader reports a metadata object, every page is copied into a
    fresh ``PyPDF2.PdfWriter`` and written next to the input file as
    ``<input-without-extension>_no_metadata.pdf``. When no metadata object is
    reported, nothing is written.

    Progress is reported with ``print``; the function returns ``None``.

    Args:
        pdf_file: Path of the PDF file to process.
    """
    with open(pdf_file, 'rb') as file:
        reader = PyPDF2.PdfReader(file)

        # Guard clause: nothing to strip, so no output file is produced.
        if reader.metadata is None:
            print("No metadata found in the PDF file.")
            return

        print("Metadata found in the PDF file.")

        # Only the page objects are copied into the new writer; nothing else
        # from the source document is transferred explicitly.
        writer = PyPDF2.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # Strip only the final extension. Splitting on the first '.' would
        # mangle paths such as "./grab.pdf" or "my.report.pdf".
        root, _ext = os.path.splitext(pdf_file)
        new_pdf_file = f"{root}_no_metadata.pdf"

        # Write while the source file is still open: the writer still
        # references page data owned by the reader.
        with open(new_pdf_file, 'wb') as output_file:
            writer.write(output_file)

    print(f"PDF file without metadata saved as '{new_pdf_file}'.")


# # Specify the path to your PDF file.
# pdf_file_path = "grab.pdf"
# # Call the function to remove metadata.
# remove_metadata(pdf_file_path)
# hash1 = pydeep.hash_file("new_no_metadata.pdf")
#
# # Specify the path to your PDF file.
# pdf_file_path = "new.pdf"
# # Call the function to remove metadata.
# remove_metadata(pdf_file_path)
# hash2 = pydeep.hash_file("grab_no_metadata.pdf")
# similarity = pydeep.compare(hash1, hash2)
# print(f"Similarity Score: {similarity}")


def _pdf_to_long_jpeg(pdf_path, jpeg_path, dpi=300):
    """Render every page of a PDF and stack the pages into one tall JPEG.

    Args:
        pdf_path: Path of the PDF to render.
        jpeg_path: Path of the JPEG file to write.
        dpi: Rendering resolution passed to ``convert_from_path``.

    Raises:
        ValueError: If the PDF produced no page images.
    """
    # Convert PDF to images (one image per page).
    images = convert_from_path(pdf_path, dpi=dpi)
    if not images:
        raise ValueError(f"No pages were rendered from {pdf_path!r}")

    # The canvas is as wide as the widest page and as tall as all pages
    # stacked on top of each other.
    total_height = sum(img.height for img in images)
    max_width = max(img.width for img in images)
    final_image = Image.new("RGB", (max_width, total_height))

    # Paste each page directly below the previous one.
    y_offset = 0
    for img in images:
        final_image.paste(img, (0, y_offset))
        y_offset += img.height

    # NOTE: the JPEG format caps each dimension at 65535 px, so very long
    # documents rendered at a high dpi may fail to save.
    final_image.save(jpeg_path, "JPEG")


def are_images_identical(image1_path, image2_path):
    """Return True when the two image files decode to the same pixels.

    Args:
        image1_path: Path of the first image.
        image2_path: Path of the second image.

    Returns:
        ``True`` if both images have the same size and no differing pixel,
        ``False`` otherwise.
    """
    # Context managers make sure both file handles are released.
    with Image.open(image1_path) as img1, Image.open(image2_path) as img2:
        # A size mismatch already means the images differ. Check it up front
        # rather than relying on difference(), which would at best compare
        # only the region common to both images.
        if img1.size != img2.size:
            return False
        # getbbox() is None when the difference image is entirely zero.
        return ImageChops.difference(img1, img2).getbbox() is None


# Render both documents to a single long JPEG each.
_pdf_to_long_jpeg("old.pdf", "old.jpg")
_pdf_to_long_jpeg("new.pdf", "new.jpg")

# Example Usage
print(are_images_identical("old.jpg", "new.jpg"))
Editor is loading...
Leave a Comment