Untitled
unknown
plain_text
2 years ago
7.7 kB
4
Indexable
"""Convert the documents found in the sub-folders of a chosen folder to PDF.

The user picks a folder in a dialog; every immediate sub-folder is scanned for
files whose extension appears in ``FILE_FORMATS``.  Each matching file is
converted to a PDF stored next to it and the original file is then deleted.

Third-party converters (comtypes, pywin32, img2pdf, Pillow, pillow_heif, fpdf,
pdfkit) are imported lazily inside the method that needs them, so only the
packages for the formats actually encountered have to be installed.
"""
import os

# Conversion category -> file extensions (lower-case, without the dot).
FILE_FORMATS = {
    'doc': ['docx', 'doc', 'docm', 'rtf'],
    'ppt': ['ppt', 'pptx'],
    'xls': ['xlsx', 'xls', 'xlsm'],
    'txt': ['txt', 'xml', 'ini', 'log'],
    'image': ['jpg', 'jpeg', 'jpe', 'jp2', 'png', 'gif', 'tif', 'tiff', 'bmp', 'heic'],
    'html': ['html', 'htm'],
}


def _pdf_path(source):
    """Return *source* with its final extension replaced by ``.pdf``."""
    return os.path.splitext(source)[0] + '.pdf'


class ConvertToPDF:
    """Namespace of static converters; each writes ``<source stem>.pdf``.

    Every converter returns ``True`` on success and lets the underlying
    exception propagate on failure.
    """

    @staticmethod
    def docx_to_pdf(source):
        """Convert a Word document to PDF through Word's COM interface.

        Requires Microsoft Word and the ``comtypes`` package.  (The
        ``docx2pdf`` package -- ``pip install docx2pdf`` -- is an alternative.)
        """
        import comtypes.client

        wd_format_pdf = 17  # Word's wdFormatPDF save format
        # Hand Word a normalised absolute path: paths built from the folder
        # dialog mix '/' and '\\' separators.
        source = os.path.abspath(source)
        out_file = _pdf_path(source)

        word = comtypes.client.CreateObject('Word.Application')
        try:
            doc = word.Documents.Open(source)
            try:
                doc.SaveAs(out_file, FileFormat=wd_format_pdf)
            finally:
                doc.Close()
        finally:
            # Quit even when Open() fails so no Word process is left behind.
            word.Quit()
        return True

    @staticmethod
    def image_to_pdf(source):
        """Convert an image file to PDF.

        TIFF files are delegated to the multi-page TIFF converter; HEIC files
        need ``pillow_heif``; everything else goes through ``img2pdf``.

        pip install img2pdf
        pip install pillow_heif
        """
        from PIL import Image

        # Compare the extension case-insensitively so 'SCAN.TIF' is handled too.
        extension = os.path.splitext(source)[1].lower()
        if extension in ('.tif', '.tiff'):
            return ConvertToPDF.__tiff_to_pdf(source)
        if extension == '.heic':
            from pillow_heif import register_heif_opener
            register_heif_opener()

        import img2pdf

        target = _pdf_path(source)
        # Opening with Pillow first validates that the file is a readable image.
        with Image.open(source) as image:
            pdf_bytes = img2pdf.convert(image.filename)
        with open(target, 'wb') as pdf_file:
            pdf_file.write(pdf_bytes)
        return True

    @staticmethod
    def __tiff_to_pdf(tiff_path: str) -> bool:
        """Convert a (possibly multi-page) TIFF to a PDF with one page per frame.

        Raises:
            FileNotFoundError: if *tiff_path* does not exist.
        """
        from PIL import Image, ImageSequence

        if not os.path.exists(tiff_path):
            raise FileNotFoundError(f'{tiff_path} does not find.')

        # splitext only touches the final extension; str.replace would also
        # rewrite '.tif' occurring in a directory name.
        pdf_path = _pdf_path(tiff_path)

        with Image.open(tiff_path) as image:
            pages = [page.convert("RGB") for page in ImageSequence.Iterator(image)]
            if len(pages) == 1:
                pages[0].save(pdf_path)
            else:
                pages[0].save(pdf_path, save_all=True, append_images=pages[1:])
        return True

    @staticmethod
    def pptx_to_pdf(source):
        """Convert a PowerPoint presentation to PDF through COM (needs pywin32)."""
        from win32com import client

        pp_save_as_pdf = 32  # PowerPoint's ppSaveAsPDF file type
        source = os.path.abspath(source)
        out_file = _pdf_path(source)

        powerpoint = client.Dispatch("Powerpoint.Application")
        try:
            presentation = powerpoint.Presentations.Open(source, WithWindow=False)
            try:
                presentation.SaveAs(out_file, pp_save_as_pdf)
            finally:
                presentation.Close()
        finally:
            powerpoint.Quit()
        return True

    @staticmethod
    def text_to_pdf(source):
        """Render a UTF-8 text file as a monospaced A4 PDF (needs ``fpdf``)."""
        from fpdf import FPDF
        import textwrap

        target = _pdf_path(source)
        with open(source, encoding='UTF-8') as text_file:
            text = text_file.read()

        a4_width_mm = 210
        pt_to_mm = 0.35
        fontsize_pt = 10
        fontsize_mm = fontsize_pt * pt_to_mm
        margin_bottom_mm = 10
        character_width_mm = 7 * pt_to_mm
        # textwrap needs an integer width: a float breaks as soon as a word
        # longer than the line has to be split (slice index must be an int).
        width_text = int(a4_width_mm / character_width_mm)

        pdf = FPDF(orientation='P', unit='mm', format='A4')
        pdf.set_auto_page_break(True, margin=margin_bottom_mm)
        pdf.add_page()
        pdf.set_font(family='Courier', size=fontsize_pt)

        for line in text.split('\n'):
            wrapped = textwrap.wrap(line, width_text)
            if not wrapped:
                # Empty source line: emit a blank line in the PDF.
                pdf.ln()
            for piece in wrapped:
                pdf.cell(0, fontsize_mm, piece, ln=1)

        pdf.output(target, 'F')
        return True

    @staticmethod
    def html_to_pdf(source):
        """Convert an HTML file to PDF with ``pdfkit``.

        pip install pdfkit
        wkhtmltopdf must be installed as well, e.g.
        https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.4/wkhtmltox-0.12.4_msvc2015-win64.exe
        """
        import pdfkit

        target = _pdf_path(source)
        pdfkit.from_file(source, target)
        return True

    @staticmethod
    def xls_to_pdf(source):
        """Export every worksheet of an Excel workbook to one PDF (needs pywin32)."""
        from win32com import client

        source = os.path.abspath(source)
        target = _pdf_path(source)

        excel = client.Dispatch("Excel.Application")
        try:
            workbook = excel.Workbooks.Open(source)
            try:
                # Select all sheets (COM indexes are 1-based) so the export of
                # the active sheet covers the whole workbook.
                sheet_indexes = list(range(1, workbook.Worksheets.Count + 1))
                workbook.Worksheets(sheet_indexes).Select()
                # 0 is the PDF export type for ExportAsFixedFormat.
                workbook.ActiveSheet.ExportAsFixedFormat(0, target)
            finally:
                # False: close without saving changes to the workbook.
                workbook.Close(False)
        finally:
            excel.Quit()
        return True


# Conversion category -> converter; keys match FILE_FORMATS.
_CONVERTERS = {
    'doc': ConvertToPDF.docx_to_pdf,
    'ppt': ConvertToPDF.pptx_to_pdf,
    'xls': ConvertToPDF.xls_to_pdf,
    'image': ConvertToPDF.image_to_pdf,
    'txt': ConvertToPDF.text_to_pdf,
    'html': ConvertToPDF.html_to_pdf,
}


def filter_files(folder_input):
    """Group the regular files of *folder_input* by conversion category.

    Args:
        folder_input: path of the directory to scan (not recursive).

    Returns:
        A dict with one key per ``FILE_FORMATS`` category, each mapping to the
        list of full paths whose extension (compared case-insensitively)
        belongs to that category.  Sub-directories and files with unknown
        extensions are left out.
    """
    extension_to_format = {
        extension: category
        for category, extensions in FILE_FORMATS.items()
        for extension in extensions
    }
    filtered_files = {category: [] for category in FILE_FORMATS}

    for name in os.listdir(folder_input):
        path = os.path.join(folder_input, name)
        if not os.path.isfile(path):
            continue
        extension = os.path.splitext(name)[1][1:].lower()
        category = extension_to_format.get(extension)
        if category is not None:
            filtered_files[category].append(path)
    return filtered_files


def convert_files(file_dict):
    """Convert every file in *file_dict* to PDF and delete the original.

    Args:
        file_dict: mapping of conversion category to a list of file paths, as
            returned by ``filter_files``.  Unknown categories are skipped.

    A failure on one file is reported on stdout and does not stop the batch;
    the source file is only removed after its conversion succeeded.
    """
    for frmt, files in file_dict.items():
        converter = _CONVERTERS.get(frmt)
        if converter is None:
            continue
        for file in files:
            try:
                print('\nConverting File:', file)
                converter(file)
                print('Removing File:', file)
                os.remove(file)
            except Exception as ex:
                # Batch boundary: report and carry on with the next file.
                print(f"Error while converting file '{file}': {ex}")


def main():
    """Ask for a folder and convert the files of each of its sub-folders."""
    # Imported here so the converters stay importable without a GUI toolkit.
    from tkinter import Tk
    from tkinter.filedialog import askdirectory

    root = Tk()
    root.withdraw()  # hide the empty root window behind the dialog
    input_path = askdirectory(title='Select Folder')
    root.destroy()

    print('folder name:', input_path)
    if not input_path:
        # The dialog returns an empty string when it is cancelled.
        print('No folder selected.')
        return

    folder_list = [
        os.path.join(input_path, folder)
        for folder in os.listdir(input_path)
        if os.path.isdir(os.path.join(input_path, folder))
    ]
    print(folder_list)

    for folder in folder_list:
        filtered_file_dict = filter_files(folder)
        convert_files(filtered_file_dict)


if __name__ == '__main__':
    main()
Editor is loading...