Untitled
unknown
plain_text
a year ago
1.1 kB
9
Indexable
from pathlib import Path

import fitz

# Value tested against each span's "flags" field to detect italic text
# (bit 1 of the PyMuPDF span font flags).
_ITALIC_FLAG = 2


def _collect_italic_spans(pdf_document):
    """Return one "Page N: text" entry per italic span, in document order."""
    entries = []
    for page_num in range(len(pdf_document)):
        page = pdf_document.load_page(page_num)
        blocks = page.get_text("dict").get("blocks", [])
        for block in blocks:
            # Blocks without a "lines" key (e.g. non-text blocks) contribute
            # nothing, matching the original `"lines" in block` check.
            for line in block.get("lines", []):
                for span in line["spans"]:
                    if span["flags"] & _ITALIC_FLAG:
                        entries.append(f"Page {page_num + 1}: {span['text']}")
    return entries


def Appooz(pdf_path):
    """Write every italic text span of a PDF to a sibling text file.

    The output file is created next to the input and is named
    ``<input stem>_italicized_words.txt``. Each line has the form
    ``Page N: <span text>`` with 1-based page numbers.

    Args:
        pdf_path: Path of the PDF to scan.

    Returns:
        None. Progress and failures are reported with ``print``; any
        exception raised while reading or writing is caught and printed
        rather than propagated.
    """
    # Stays None if opening fails, so the cleanup below never touches an
    # unbound name and the original error message is not masked.
    pdf_document = None
    try:
        source = Path(pdf_path)
        # Build the name from the stem instead of str.replace(".pdf", ...):
        # the replace left the path unchanged for ".PDF"/extension-less
        # inputs (so the source PDF was overwritten) and also rewrote any
        # ".pdf" occurring in a directory name.
        output_path = source.with_name(f"{source.stem}_italicized_words.txt")

        pdf_document = fitz.open(pdf_path)
        italicized_words = _collect_italic_spans(pdf_document)

        # Explicit UTF-8 so non-ASCII span text is written the same way on
        # every platform.
        with open(output_path, "w", encoding="utf-8") as file:
            file.writelines(f"{word}\n" for word in italicized_words)
        print(f"Results saved to {output_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if pdf_document is not None:
            pdf_document.close()


# Script entry: scan the thesis PDF at this fixed location.
pdf_path = "/Users/appoos/Documents/Thesis/Abhijith_PhD-Corrected-19-06-24.pdf"
Appooz(pdf_path)
Editor is loading...
Leave a Comment