Untitled
unknown
plain_text
a year ago
1.1 kB
12
Indexable
import fitz
def _italic_output_path(pdf_path):
    """Return the report path: *pdf_path* minus its extension + ``_italicized_words.txt``."""
    import os  # local import so the block does not depend on file-level imports

    # Only the final extension is stripped. A blind str.replace(".pdf", ...)
    # would also rewrite ".pdf" inside directory names and, when the path has
    # no lowercase ".pdf" at all, would return the input path unchanged --
    # making the report overwrite the source PDF.
    root, _ext = os.path.splitext(pdf_path)
    return f"{root}_italicized_words.txt"


def Appooz(pdf_path):
    """Write every italic text span of a PDF to a text file next to it.

    Each page is read with ``page.get_text("dict")``; every span whose
    ``flags`` value has bit value 2 set is recorded as
    ``"Page <1-based page number>: <span text>"``. The collected lines are
    written to ``<pdf_path without extension>_italicized_words.txt`` and the
    output location is printed.

    Args:
        pdf_path: Path of the PDF to scan.

    Returns:
        None. Any exception raised while opening, scanning or writing is
        caught and reported on stdout as ``An error occurred: ...``.
    """
    # PyMuPDF marks italic spans with bit value 2 in the span "flags" field.
    italic_flag = 2

    # Bound before the try so the cleanup below is safe even when opening fails.
    pdf_document = None
    try:
        pdf_document = fitz.open(pdf_path)
        italicized_words = []
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            text_dict = page.get_text("dict")
            for block in text_dict.get("blocks", []):
                # Blocks without a "lines" entry carry no text spans; skip them.
                for line in block.get("lines", []):
                    for span in line["spans"]:
                        if span["flags"] & italic_flag:
                            italicized_words.append(
                                f"Page {page_num + 1}: {span['text']}"
                            )

        output_path = _italic_output_path(pdf_path)
        # Explicit UTF-8: extracted text may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open(output_path, "w", encoding="utf-8") as file:
            file.writelines(f"{entry}\n" for entry in italicized_words)
        print(f"Results saved to {output_path}")
    except Exception as e:
        # Best-effort script behaviour: report the failure instead of raising.
        print(f"An error occurred: {e}")
    finally:
        # Close only a document that was actually opened; otherwise a failed
        # open would raise here and mask the error already reported above.
        if pdf_document is not None:
            pdf_document.close()
# Script entry: scan the thesis PDF at this hard-coded location.
pdf_path = "/Users/appoos/Documents/Thesis/Abhijith_PhD-Corrected-19-06-24.pdf"
Appooz(pdf_path=pdf_path)
Editor is loading...
Leave a Comment