Untitled
unknown
plain_text
a year ago
1.5 kB
10
Indexable
outputs.append(result)
# combined = ' '.join(outputs)
# return combined
return outputs
# %%
def get_pdfnames():
    """Return the names of the PDF files in the current working directory.

    Only regular files are returned, so a directory whose name happens to
    end in ".pdf" is skipped. The extension is matched case-insensitively
    ("SCAN.PDF" counts), and the result is sorted because ``os.listdir``
    yields entries in arbitrary order, which would otherwise make the
    processing order differ between runs and platforms.

    Returns:
        list[str]: Bare file names (no directory component), sorted.
    """
    current_directory = os.getcwd()
    return sorted(
        filename
        for filename in os.listdir(current_directory)
        if filename.lower().endswith(".pdf")
        and os.path.isfile(os.path.join(current_directory, filename))
    )
def write_combinedpages(output_name, combinedpages):
    """Write each string in *combinedpages* to *output_name*, one per line.

    The file is created (or truncated if it exists) and written as UTF-8 so
    that non-ASCII text does not depend on the platform's default encoding.
    A confirmation message is printed once the file has been closed.

    Args:
        output_name: Path of the text file to create or overwrite.
        combinedpages: Iterable of strings; a newline is appended to each.

    Raises:
        TypeError: If an item of *combinedpages* is not a string.
        OSError: If the file cannot be opened or written.
    """
    with open(output_name, "w", encoding="utf-8") as file:
        # One batched call instead of a write() per line.
        file.writelines(string + "\n" for string in combinedpages)
    print(f"wroted {output_name}")
# %%
def get_txt_from_pdf(pdfname):
    """Process one PDF and write its combined results to a sibling .txt file.

    The input name and the derived output name are printed for progress
    tracking. The PDF is passed to ``spagetti``, the two scratch files are
    removed, the results are merged with ``combine_results`` and the merged
    lines are written with ``write_combinedpages``.

    Args:
        pdfname: Name (or path) of the PDF to process.

    Returns:
        str: Name of the text file that was written, i.e. *pdfname* with
        its extension replaced by ".txt".
    """
    print(pdfname)
    # splitext swaps the real extension; slicing off three characters
    # mangled names without a three-letter suffix ("a.jpeg" -> "a.jtxt").
    txtname = os.path.splitext(pdfname)[0] + ".txt"
    print(txtname)
    results = spagetti(pdfname)
    # Presumably intermediate files left behind by spagetti() - confirm.
    force_delete_file("cropped_image.pdf")
    force_delete_file("PDF_image.png")
    combined = combine_results(results)
    write_combinedpages(txtname, combined)
    return txtname
# %%
def main():
    """Run get_txt_from_pdf on every name returned by get_pdfnames(), in order."""
    # A plain loop: the calls are made for their side effects, so collecting
    # the return values in a list comprehension only built a throwaway list.
    for name in get_pdfnames():
        get_txt_from_pdf(name)
    # NOTE: an alternative using `multip(get_txt_from_pdf, pdfnames)` and
    # printing each result was sketched here but left disabled.
# %%
if __name__ == "__main__":
    # Single guarded entry point. The previous unguarded main() call ran the
    # whole conversion on import and a second time when run as a script.
    main()
Leave a Comment