Untitled
unknown
plain_text
5 months ago
1.5 kB
4
Indexable
# NOTE(review): the next line is the tail of a definition that begins before
# this chunk; it is left exactly as found rather than reconstructed by guess.
outputs.append(result) # combined = ' '.join(outputs) # return combined return outputs


# %%
def get_pdfnames():
    """Return the names of the PDF files in the current working directory.

    Returns:
        A list of bare file names (no directory component) whose name ends
        with ``.pdf``, in the order ``os.listdir`` reports them.
    """
    return [name for name in os.listdir(os.getcwd()) if name.endswith(".pdf")]


def write_combinedpages(output_name, combinedpages):
    """Write every string in *combinedpages* to *output_name*, one per line.

    Args:
        output_name: Path of the text file to create or overwrite.
        combinedpages: Iterable of strings; each is written followed by a
            newline.
    """
    # An explicit encoding keeps non-ASCII text from failing (or being written
    # differently) depending on the platform's locale default.
    with open(output_name, "w", encoding="utf-8") as file:
        file.writelines(page + "\n" for page in combinedpages)
    print(f"wroted {output_name}")


# %%
def get_txt_from_pdf(pdfname):
    """Convert one PDF into a text file next to it and return that file's name.

    The PDF is handed to ``spagetti``, the two scratch files are removed with
    ``force_delete_file``, the results are passed through ``combine_results``
    and finally written out with ``write_combinedpages``.
    NOTE(review): ``spagetti`` presumably performs the per-page extraction and
    creates the scratch files deleted here -- confirm against its definition.

    Args:
        pdfname: Name (or path) of the PDF file to process.

    Returns:
        The name of the text file that was written: *pdfname* with its
        extension replaced by ``.txt``.
    """
    print(pdfname)
    # splitext swaps the real extension instead of blindly cutting the last
    # three characters, so names without a 3-letter suffix are not mangled.
    txtname = os.path.splitext(pdfname)[0] + ".txt"
    print(txtname)

    results = spagetti(pdfname)
    force_delete_file("cropped_image.pdf")
    force_delete_file("PDF_image.png")

    combined = combine_results(results)
    write_combinedpages(txtname, combined)
    return txtname


# %%
def main():
    """Convert every PDF in the current working directory to a text file."""
    # Plain loop: the calls are made for their side effects, not their values.
    for name in get_pdfnames():
        get_txt_from_pdf(name)
    # results = multip(get_txt_from_pdf, pdfnames)
    # for result in results:
    #     print(result)


# %%
# Single guarded entry point. The script previously also called main()
# unconditionally, which processed every PDF twice per run and once on import.
# Interactive cell runners execute with __name__ == "__main__", so running
# this cell still works.
if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment