Untitled

 avatar
unknown
plain_text
5 months ago
1.5 kB
4
Indexable
        outputs.append(result)
    # combined = ' '.join(outputs)
    # return combined
    return outputs


# %%
def get_pdfnames():
    """Return the names of the ``.pdf`` entries in the current working directory.

    Only bare entry names are returned (no directory prefix), in whatever
    order ``os.listdir`` yields them. The suffix check is case-sensitive.
    """
    cwd = os.getcwd()
    return [entry for entry in os.listdir(cwd) if entry.endswith(".pdf")]


def write_combinedpages(output_name, combinedpages):
    """Write each string in *combinedpages* to *output_name*, one per line.

    The file is created (or truncated) and always encoded as UTF-8, so text
    containing non-ASCII characters is written the same way on every
    platform instead of depending on the locale's default encoding.

    Args:
        output_name: Path of the text file to write.
        combinedpages: Iterable of strings; a newline is appended to each.
    """
    with open(output_name, "w", encoding="utf-8") as file:
        # One batched call instead of one write per string.
        file.writelines(page + "\n" for page in combinedpages)
    print(f"wroted {output_name}")


# %%


def get_txt_from_pdf(pdfname):
    """Extract the text of *pdfname* into a ``.txt`` file with the same stem.

    Runs ``spagetti`` on the PDF, deletes "cropped_image.pdf" and
    "PDF_image.png" (presumably scratch files left by that step -- confirm
    against ``spagetti``), merges the results with ``combine_results`` and
    writes them out through ``write_combinedpages``.

    Args:
        pdfname: Name/path of the PDF to process.

    Returns:
        The name of the text file that was written.
    """
    print(pdfname)
    # Replace the extension properly instead of chopping three characters,
    # which mangled any name that did not end in a three-letter extension.
    txtname = os.path.splitext(pdfname)[0] + ".txt"
    print(txtname)
    results = spagetti(pdfname)
    force_delete_file("cropped_image.pdf")
    force_delete_file("PDF_image.png")
    combined = combine_results(results)
    write_combinedpages(txtname, combined)
    return txtname


# %%
def main():
    """Convert every PDF found by ``get_pdfnames`` into a text file."""
    # Plain loop: the calls are made for their side effects, so collecting
    # their return values in a throwaway list serves no purpose.
    for name in get_pdfnames():
        get_txt_from_pdf(name)


# %%
# Run the pipeline exactly once, and only when this file is executed
# directly (or this cell is run interactively). The previous unguarded
# call made the whole conversion run twice as a script and run on import.
if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment