After Refactor
unknown
python
10 months ago
1.7 kB
6
Indexable
def upload_document(
    URL: str,
    course_id: str,
    file_name: str,
    course_document_id: str,
) -> None:
    """Load a document from a URL, chunk it, and embed it or report failure.

    The document is loaded with ``load_doc`` and, when ``is_document``
    accepts it, run through the module's parsing/cleaning/splitting helpers.
    ``process_chunks`` receives the chunks together with the course ID, file
    name and document ID and returns the enriched chunks plus a token count.
    When the token count is below 1,000,000 and ``check_upload_document``
    returns a truthy value, the chunks are passed to ``embbed_openai``
    (the upload to AstraDB, per the original docstring). Otherwise an error
    payload is pushed to ``url_error_webhook`` via ``push_payload``.

    Args:
        URL: URL to the document.
        course_id: ID of the course.
        file_name: Name of the file.
        course_document_id: ID of the course document.

    Returns:
        None. The function has no return statement; its results are the
        calls to ``embbed_openai`` or ``push_payload``.
    """
    pdf_page = load_doc(URL)

    # NOTE(review): the original indentation was lost, so the exact extent of
    # this ``if`` body could not be recovered. The pipeline is nested here
    # because the log line marks the success path -- confirm against the
    # original file.
    if is_document(pdf_page):
        # Use %-placeholders: the original passed the IDs as extra positional
        # args to a message without placeholders, which makes the logging
        # module fail to format the record instead of logging the IDs.
        LOGGER.info(
            'Success Document Loaded... Course ID: %s || Document ID: %s',
            course_id,
            course_document_id,
        )

        pdf_page_parsed = parsing_pdf_html(pdf_page)
        removed_break = remove_break_add_whitespace(pdf_page_parsed)
        doc = create_document_by_splitting(removed_break)
        chunks = extract_headers(doc)

        # Starting token count handed to process_chunks, which returns the
        # updated total.
        doc_tokens = 0
        print("insert metedata information")

        # Add metadata (course ID, document name, document ID) to the chunks.
        new_chunks, doc_tokens = process_chunks(
            chunks,
            course_id,
            file_name,
            course_document_id,
            doc_tokens,
        )

        # The size check comes first so check_upload_document is only called
        # for documents under the token limit (short-circuit ``and``).
        if (doc_tokens < 1000000) and check_upload_document(
            new_chunks, course_id, course_document_id, doc_tokens
        ):
            embbed_openai(new_chunks, course_id, course_document_id, doc_tokens)
        else:
            # NOTE(review): this branch is also reached when
            # check_upload_document rejects the document, yet the reported
            # error is always "PDF Too Large" -- confirm that is intended.
            status = "Failed Uplaod"
            webhook_payload = {
                'course_id': course_id,
                'document_id': course_document_id,
                'tokens_embbed': doc_tokens,
                'error': "PDF Too Large",
            }
            push_payload(
                url_error_webhook,
                status,
                webhook_payload,
            )
Editor is loading...
Leave a Comment