import fitz  # PyMuPDF
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline


def extract_text_from_pdf(pdf_path):
    """Extract the full text of a PDF by concatenating the text of every page."""
    document = fitz.open(pdf_path)
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text


class PDFChatbot:
    def __init__(self, pdf_text, hf_model_name='gpt2'):
        self.pdf_text = pdf_text
        self.chain = self.create_chain(hf_model_name)

    def create_chain(self, hf_model_name):
        # Prompt that injects the extracted PDF text and the user's question into each turn.
        prompt = PromptTemplate(
            template=(
                "The following is a conversation with an AI assistant. "
                "The assistant is helpful, creative, clever, and very friendly. "
                "The assistant has access to the text of a PDF document:\n\n"
                "{pdf_text}\n\nUser: {user_input}\nAI:"
            ),
            input_variables=["pdf_text", "user_input"],
        )
        # Load the Hugging Face model locally and wrap it as a LangChain LLM.
        tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
        model = AutoModelForCausalLM.from_pretrained(hf_model_name)
        hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=100)
        llm = HuggingFacePipeline(pipeline=hf_pipeline)
        return LLMChain(prompt=prompt, llm=llm)

    def query(self, user_input):
        # Run one turn of the chain with the PDF text and the user's question.
        response = self.chain.run({"pdf_text": self.pdf_text, "user_input": user_input})
        return response


def main():
    pdf_path = 'path_to_your_pdf.pdf'
    pdf_text = extract_text_from_pdf(pdf_path)
    chatbot = PDFChatbot(pdf_text, hf_model_name='gpt2')
    print("PDF Chatbot is ready. You can start asking questions about the PDF.")
    while True:
        user_input = input("You: ")
        if user_input.lower() in ["exit", "quit"]:
            print("Exiting the PDF Chatbot.")
            break
        response = chatbot.query(user_input)
        print("AI:", response)


if __name__ == "__main__":
    main()
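A rough note on running the script (assumptions, not part of the original snippet): it needs PyMuPDF, transformers, langchain, and a PyTorch backend installed, roughly

    pip install pymupdf transformers langchain torch

and the `from langchain.llms import HuggingFacePipeline` import path matches older LangChain releases; in newer versions the same class lives under `langchain_community.llms`. Also note that `max_length=100` caps the total prompt-plus-completion length for GPT-2, so a long PDF injected into the prompt may be truncated.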