# Untitled
#
# avatar
# unknown
# plain_text
# a year ago
# 2.1 kB
# 6
# Indexable
import os
import fitz  # PyMuPDF
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        One string holding each page's ``get_text()`` output, in page order.
        An empty string when the document has no pages.
    """
    # Open the document as a context manager so it is closed even when a
    # page fails to extract; previously the handle was never released.
    with fitz.open(pdf_path) as document:
        # Join once instead of growing a string with ``+=`` per page.
        return "".join(page.get_text() for page in document)

class PDFChatbot:
    """Answer questions about a PDF's text with a Hugging Face causal LM.

    The PDF text is stored on the instance and substituted, together with
    the user's question, into a fixed prompt that is run through a
    LangChain ``LLMChain`` backed by a ``text-generation`` pipeline.
    """

    # Prompt sent to the model; ``{pdf_text}`` and ``{user_input}`` are
    # filled in by ``query``.
    _PROMPT_TEMPLATE = (
        "The following is a conversation with an AI assistant. "
        "The assistant is helpful, creative, clever, and very friendly. "
        "The assistant has access to the text of a PDF document:"
        "\n\n{pdf_text}\n\nUser: {user_input}\nAI:"
    )

    def __init__(self, pdf_text, hf_model_name='gpt2', max_new_tokens=100):
        """Store the PDF text and build the chain.

        Args:
            pdf_text: Text of the PDF to expose to the model.
            hf_model_name: Model identifier handed to the Hugging Face
                ``from_pretrained`` loaders.
            max_new_tokens: Upper bound on tokens generated per answer.
        """
        self.pdf_text = pdf_text
        self.chain = self.create_chain(hf_model_name, max_new_tokens)

    def create_chain(self, hf_model_name, max_new_tokens=100):
        """Build the ``LLMChain`` that formats the prompt and runs the model.

        Args:
            hf_model_name: Model identifier for the tokenizer and model.
            max_new_tokens: Upper bound on tokens generated per answer.

        Returns:
            An ``LLMChain`` expecting ``pdf_text`` and ``user_input`` inputs.
        """
        prompt = PromptTemplate(
            template=self._PROMPT_TEMPLATE,
            input_variables=["pdf_text", "user_input"],
        )

        tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
        model = AutoModelForCausalLM.from_pretrained(hf_model_name)
        # ``max_length`` counts the prompt tokens as well as the generated
        # ones. The prompt embeds the whole PDF text, so a 100-token
        # ``max_length`` left no budget for an answer. ``max_new_tokens``
        # bounds only the generated continuation.
        # NOTE(review): the PDF text is not truncated here, so a long
        # document may still exceed the model's context window.
        hf_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=max_new_tokens,
        )

        llm = HuggingFacePipeline(pipeline=hf_pipeline)
        return LLMChain(prompt=prompt, llm=llm)

    def query(self, user_input):
        """Run the chain on the stored PDF text and ``user_input``.

        Args:
            user_input: The user's question.

        Returns:
            Whatever ``self.chain.run`` returns for the filled-in prompt.
        """
        return self.chain.run(
            {"pdf_text": self.pdf_text, "user_input": user_input}
        )

def main(pdf_path='path_to_your_pdf.pdf'):
    """Run an interactive question-answering loop over one PDF.

    Extracts the PDF's text, builds a ``PDFChatbot`` on the ``gpt2`` model,
    then reads questions from standard input until the user types ``exit``
    or ``quit`` (case-insensitive) or closes the input stream.

    Args:
        pdf_path: PDF to load. Defaults to the placeholder path that was
            previously hard-coded, so calling ``main()`` behaves as before.
    """
    pdf_text = extract_text_from_pdf(pdf_path)

    chatbot = PDFChatbot(pdf_text, hf_model_name='gpt2')
    print("PDF Chatbot is ready. You can start asking questions about the PDF.")

    while True:
        try:
            # Strip so that "exit " or " quit" are still recognised.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C end the session cleanly instead of dumping a
            # traceback; the bare print() finishes the unfinished prompt line.
            print()
            print("Exiting the PDF Chatbot.")
            break
        if not user_input:
            # Nothing to ask; do not spend a model call on a blank line.
            continue
        if user_input.lower() in ("exit", "quit"):
            print("Exiting the PDF Chatbot.")
            break
        print("AI:", chatbot.query(user_input))

# Start the chatbot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Editor is loading...
# Leave a Comment