phi-2 in Google Colab: LangChain chatbot and LlamaIndex RAG
main

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
import torch
import gradio

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

base_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    load_in_8bit=True,
    torch_dtype=torch.float32,
    device_map="auto"
)

pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=256,
    do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2
)

local_llm = HuggingFacePipeline(pipeline=pipe)
pipe.model.config.pad_token_id = pipe.model.config.eos_token_id

template = """Respond to the instruction below. Behave like a chatbot and respond to the user. Try to be helpful.
### Instruction:
{instruction}
Answer:"""

prompt = PromptTemplate(template=template, input_variables=["instruction"])
llm_chain = LLMChain(prompt=prompt, llm=local_llm)

def greet(name):
    return llm_chain.run(name)

# quick test
greet("INTRODUCE YOURSELF")

# create the UI (text input / text output); the shared link expires after 72 hours
gradio.Interface(greet, "text", "text").launch(share=True)

Dependencies (install in a Colab cell before running the code above):

!pip install -q gradio
!pip -q install git+https://github.com/huggingface/transformers  # need to install from GitHub
!pip install -q datasets loralib sentencepiece
!pip -q install bitsandbytes accelerate
!pip -q install langchain
!pip install einops

# If you hit a transformers-related error, run !pip uninstall transformers, then !pip install transformers, then restart the runtime.
# If Google Colab raises a UTF-8 error, run:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
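Side note: newer transformers releases deprecate passing load_in_8bit=True directly to from_pretrained in favor of a BitsAndBytesConfig object. A minimal sketch of the equivalent model-loading call, assuming a recent transformers and bitsandbytes install (the rest of the pipeline stays the same):

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 8-bit quantization config, equivalent to load_in_8bit=True above
quant_config = BitsAndBytesConfig(load_in_8bit=True)

base_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=quant_config,
    torch_dtype=torch.float32,
    device_map="auto"
)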
usingLLama

# https://www.gradio.app/ — open-source library to demo your machine learning model with a friendly web interface so that anyone can use it
# https://docs.llamaindex.ai/en/latest/getting_started/concepts.html — LlamaIndex RAG: load documents (PDFs) and index them in chunks
# https://huggingface.co/microsoft/phi-2

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt
from llama_index.embeddings import HuggingFaceEmbedding
import torch
import gradio as gr

# load the documents (e.g. PDFs) placed in /content/Data
documents = SimpleDirectoryReader("/content/Data").load_data()

system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="microsoft/phi-2",
    model_name="microsoft/phi-2",
    device_map="cuda",
    # load the weights in bfloat16 to reduce memory usage on CUDA
    model_kwargs={"torch_dtype": torch.bfloat16}
)

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model
)

index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

def predict(input, history):
    response = query_engine.query(input)
    return str(response)

gr.ChatInterface(predict).launch(share=True)

# !pip show transformers shows the transformers version installed by default in Google Colab.
# If you later hit a transformers-related error, run !pip uninstall transformers and then !pip install transformers to get the latest version.

Dependencies (install in a Colab cell before running the code above):

!pip install -q pypdf
!pip install -q python-dotenv
!pip install -q llama-index
!pip install -q gradio
!pip install einops
!pip install accelerate
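If you do not want to re-embed the documents on every run, the index can be persisted to disk and reloaded later. This is a minimal sketch against the same legacy llama-index import paths used above (pre-0.10 package layout); /content/storage is just an example directory.

from llama_index import StorageContext, load_index_from_storage

# save the vector index (embeddings + document store) to disk
index.storage_context.persist(persist_dir="/content/storage")

# later: rebuild the index from disk instead of re-embedding the documents,
# reusing the same service_context (phi-2 LLM + bge embedding model) defined above
storage_context = StorageContext.from_defaults(persist_dir="/content/storage")
index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine()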