Untitled
unknown
plain_text
10 months ago
1.3 kB
5
Indexable
import torch
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer

# Model-loading configuration.
max_seq_length = 2048
dtype = None          # None lets unsloth auto-detect (bf16 on Ampere+, fp16 otherwise)
load_in_4bit = True   # 4-bit quantized weights to fit consumer GPUs

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Fanesz/nindy_v2",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Map the dataset's ShareGPT-style roles ("Human"/"Nindy") onto ChatML.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={
        "role": "from",
        "content": "value",
        "user": "Human",
        "assistant": "Nindy",
    },
    map_eos_token=True,
)

# Switch unsloth into its fast inference mode (disables training paths).
FastLanguageModel.for_inference(model)


def chat(prompt):
    """Generate and stream a single-turn reply to *prompt*.

    The prompt is wrapped as one "Human" message, formatted with the
    ChatML template, and fed to ``model.generate`` with a token streamer,
    so the reply is printed to stdout as it is produced.

    Args:
        prompt: The user's message text.

    Returns:
        None. Output is streamed to stdout via ``TextStreamer``.
    """
    messages = [{"from": "Human", "value": prompt}]
    # Run generation on whatever device the model actually lives on
    # instead of hard-coding "cuda".
    device = next(model.parameters()).device
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

    # Some tokenizers (e.g. LLaMA-family) ship without a pad token; fall
    # back to eos so the mask comparison never hits a None pad_token_id.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    attention_mask = input_ids != pad_id

    text_streamer = TextStreamer(tokenizer)
    _ = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
    )


if __name__ == "__main__":
    chat("Hello")
Editor is loading...
Leave a Comment