# Paste-site metadata (not code): Untitled / unknown / plain_text / a year ago / 1.3 kB / 12 / Indexable
import torch
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer
# --- Model setup -----------------------------------------------------------
# Load the fine-tuned "Fanesz/nindy_v2" model for inference only.
max_seq_length = 2048
dtype = None          # None: let unsloth pick the dtype (presumably by GPU capability — confirm)
load_in_4bit = True   # 4-bit quantized weights to reduce VRAM usage

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Fanesz/nindy_v2",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Wrap the tokenizer with the ChatML template, remapping the ShareGPT-style
# message keys ("from"/"value") and the custom role names used in training.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={
        "role": "from",
        "content": "value",
        "user": "Human",
        "assistant": "Nindy",
    },
    map_eos_token=True,
)

# Switch unsloth's patched model into its inference mode.
FastLanguageModel.for_inference(model)
def chat(prompt):
    """Generate and stream a reply to ``prompt`` using the ChatML template.

    Parameters
    ----------
    prompt : str
        The user's message; wrapped as a single "Human" turn.

    The generated tokens are streamed to stdout via ``TextStreamer``;
    nothing is returned.
    """
    messages = [{
        "from": "Human",
        "value": prompt,
    }]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"

    # A None pad_token_id would not produce a boolean mask tensor from the
    # comparison below, so fall back to an all-ones mask (no padding in a
    # single freshly tokenized prompt anyway).
    if tokenizer.pad_token_id is not None:
        attention_mask = input_ids != tokenizer.pad_token_id
    else:
        attention_mask = torch.ones_like(input_ids, dtype=torch.bool)

    text_streamer = TextStreamer(tokenizer)
    _ = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
    )
chat("Hello")Editor is loading...
# Paste-site residue (not code): Leave a Comment