import torch
from transformers import LlamaTokenizer, AutoModelForCausalLM

GPU = torch.device("cuda")

tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_code_7b_instruct")
model = AutoModelForCausalLM.from_pretrained(
    "syzymon/long_llama_code_7b_instruct",
    torch_dtype=torch.float32,
    trust_remote_code=True,  # LongLLaMA ships custom modeling code on the Hub
)
model.to(GPU)
model.eval()  # inference only; disables dropout
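# Note: float32 weights for a 7B model take roughly 28 GB of GPU memory.
# If that is too much for the card at hand, a common alternative is to
# load in half precision -- a hedged sketch, assuming fp16 is acceptable
# for this model (not part of the original snippet):
#
# model = AutoModelForCausalLM.from_pretrained(
#     "syzymon/long_llama_code_7b_instruct",
#     torch_dtype=torch.float16,
#     trust_remote_code=True,
# )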
while True:
    # Get a prompt from the user
    prompt = input("Enter your prompt (or 'exit' to quit): ")
    if prompt.lower() == "exit":
        break

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(GPU)

    # Single forward pass: logits has shape (batch, seq_len, vocab_size)
    with torch.no_grad():
        outputs = model(input_ids=input_ids)
    logits = outputs.logits

    # Greedy argmax picks the model's most likely *next* token at every
    # position of the prompt -- a one-step prediction per position, not
    # an autoregressive continuation of the prompt.
    predicted_token_ids = torch.argmax(logits, dim=-1)

    # Decode the predicted token IDs back to text
    decoded_text = tokenizer.decode(predicted_token_ids[0].tolist())
    print(decoded_text)
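
# To produce an actual multi-token reply rather than per-position
# one-step predictions, the usual approach is model.generate(), which
# feeds each predicted token back into the model autoregressively.
# A minimal sketch, assuming greedy decoding is fine; the helper name
# and max_new_tokens=256 are illustrative choices, not from the
# original paste:
def generate_reply(prompt: str, max_new_tokens: int = 256) -> str:
    ids = tokenizer(prompt, return_tensors="pt").input_ids.to(GPU)
    with torch.no_grad():
        out = model.generate(ids, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens and decode only the newly generated ones
    return tokenizer.decode(out[0, ids.shape[1]:], skip_special_tokens=True)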