Untitled

mail@pastecode.io avatar
unknown
plain_text
18 days ago
851 B
10
Indexable
Never
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sentence_transformers import SentenceTransformer, util

# Load the doc2query T5 checkpoint together with its matching tokenizer.
model_name = 'doc2query/msmarco-t5-base-v1'
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Input passage for which candidate queries are generated.
text = "atlantic ocean temperature"

# Encode the passage as a PyTorch tensor, truncating to at most 100 tokens.
input_ids = tokenizer.encode(
    text,
    return_tensors='pt',
    truncation=True,
    max_length=100,
)

# Generation settings: sampling is enabled (do_sample with top_p=0.95), so
# the returned queries may differ from run to run; 10 sequences are requested,
# each capped at 64 tokens.
generation_options = {
    'max_length': 64,
    'do_sample': True,
    'top_p': 0.95,
    'num_return_sequences': 10,
}
outputs = model.generate(input_ids=input_ids, **generation_options)

# Decode every generated sequence back to text, dropping special tokens.
questions = [
    tokenizer.decode(sequence, skip_special_tokens=True)
    for sequence in outputs
]

# Print the generated queries as a 1-based numbered list.
print("\nGenerated Queries:")
for number, query in enumerate(questions, start=1):
    print(f'{number}: {query}')
Leave a Comment