# Untitled
# unknown
# plain_text
# a year ago
# 851 B
# 20
# Indexable
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sentence_transformers import SentenceTransformer, util
# Generate candidate search queries for a piece of text with a doc2query T5
# checkpoint and print them as a numbered list.

# Initialize the T5 model for query generation
model_name = 'doc2query/msmarco-t5-base-v1'
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Input text the queries are generated from.
text = "atlantic ocean temperature"

# Tokenize to a PyTorch tensor, truncating the input to at most 100 tokens.
input_ids = tokenizer.encode(
    text,
    max_length=100,
    truncation=True,
    return_tensors='pt',
)

# Sample 10 sequences with top-p sampling (top_p=0.95), each capped at
# 64 tokens. Because do_sample=True, the output differs from run to run.
outputs = model.generate(
    input_ids=input_ids,
    max_length=64,
    do_sample=True,
    top_p=0.95,
    num_return_sequences=10,
)

# Convert outputs to a list of clean questions
questions = [
    tokenizer.decode(output, skip_special_tokens=True) for output in outputs
]

print("\nGenerated Queries:")
for i, query in enumerate(questions, start=1):
    print(f'{i}: {query}')
# Leave a Comment