import json
import re
import string
from collections import Counter

import numpy as np
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
# Function to read the SQuAD dataset
def read_squad(path):
    with open(path, 'r') as f:
        squad_dict = json.load(f)
    contexts = []
    questions = []
    answers = []
    for group in squad_dict['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            for qa in passage['qas']:
                question = qa['question']
                for answer in qa['answers']:
                    contexts.append(context)
                    questions.append(question)
                    answers.append(answer['text'])
    return contexts, questions, answers
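
# Optional variant (a sketch, not used by the evaluation below): the official
# SQuAD script scores each prediction against *all* gold answers for a question
# and keeps the best match, so this reader keeps the gold answers grouped per
# question. read_squad_grouped is our own helper name, not a library function.
def read_squad_grouped(path):
    with open(path, 'r') as f:
        squad_dict = json.load(f)
    examples = []
    for group in squad_dict['data']:
        for passage in group['paragraphs']:
            for qa in passage['qas']:
                gold = [a['text'] for a in qa['answers']]
                examples.append((passage['context'], qa['question'], gold))
    return examples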
# Function to evaluate the model using mean F1 score and Exact Match.
# Note: read_squad flattens multiple gold answers into separate rows, so this
# averages over every gold answer; the grouped variant below instead takes the
# per-question max over gold answers, as the official SQuAD script does.
def evaluate_model(model, tokenizer, contexts, questions, answers):
    f1_scores = []
    exact_matches = []
    nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
    for context, question, answer in zip(contexts, questions, answers):
        model_answer = nlp({'question': question, 'context': context})['answer']
        f1_scores.append(compute_f1(answer, model_answer))
        exact_matches.append(compute_exact(answer, model_answer))
    return np.mean(f1_scores), np.mean(exact_matches)
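
# Companion sketch to read_squad_grouped above: score each question by its best
# match over all gold answers, mirroring the official SQuAD evaluation.
def evaluate_model_grouped(model, tokenizer, examples):
    nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
    f1_scores, exact_matches = [], []
    for context, question, gold in examples:
        if not gold:  # SQuAD 2.0 marks unanswerable questions with an empty list
            continue
        model_answer = nlp({'question': question, 'context': context})['answer']
        f1_scores.append(max(compute_f1(g, model_answer) for g in gold))
        exact_matches.append(max(compute_exact(g, model_answer) for g in gold))
    return np.mean(f1_scores), np.mean(exact_matches)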
# Helper shared by both metrics: normalize text the way the official SQuAD
# script does (lowercase, drop punctuation and articles, collapse whitespace)
def normalize_text(s):
    s = s.lower()
    s = ''.join(ch for ch in s if ch not in set(string.punctuation))
    s = re.sub(r'\b(a|an|the)\b', ' ', s)
    return ' '.join(s.split())

# Function to compute the token-level F1 score. Counter intersection counts
# repeated tokens correctly, unlike the plain set intersection it replaces.
def compute_f1(answer, model_answer):
    answer_tokens = normalize_text(answer).split()
    model_answer_tokens = normalize_text(model_answer).split()
    common = Counter(answer_tokens) & Counter(model_answer_tokens)
    num_common = sum(common.values())
    if num_common == 0:
        return 0.0
    precision = num_common / len(model_answer_tokens)
    recall = num_common / len(answer_tokens)
    return (2 * precision * recall) / (precision + recall)
# Function to compute the Exact Match score on normalized strings
def compute_exact(answer, model_answer):
    return int(normalize_text(answer) == normalize_text(model_answer))
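
# Hand-checkable sanity tests for the metrics: 'big red cat' vs 'red cat'
# shares two tokens (precision 2/2, recall 2/3), so F1 = 0.8; normalization
# makes 'The Cat!' and 'the cat' an exact match.
assert abs(compute_f1('big red cat', 'red cat') - 0.8) < 1e-9
assert compute_exact('The Cat!', 'the cat') == 1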
# Load the SQuAD dataset
contexts, questions, answers = read_squad('/kaggle/input/squad-dataset/squad.json')
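# Quick sanity check on the loaded data before spending model time
print(f'Loaded {len(questions)} question/answer pairs from SQuAD')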
# Load the tokenizer and model for Mistral (the checkpoint path is a placeholder).
# Caveat: Mistral ships as a causal LM, so AutoModelForQuestionAnswering attaches
# a randomly initialized QA head unless the checkpoint was fine-tuned for
# extractive QA; transformers warns about "newly initialized" weights here.
tokenizer_mistral = AutoTokenizer.from_pretrained('path/to/mistral')
model_mistral = AutoModelForQuestionAnswering.from_pretrained('path/to/mistral')
# Load the tokenizer and model for Gemma 7B (same caveat; google/gemma-7b is a
# gated checkpoint, so Hub authentication is required)
tokenizer_gemma = AutoTokenizer.from_pretrained('google/gemma-7b')
model_gemma = AutoModelForQuestionAnswering.from_pretrained('google/gemma-7b')
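
# Smoke test (a sanity check, not part of the evaluation): run one example
# through the pipeline first; with an untrained QA head the extracted span
# will already look arbitrary, which is cheaper to notice here than after a
# full pass over the dataset.
nlp_check = pipeline('question-answering', model=model_gemma, tokenizer=tokenizer_gemma)
print(nlp_check({'question': questions[0], 'context': contexts[0]}))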
# Evaluate Mistral
f1_mistral, em_mistral = evaluate_model(model_mistral, tokenizer_mistral, contexts, questions, answers)
# Evaluate Gemma 7B
f1_gemma, em_gemma = evaluate_model(model_gemma, tokenizer_gemma, contexts, questions, answers)
# Print the results
print(f'Mistral F1 Score: {f1_mistral:.4f}')
print(f'Mistral Exact Match: {em_mistral:.4f}')
print(f'Gemma 7B F1 Score: {f1_gemma:.4f}')
print(f'Gemma 7B Exact Match: {em_gemma:.4f}')