import json

import numpy as np
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Function to read the SQuAD dataset
def read_squad(path):
    with open(path, 'r') as f:
        squad_dict = json.load(f)
    contexts = []
    questions = []
    answers = []
    for group in squad_dict['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            for qa in passage['qas']:
                question = qa['question']
                # Note: every gold answer becomes its own example here; the
                # official SQuAD metric instead takes the max score over all
                # gold answers for a question.
                for answer in qa['answers']:
                    contexts.append(context)
                    questions.append(question)
                    answers.append(answer['text'])
    return contexts, questions, answers

# Function to evaluate the model using F1 score and Exact Match
def evaluate_model(model, tokenizer, contexts, questions, answers):
    f1_scores = []
    exact_matches = []
    nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
    for context, question, answer in zip(contexts, questions, answers):
        model_answer = nlp({'question': question, 'context': context})['answer']
        f1_scores.append(compute_f1(answer, model_answer))
        exact_matches.append(compute_exact(answer, model_answer))
    return np.mean(f1_scores), np.mean(exact_matches)

# Function to compute a set-based token-overlap F1 score
def compute_f1(answer, model_answer):
    answer_tokens = answer.split()
    model_answer_tokens = model_answer.split()
    common = set(answer_tokens) & set(model_answer_tokens)
    if len(common) == 0:
        return 0.0
    precision = len(common) / len(model_answer_tokens)
    recall = len(common) / len(answer_tokens)
    return (2 * precision * recall) / (precision + recall)

# Function to compute the Exact Match score (case- and punctuation-sensitive)
def compute_exact(answer, model_answer):
    return int(answer == model_answer)

# Load the SQuAD dataset
contexts, questions, answers = read_squad('/kaggle/input/squad-dataset/squad.json')

# Load the tokenizer and model for Mistral.
# NOTE: base Mistral/Gemma checkpoints do not ship with a trained extractive-QA
# head; AutoModelForQuestionAnswering randomly initializes the span-prediction
# head (transformers warns about newly initialized weights), so scores will be
# near-random unless the checkpoint was fine-tuned for extractive QA.
tokenizer_mistral = AutoTokenizer.from_pretrained('path/to/mistral')
model_mistral = AutoModelForQuestionAnswering.from_pretrained('path/to/mistral')

# Load the tokenizer and model for Gemma 7B
tokenizer_gemma = AutoTokenizer.from_pretrained('google/gemma-7b')
model_gemma = AutoModelForQuestionAnswering.from_pretrained('google/gemma-7b')

# Evaluate Mistral
f1_mistral, em_mistral = evaluate_model(model_mistral, tokenizer_mistral, contexts, questions, answers)

# Evaluate Gemma 7B
f1_gemma, em_gemma = evaluate_model(model_gemma, tokenizer_gemma, contexts, questions, answers)

# Print the results
print(f'Mistral F1 Score: {f1_mistral}')
print(f'Mistral Exact Match: {em_mistral}')
print(f'Gemma 7B F1 Score: {f1_gemma}')
print(f'Gemma 7B Exact Match: {em_gemma}')
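The set-based F1 above ignores token multiplicity and is sensitive to case and punctuation. For reference, the official SQuAD metric normalizes both strings (lowercasing, stripping punctuation and the articles a/an/the) and counts overlapping tokens with multiplicity. Below is a minimal sketch of that variant; the helper names normalize_answer and squad_f1 are illustrative, not from any library.

import re
import string
from collections import Counter

def normalize_answer(s):
    # Lowercase, drop punctuation, remove articles, collapse whitespace.
    s = s.lower()
    s = ''.join(ch for ch in s if ch not in set(string.punctuation))
    s = re.sub(r'\b(a|an|the)\b', ' ', s)
    return ' '.join(s.split())

def squad_f1(answer, model_answer):
    gold = normalize_answer(answer).split()
    pred = normalize_answer(model_answer).split()
    common = Counter(gold) & Counter(pred)  # token overlap with multiplicity
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred)
    recall = num_same / len(gold)
    return 2 * precision * recall / (precision + recall)

Since SQuAD dev questions usually carry several gold answers, the official score for one prediction is max(squad_f1(a, model_answer) for a in gold_answers), and Exact Match is likewise computed on normalized strings.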