find_best.py

 avatar
unknown
python
5 months ago
1.4 kB
2
Indexable
import os
import json
from collections import defaultdict, Counter

# Folder containing the prediction files (one JSON file per candidate run).
folder = 'preds'
# JSON file holding the reference answers the predictions are scored against.
GROUND_TRUTH_PATH = "ground_truth.json"


def first_or_self(retrieve):
    """Return ``retrieve[0]`` when that lookup succeeds, else ``retrieve`` itself.

    A prediction's ``retrieve`` field may be a sequence (the first entry is
    taken as the answer) or a bare value such as a number, which is used
    as-is. An empty sequence is also returned unchanged.
    """
    try:
        return retrieve[0]
    except (TypeError, IndexError, KeyError):
        # Not subscriptable, empty, or a mapping without key 0: keep the value.
        return retrieve


def load_predictions(file_path):
    """Load one prediction file and return a ``{qid: answer}`` dict.

    The file must be JSON with an ``"answers"`` list whose items carry a
    ``"qid"`` and a ``"retrieve"`` entry. The qids must run 1, 2, 3, ... in
    file order.

    Raises:
        ValueError: if a qid does not match its 1-based position in the list.
        KeyError: if ``"answers"``, ``"qid"`` or ``"retrieve"`` is missing.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    file = os.path.basename(file_path)
    pred_dict = {}
    for position, answer in enumerate(data['answers'], start=1):
        qid = answer['qid']
        retrieve = first_or_self(answer['retrieve'])
        if qid != position:
            raise ValueError(f"File {file}: ID sequence error at qid {qid}")
        pred_dict[qid] = retrieve
    return pred_dict


def load_answer_sheets(pred_folder):
    """Load every prediction file in *pred_folder*.

    Returns a dict mapping file name to the ``{qid: answer}`` dict produced by
    ``load_predictions``. Entries are processed in sorted name order so that
    results (and tie-breaking between equal scores) are reproducible;
    ``os.listdir`` alone returns names in arbitrary order. Sub-directories are
    skipped because they cannot be opened as prediction files.
    """
    sheets = {}
    for name in sorted(os.listdir(pred_folder)):
        path = os.path.join(pred_folder, name)
        if not os.path.isfile(path):
            continue
        sheets[name] = load_predictions(path)
    return sheets


def score_predictions(answer_sheets, answers):
    """Count, per prediction file, how many answers equal the ground truth.

    Args:
        answer_sheets: ``{file_name: {qid: predicted_answer}}``.
        answers: ground-truth items, each with ``"qid"`` and ``"retrieve"``.

    Returns:
        ``{file_name: number_of_matching_answers}``.

    A question that a prediction file does not answer is counted as wrong
    instead of aborting the whole comparison with a ``KeyError``.

    NOTE(review): the ground-truth ``"retrieve"`` value is compared as-is,
    while predictions are reduced to their first element -- confirm that the
    ground truth stores a single value rather than a list.
    """
    scores = dict.fromkeys(answer_sheets, 0)
    for answer in answers:
        qid = answer['qid']
        ground_truth = answer['retrieve']
        for name, sheet in answer_sheets.items():
            if qid in sheet and sheet[qid] == ground_truth:
                scores[name] += 1
    return scores


def rank_scores(score_dict):
    """Return ``(file_name, score)`` pairs ordered from best to worst score.

    The sort is stable, so files with equal scores keep the order they have
    in *score_dict*.
    """
    return sorted(score_dict.items(), key=lambda item: item[1], reverse=True)


def main(pred_folder=folder, ground_truth_path=GROUND_TRUTH_PATH):
    """Score every prediction file, print the ranking, and return the best file name.

    Raises:
        SystemExit: if *pred_folder* contains no prediction files.
    """
    answer_sheets = load_answer_sheets(pred_folder)
    with open(ground_truth_path, "r", encoding="utf-8") as gt_file:
        answers = json.load(gt_file)['answers']

    score_list = rank_scores(score_predictions(answer_sheets, answers))
    print(f'Score list: {score_list}')
    if not score_list:
        # Nothing to rank: exit with a clear message instead of an IndexError.
        raise SystemExit(f"No prediction files found in {pred_folder!r}")

    best_file = score_list[0][0]
    print(f"Best prediction: {best_file}")
    return best_file


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment