# Untitled
# unknown
# plain_text
# 3 years ago
# 2.8 kB
# 7
# Indexable
import glob
import os
import re
from typing import List
# Entry point: main() is defined below and invoked under the `if __name__ == '__main__':` guard.
def main(
    path: str,
    match: str,
    validation_results_indicator: str,
    patterns: List[str],
) -> dict:
    """Extract metric values from the last validation section of log files.

    Every file in ``path`` whose name matches the glob expression ``match``
    is read in full. Within each file the *last* occurrence of
    ``validation_results_indicator`` is located; for each entry of
    ``patterns`` the first occurrence at or after that position is found and
    the first decimal number following it is parsed as a float.

    Args:
        path: Directory that is joined with ``match`` to build the glob.
        match: Glob expression for the file names, e.g. ``'test*.txt'``.
        validation_results_indicator: Literal text marking the start of a
            results section; only the last occurrence in a file is used.
        patterns: Literal labels (not regular expressions) that precede the
            values, e.g. ``['Accuracy', 'Precision', 'Recall', 'F1']``.

    Returns:
        A dict mapping each matched path (as returned by ``glob``, i.e.
        ``path`` joined with the file name) to a list of floats whose indices
        correspond to ``patterns``. A value that cannot be found -- the
        indicator is absent, the label does not occur after it, or no decimal
        number follows the label -- is reported as ``float('nan')`` so that
        one incomplete file does not abort the whole run. Files are processed
        in sorted order, so the dict's iteration order is reproducible.

    Example:
        With ``test1.txt`` inside ``logs`` containing::

            some text
            Validation Results
            Accuracy: 0.5
            Precision: 0.6
            Recall: 0.7
            F1: 0.8

        ``main('logs', 'test*.txt', 'Validation Results',
        ['Accuracy', 'Precision', 'Recall', 'F1'])`` returns
        ``{'logs/test1.txt': [0.5, 0.6, 0.7, 0.8]}`` (with the platform's
        path separator).
    """
    # A decimal point is still required so that integers sitting between the
    # label and its value (e.g. an epoch counter) are not mistaken for the
    # metric. An optional sign and exponent are accepted so that "-0.25" and
    # "1.5e-05" are no longer truncated to "0.25" and "1.5".
    float_re = re.compile(r'[-+]?\d+\.\d+(?:[eE][-+]?\d+)?')
    missing = float('nan')

    results = {}
    # glob returns matches in arbitrary order; sort for reproducible output.
    for file in sorted(glob.glob(os.path.join(path, match))):
        with open(file, 'r') as f:
            content = f.read()

        # Only the last results section in the file is of interest.
        section_start = content.rfind(validation_results_indicator)

        values = []
        for pattern in patterns:
            value = missing
            if section_start != -1:
                label_start = content.find(pattern, section_start)
                if label_start != -1:
                    # Start scanning after the label so that digits that are
                    # part of the label itself can never be picked up.
                    number = float_re.search(content, label_start + len(pattern))
                    if number is not None:
                        value = float(number.group())
            values.append(value)

        results[file] = values
    return results
if __name__ == '__main__':
    # Script entry point: look in experiment_1/out.txt for the 'AUC:' value
    # reported after the last 'Validation score:' marker. The mapping
    # returned by main() is not used here.
    main(
        path='experiment_1',
        match='out.txt',
        validation_results_indicator='Validation score:',
        patterns=['AUC:'],
    )
# Editor is loading...