# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# 2 years ago
# 2.8 kB
# 1
# Indexable
# Never
import glob
import os
import re
from typing import Dict, List, Optional


# Define the main function; the `if __name__ == '__main__':` guard at the end of the file calls it.

# Matches a decimal literal such as 0.95, -1.5 or 2.5e-03. A decimal point is
# required, so bare integers are skipped. A sign is only consumed when it does
# not directly follow a word character or a dot, so "AUC-0.95" reads as 0.95.
_FLOAT_RE = re.compile(r'(?:(?<![\w.])[-+])?\d+\.\d+(?:[eE][-+]?\d+)?')


def _extract_values(
    content: str,
    validation_results_indicator: str,
    patterns: List[str],
) -> List[Optional[float]]:
    """Return one value per pattern, read after the last indicator in content."""
    section_start = content.rfind(validation_results_indicator)
    if section_start == -1:
        # No results section at all: keep the list aligned with `patterns`.
        return [None] * len(patterns)

    values: List[Optional[float]] = []
    for pattern in patterns:
        # The search starts at the indicator itself (not after it), so a
        # pattern that is part of the indicator text is still found.
        pattern_start = content.find(pattern, section_start)
        if pattern_start == -1:
            values.append(None)
            continue
        # Look for the number only after the pattern text, so digits that are
        # part of the pattern itself are never mistaken for the value.
        number = _FLOAT_RE.search(content, pattern_start + len(pattern))
        values.append(float(number.group()) if number else None)
    return values


def main(
    path: str,
    match: str,
    validation_results_indicator: str,
    patterns: List[str],
) -> Dict[str, List[Optional[float]]]:
    """Collect metric values from the last results section of matching files.

    Every file matched by the glob expression ``os.path.join(path, match)``
    is read in full. Within each file the *last* occurrence of
    ``validation_results_indicator`` is located; for each entry of
    ``patterns`` the first occurrence of that text at or after the indicator
    is found, and the first decimal number following it is stored.

    Args:
        path: Directory that contains the files.
        match: Glob expression for the file names, e.g. ``'test*.txt'``.
        validation_results_indicator: Text that marks the start of a results
            section, e.g. ``'Validation Results'``.
        patterns: Labels that precede the values, e.g.
            ``['Accuracy', 'Precision', 'Recall', 'F1']``.

    Returns:
        A dictionary that maps each matched file path (as returned by
        ``glob.glob``, i.e. including ``path``) to a list of values. List
        indices correspond to ``patterns``. A slot is ``None`` when the
        indicator, the pattern, or a number after the pattern is missing.
        Files contribute no entry when ``patterns`` is empty.

    Example:
        With ``test1.txt`` in ``path`` containing::

            some text
            Validation Results
            Accuracy: 0.5
            Precision: 0.6
            Recall: 0.7
            F1: 0.8

        ``main(path, 'test*.txt', 'Validation Results',
        ['Accuracy', 'Precision', 'Recall', 'F1'])`` returns
        ``{os.path.join(path, 'test1.txt'): [0.5, 0.6, 0.7, 0.8]}``.
    """
    results: Dict[str, List[Optional[float]]] = {}
    # Sorted so the dictionary's insertion order does not depend on the
    # arbitrary order in which glob lists the directory.
    for file in sorted(glob.glob(os.path.join(path, match))):
        with open(file, 'r') as f:
            content = f.read()
        values = _extract_values(content, validation_results_indicator, patterns)
        if values:
            results[file] = values
    return results




if __name__ == '__main__':
    # Script entry point: scan out.txt inside experiment_1 for the 'AUC:'
    # value that follows the last 'Validation score:' marker. The returned
    # dictionary is not used here.
    main(
        path='experiment_1',
        match='out.txt',
        validation_results_indicator='Validation score:',
        patterns=['AUC:'],
    )