Untitled
unknown
plain_text
2 years ago
2.8 kB
3
Indexable
import glob
import os
import re
from typing import Dict, List, Optional

# Matches numbers the way Python prints floats: "0.93", "-0.25", "1.5e-05",
# and the point-less exponent form "1e-05". Bare integers are deliberately NOT
# matched, so counters that sit between a label and its value (for example
# "AUC (epoch 5): 0.93") are skipped, exactly as the decimal-only matching
# this replaces did. The exponent-only alternative is fenced with lookarounds
# so fragments of hashes or identifiers such as "3b1e52c" are not read as 1e52.
_FLOAT_RE = re.compile(
    r'[-+]?\d+\.\d+(?:[eE][-+]?\d+)?'
    r'|(?<![\w.])[-+]?\d+[eE][-+]?\d+(?!\w)'
)


def _first_float_after(text: str, start: int) -> Optional[float]:
    """Return the first float literal in *text* at or after *start*, or None."""
    found = _FLOAT_RE.search(text, start)
    return float(found.group()) if found else None


def main(path: str, match: str, validation_results_indicator: str,
         patterns: List[str]) -> Dict[str, List[Optional[float]]]:
    """Collect metric values from the last validation section of log files.

    Every file in ``path`` whose name matches the glob expression ``match``
    is read in full. Inside each file the LAST occurrence of
    ``validation_results_indicator`` marks the start of the section of
    interest. For each entry of ``patterns`` the first occurrence of that
    text at or after the indicator is located, and the first float literal
    following the pattern text is taken as its value.

    Args:
        path: Directory that contains the log files.
        match: Glob expression for the file names, e.g. ``'test*.txt'``.
        validation_results_indicator: Text that introduces a results section;
            only the last occurrence in a file is used.
        patterns: Labels whose values should be extracted, e.g.
            ``['Accuracy', 'Precision', 'Recall', 'F1']``.

    Returns:
        A dictionary keyed by the file path as produced by ``glob`` (that is,
        ``path`` joined with the file name). Each value is a list aligned
        index-for-index with ``patterns``. An entry is ``None`` when the
        indicator is absent from the file, the pattern does not occur after
        the indicator, or no float literal follows the pattern.

    Example:
        Given ``logs/test1.txt`` containing::

            some text
            Validation Results
            Accuracy: 0.5
            Precision: 0.6
            Recall: 0.7
            F1: 0.8

        ``main('logs', 'test*.txt', 'Validation Results',
        ['Accuracy', 'Precision', 'Recall', 'F1'])`` returns
        ``{'logs/test1.txt': [0.5, 0.6, 0.7, 0.8]}`` (with the platform's
        path separator).
    """
    results: Dict[str, List[Optional[float]]] = {}

    # glob returns matches in arbitrary order; sort so the result dictionary
    # is built in the same order on every run.
    for file in sorted(glob.glob(os.path.join(path, match))):
        with open(file, 'r') as f:
            content = f.read()

        section_start = content.rfind(validation_results_indicator)

        values: List[Optional[float]] = []
        for pattern in patterns:
            value = None
            # str.find/rfind signal "not found" with -1. Using -1 as a slice
            # or search start would silently address the end of the text, so
            # both lookups are checked explicitly.
            if section_start != -1:
                # The search starts at the indicator itself (not after it) so
                # a pattern that overlaps the indicator text is still found.
                pattern_index = content.find(pattern, section_start)
                if pattern_index != -1:
                    # Begin after the pattern so digits that belong to the
                    # label are never mistaken for its value.
                    value = _first_float_after(
                        content, pattern_index + len(pattern))
            values.append(value)

        results[file] = values

    return results


if __name__ == '__main__':
    main('experiment_1', 'out.txt', 'Validation score:', ['AUC:'])
Editor is loading...