Untitled

 avatar
unknown
plain_text
22 days ago
26 kB
0
Indexable
# Operating system libraries
import os
from pprint import pprint
import random
from functools import wraps

# Numerical and math libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Deep Learning libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Machine learning libraries
from sklearn.model_selection import KFold
#from sklearn.metrics import make_scorer
#from sklearn import set_config

# Feature toggles for GPU acceleration.
enable_torch_cuda = True
enable_sklearn_cuda = True # Enable cuML for sklearn if available

# Route all newly created torch tensors/modules to the GPU when requested.
if enable_torch_cuda and torch.cuda.is_available():
    print("Detected and enable CUDA support in PyTorch")
    torch.set_default_device('cuda')

if enable_sklearn_cuda:
    try:
        # Prefer GPU-backed cuML implementations when installed.
        # NOTE(review): cuML's RandomForestRegressor is aliased to the name
        # MultiOutputRegressor here, but it is a completely different
        # algorithm from sklearn's SVR-wrapping MultiOutputRegressor, and
        # RandomForestRegressor does not accept an estimator argument the
        # way MultiOutputRegressor(SVR(...)) is called below — confirm this
        # path actually works when cuML is present.
        from cuml.svm import SVR as SVR
        from cuml.svm import LinearSVR as LinearSVR # Import LinearSVR from cuML
        from cuml.ensemble import RandomForestRegressor as MultiOutputRegressor
        print("Detected and enabled CUDA support in cuML (sklearn-cuda)")
    except ImportError:
        # Fall back to CPU-based sklearn equivalents when cuML is missing.
        print("cuML (sklearn-cuda) not found or not installed with GPU support. Falling back to CPU-based sklearn.")
        from sklearn.svm import SVR as SVR
        from sklearn.svm import LinearSVR as LinearSVR # Import LinearSVR from sklearn
        from sklearn.multioutput import MultiOutputRegressor
else:
    from sklearn.svm import SVR as SVR
    from sklearn.svm import LinearSVR as LinearSVR # Import LinearSVR from sklearn
    from sklearn.multioutput import MultiOutputRegressor

# Parameters for every model
# Parameters for every model.
# NOTE: the key order of each per-model grid matters — unpackParams()
# (defined further down) maps a key back to a position in the sampled
# parameter tuple via list(<grid>).index(key), so the keys must stay in
# the same order the tuples are packed in.
param_grid_base = {
    'modeltype': ['nn', 'svr', 'linearsvr'], # Add 'linearsvr'
}

# Parameters for NN models.
# List-valued entries are sampled uniformly by the random search.
param_grid_nn = {
    **param_grid_base,
    'hidden_size': np.linspace(50, 300, 15, dtype=int).tolist(),
    'learning_rate': np.logspace(-4, -1, num=10).tolist(),
    'weight_decay': np.logspace(-3.5, -2, 5).tolist(),
    'num_epochs': np.linspace(15, 1000, 10, dtype=int).tolist(),
    'batch_size': [250], # np.logspace(np.log10(1), np.log10(250 + 1), 5, dtype=int).tolist(), ## too slow?
    'dropout_prob': np.logspace(-3, -0.1, 3).tolist(),
    # 'patience' and 'threshold' are sampled but — as of this version —
    # never consumed by train_model (no early stopping is implemented).
    'patience': np.linspace(2, 10, 8).tolist(),
    'threshold': np.logspace(-5, -3, num=8).tolist(),
}

# Parameters for SVR models.
# Dict-valued entries ({value: weight}) are sampled with those weights
# via rchoicep() rather than uniformly.
param_grid_svr = {
    **param_grid_base,
    'kernel': {'linear': 0.4, 'rbf': 0.6 },
    'C': np.logspace(-2, 3, 6).tolist(),
    'epsilon': [0.01, 0.1, 0.5, 1, 5],
    'gamma': {'scale':0.30, 'auto':0.10, 0.01:0.15, 0.1:0.15, 1.0:0.15, 10.0:0.15}
}

# Parameters for LinearSVR models
param_grid_linearsvr = { # Define parameters for LinearSVR
    **param_grid_base,
    'C': np.logspace(-2, 3, 6).tolist(),
    'epsilon': [0.01, 0.1, 0.5, 1, 5],
    'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive'] # Example parameters for LinearSVR
}


# Union of all grids, used to enumerate every possible column name.
# Duplicate keys ('C', 'epsilon') are overwritten left-to-right; the
# per-model grids above are what drive the actual sampling.
param_grid = {**param_grid_nn, **param_grid_svr, **param_grid_linearsvr} # Include param_grid_linearsvr

# Meta-parameters for sampling and cross validation
n_samples = 10       # random-search candidates per outer fold
outer_kfold = 10     # outer folds (test estimate)
inner_kfold = 5      # inner folds (model selection)
outputDir = "plots"
randomSeed = 42

# Set the seed for reproducibility.
# NOTE(review): only Python's `random` module is seeded here; numpy and
# torch RNGs (weight init, KFold shuffling uses its own random_state) are
# not — NN weight initialization is therefore not reproducible. Confirm
# whether torch.manual_seed / np.random.seed were intended as well.
random.seed(randomSeed)

# Get the number of available CPU cores
num_threads = os.cpu_count()
if (num_threads > 1):
    print(f"Detected and enable multithread support with {num_threads} threads")

# Set the number of threads for CPU operations
torch.set_num_threads(num_threads)
os.environ["OMP_NUM_THREADS"] = str(num_threads)
os.environ["MKL_NUM_THREADS"] = str(num_threads)
torch.set_num_interop_threads(num_threads)

# Set additional environment variables for sklearn.
# NOTE(review): "JOBLIB_NUM_CPU_THREADS" and "LOOPY_BACKEND" do not match
# documented joblib environment variable names — verify these have any
# effect (joblib's backend is usually selected in code, not via env vars).
os.environ["JOBLIB_NUM_CPU_THREADS"] = str(-1)
os.environ["LOOPY_BACKEND"] = "loky"

def set_n_jobs_default(cls):
    """Class decorator making ``n_jobs=-1`` the default for an estimator.

    Patches ``cls.__init__`` in place so that, unless the caller passes
    ``n_jobs`` explicitly, the estimator is built with ``n_jobs=-1``
    (use every available core). Returns the same, now-patched class.
    """
    wrapped_init = cls.__init__

    @wraps(wrapped_init)
    def init_with_default(self, *args, **kwargs):
        # Inject the default only when the caller did not choose one.
        kwargs.setdefault('n_jobs', -1)
        wrapped_init(self, *args, **kwargs)

    cls.__init__ = init_with_default
    return cls

# Make every MultiOutputRegressor parallel by default (n_jobs=-1).
MultiOutputRegressor = set_n_jobs_default(MultiOutputRegressor)

# Set environment to suppress warnings
#os.environ["PYTHONWARNINGS"] = "ignore:You are using torch.load with weights_only=False"

# Read the data (ML-CUP training set; '#' lines are comments, no header row).
data = pd.read_csv("ML-CUP24-TR.csv", comment='#', header=None)

# All columns except the last 3 and the first are input data
# (column 0 is presumably a row id — TODO confirm against the CSV format).
X_raw = data.iloc[:, 1:-3].values

# The last 3 columns are the desired outputs
y_raw = data.iloc[:, -3:].values

# Normalize data
def normalize_data(inputs):
    """Identity pass-through — z-score normalization is currently disabled.

    NOTE(review): the early ``return inputs`` makes the mean/std code
    below unreachable; delete the first return to re-enable per-feature
    standardization. Left as-is because the disable looks deliberate.
    """
    return inputs
    mean = np.mean(inputs, axis=0)
    std = np.std(inputs, axis=0)
    return (inputs - mean) / std

# Convert the data to NumPY tensors and PyTorch tensors.
# Parallel float32 views of the same rows are kept so sklearn models use
# the NumPy arrays and the NN uses the torch tensors (which land on the
# CUDA default device when enabled above).
X_np = normalize_data(X_raw).astype(np.float32)
y_np = y_raw.astype(np.float32)
X_torch = torch.tensor(normalize_data(X_raw), dtype=torch.float32)
y_torch = torch.tensor(y_raw, dtype=torch.float32)

# Define hyperparameters derived from the data.
input_size = X_torch.shape[1]   # number of input features
output_size = 3                 # three regression targets (last 3 CSV columns)

# Define a simple neural network
class NeuralNet(nn.Module):
    """Single-hidden-layer MLP: Linear -> ReLU -> Dropout -> Linear.

    Layer widths come from the module-level ``input_size`` and
    ``output_size``; ``hidden_size`` and ``dropout_prob`` are the
    searched hyperparameters.
    """

    def __init__(self, hidden_size, dropout_prob):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        # BUG FIX: the dropout layer was constructed but never applied,
        # so the searched `dropout_prob` hyperparameter had no effect.
        # Apply it on the hidden activations (active only in train mode).
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# Mean Euclidean Error (MEE): average L2 distance between target rows
# and prediction rows of two (n_samples, n_outputs) arrays.
def mean_euclidean_error(y_true, y_pred):
    residuals = y_true - y_pred
    return np.linalg.norm(residuals, axis=1).mean()

# Create a custom scorer (a negative MEE)
#neg_mee_scorer = make_scorer(mean_euclidean_error, greater_is_better=False)

# Outer K-fold for testing purposes (risk estimation of the whole
# model-selection procedure).
testFold = KFold(n_splits=outer_kfold, shuffle=True, random_state=randomSeed)

# Track, per outer fold, the winning model's MEEs, parameters and the
# fitted model object itself.
testFold_best_train_mees = []
testFold_best_val_mees = []
testFold_best_parameters = []
testFold_best_model = []

# ---- Outer CV loop: one full random search + refit per test fold ----
for fold, (tval_index, test_index) in enumerate(testFold.split(X_np)):

    # Outer split: "tval" rows feed the inner random search / K-fold
    # validation; "test" rows are held out for the final assessment.
    # Parallel NumPy and torch views of the same rows are kept in sync.
    tval_X_np, test_X_np = X_np[tval_index], X_np[test_index]
    tval_y_np, test_y_np = y_np[tval_index], y_np[test_index]
    tval_X_torch, test_X_torch = X_torch[tval_index], X_torch[test_index]
    tval_y_torch, test_y_torch = y_torch[tval_index], y_torch[test_index]

    # Track MEE scores for this random search
    randomSearch_avg_train_mees = []
    randomSearch_avg_val_mees = []
    randomSearch_parameters = []

    def train_model(
            train_X_torch, train_X_np,
            train_y_torch, train_y_np,
            val_X_torch, val_X_np,
            val_y_torch, val_y_np,
            params):
        """Train one model configuration and report train/val MEE.

        `params` is a tuple whose first element selects the family
        ('nn', 'svr' or 'linearsvr'); the rest are that family's
        hyperparameters in the same order they were packed by the
        random-search loop below.

        Returns (model, final_train_mee, final_val_mee,
        per_epoch_train_mees, per_epoch_val_mees); the per-epoch lists
        are empty for the non-NN families.
        """

        if (params[0] == 'nn'):

            # NOTE(review): `patience` and `threshold` are unpacked but
            # never used — no early stopping is actually implemented.
            (modeltype,hidden_size,learning_rate,weight_decay,num_epochs,batch_size,dropout_prob,patience,threshold) = params

            model = NeuralNet(hidden_size, dropout_prob)
            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

            # Move model to CUDA if available
            if enable_torch_cuda and torch.cuda.is_available():
                model = model.cuda()

            # Split the training data into batches
            def batchify(data, batch_size):
                return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]

            # Batches are fixed once: there is no reshuffling between epochs.
            train_X_batches = batchify(train_X_torch, batch_size)
            train_y_batches = batchify(train_y_torch, batch_size)
            n_batches = len(train_X_batches)

            # Track MEE for each epoch
            epoch_train_mees = []
            epoch_val_mees = []

            for epoch in range(num_epochs):
                model.train()

                train_mee = 0.0
                for i in range(0,n_batches):

                    batch_X = train_X_batches[i]
                    batch_y = train_y_batches[i]

                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()

                    # Calculate MEE for the batch
                    train_mee += mean_euclidean_error(batch_y.detach().cpu().numpy(), outputs.detach().cpu().numpy())

                # Average MEE over all batches (unweighted: the last,
                # possibly smaller, batch counts the same as a full one).
                train_mee /= n_batches
                epoch_train_mees.append(train_mee)

                # Validation MEE at the end of every epoch, in eval mode.
                model.eval()
                with torch.no_grad():
                    epoch_val_mees.append(mean_euclidean_error(val_y_np, model(val_X_torch).detach().cpu().numpy()))

            return model, epoch_train_mees[-1], epoch_val_mees[-1], epoch_train_mees, epoch_val_mees

        elif (params[0] == 'svr'):

            (modeltype,kernel,C,epsilon,gamma) = params

            # One SVR per output dimension via MultiOutputRegressor.
            # (`gamma` is passed even for the linear kernel, where the
            # sklearn implementation ignores it.)
            svr = MultiOutputRegressor(SVR(kernel=kernel,C=C,epsilon=epsilon,gamma=gamma))
            svr.fit(train_X_np,train_y_np)
            train_mees = mean_euclidean_error(train_y_np,svr.predict(train_X_np))
            val_mees = mean_euclidean_error(val_y_np,svr.predict(val_X_np))

            return svr, train_mees, val_mees, [], []

        elif (params[0] == 'linearsvr'): # Handle LinearSVR

            (modeltype, C, epsilon, loss) = params # Adjust parameters as needed

            linearsvr = MultiOutputRegressor(LinearSVR(C=C, epsilon=epsilon, loss=loss)) # Use LinearSVR
            linearsvr.fit(train_X_np,train_y_np)
            train_mees = mean_euclidean_error(train_y_np, linearsvr.predict(train_X_np))
            val_mees = mean_euclidean_error(val_y_np, linearsvr.predict(val_X_np))

            return linearsvr, train_mees, val_mees, [], []

        else:
            # Unknown model family: hard-abort the whole process.
            os.abort()

    # Search over a random sample of the parameter combinations
    for i in range(0,n_samples):

        # Uniform choice from a list of candidate values.
        def rchoice(pg):
            return random.choice(pg)

        modeltype = rchoice(param_grid['modeltype'])

        if (modeltype == 'nn'):
            hidden_size = rchoice(param_grid['hidden_size'])
            learning_rate = rchoice(param_grid['learning_rate'])
            weight_decay = rchoice(param_grid['weight_decay'])
            num_epochs = rchoice(param_grid['num_epochs'])
            batch_size = rchoice(param_grid['batch_size'])
            dropout_prob = rchoice(param_grid['dropout_prob'])
            patience = rchoice(param_grid['patience'])
            threshold = rchoice(param_grid['threshold'])
            # Put all the random parameters into a tuple.
            # Order must match both the unpacking in train_model and the
            # key order of param_grid_nn relied on by unpackParams below.
            params = (modeltype,hidden_size,learning_rate,weight_decay,num_epochs,batch_size,dropout_prob,patience,threshold)

        # Weighted choice from a {value: probability-weight} dict.
        def rchoicep(pg):
            return random.choices(list(pg.keys()), weights=list(pg.values()), k=1)[0]

        if (modeltype == 'svr'):
            kernel = rchoicep(param_grid['kernel'])
            C = rchoice(param_grid['C'])
            epsilon = rchoice(param_grid['epsilon'])
            gamma = rchoicep(param_grid['gamma'])
            # Put all the random parameters into a tuple
            params = (modeltype,kernel,C,epsilon,gamma)

        if (modeltype == 'linearsvr'): # Parameters for LinearSVR
            C = rchoice(param_grid['C'])
            epsilon = rchoice(param_grid['epsilon'])
            loss = rchoice(param_grid_linearsvr['loss'])
            # Put all the random parameters into a tuple
            params = (modeltype, C, epsilon, loss)


        # Same seed every candidate, so all candidates are scored on
        # identical inner splits (fair comparison).
        validFold = KFold(n_splits=inner_kfold, shuffle=True, random_state=randomSeed)

        # The MEE scores for each validation fold
        validFold_train_mees = []
        validFold_val_mees = []

        # Inner K-fold for validation purposes
        for internalFold, (train_index, val_index) in enumerate(validFold.split(tval_X_np)):

            train_X_np, val_X_np = tval_X_np[train_index], tval_X_np[val_index]
            train_y_np, val_y_np = tval_y_np[train_index], tval_y_np[val_index]
            train_X_torch, val_X_torch = tval_X_torch[train_index], tval_X_torch[val_index]
            train_y_torch, val_y_torch = tval_y_torch[train_index], tval_y_torch[val_index]

            _, train_mees, val_mees, epoch_train_mees, epoch_val_mees = train_model(
                train_X_torch,train_X_np,
                train_y_torch,train_y_np,
                val_X_torch, val_X_np,
                val_y_torch, val_y_np,
                params)

            validFold_train_mees.append(train_mees)
            validFold_val_mees.append(val_mees)

        # Candidate score: arithmetic mean of the inner-fold MEEs.
        randomSearch_avg_train_mees.append(np.mean(validFold_train_mees))
        randomSearch_avg_val_mees.append(np.mean(validFold_val_mees))

        #randomSearch_avg_train_mees.append(np.sqrt(np.mean(np.pow(validFold_train_mees,2)))) # Quadratic mean
        #randomSearch_avg_val_mees.append(np.sqrt(np.mean(np.pow(validFold_val_mees,2)))) # Quadratic mean

        randomSearch_parameters.append(params)

    # Sort the random search results by validation MEE (ascending: best first)
    sorted_indices = np.argsort(randomSearch_avg_val_mees)

    randomSearch_avg_train_mees = [randomSearch_avg_train_mees[i] for i in sorted_indices]
    randomSearch_avg_val_mees = [randomSearch_avg_val_mees[i] for i in sorted_indices]
    randomSearch_parameters = [randomSearch_parameters[i] for i in sorted_indices]

    print(f"RandomSearch Best Training MEE: {randomSearch_avg_train_mees[0]}")
    print(f"RandomSearch Best Validation MEE: {randomSearch_avg_val_mees[0]}")
    print(f"RandomSearch Best Parameters: {randomSearch_parameters[0]}")

    # Create directory if it doesn't exist
    os.makedirs(f'{outputDir}/{fold}', exist_ok=True)

    # Find the smallest i such that randomSearch_parameters[i][0] == 'nn'
    # — i.e. the best-ranked candidate of that family, since the lists
    # are already sorted by validation MEE.
    try:
        nn_index = next(i for i, params in enumerate(randomSearch_parameters) if params[0] == 'nn')
    except StopIteration:
        print("No 'nn' model found in randomSearch_parameters")
        nn_index = -1 # Indicate no NN model found

    # Find the smallest i such that randomSearch_parameters[i][0] == 'svr'
    try:
        svr_index = next(i for i, params in enumerate(randomSearch_parameters) if params[0] == 'svr')
    except StopIteration:
        print("No 'svr' model found in randomSearch_parameters")
        svr_index = -1 # Indicate no SVR model found

    # Find the smallest i such that randomSearch_parameters[i][0] == 'linearsvr'
    try:
        linearsvr_index = next(i for i, params in enumerate(randomSearch_parameters) if params[0] == 'linearsvr')
    except StopIteration:
        print("No 'linearsvr' model found in randomSearch_parameters")
        linearsvr_index = -1 # Indicate no LinearSVR model found


    if nn_index != -1:
        # Our best NN model, refit on the full tval split; the outer test
        # split plays the "validation" role here, so nn_val_mees is in
        # fact a test-set MEE and the plotted curve is a test curve.
        nn_model, nn_train_mees, nn_val_mees, nn_epoch_train_mees, nn_epoch_val_mees = train_model(
            tval_X_torch, tval_X_np,
            tval_y_torch, tval_y_np,
            test_X_torch, test_X_np,
            test_y_torch, test_y_np,
            randomSearch_parameters[nn_index])

        # Print results for our best NN model
        print(f"Best refit NN Training MEE: {nn_train_mees}")
        print(f"Best refit NN Testing MEE: {nn_val_mees}")

        # Plot learning curve for our best NN model
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(nn_epoch_train_mees) + 1), nn_epoch_train_mees, label='Train MEE')
        plt.plot(range(1, len(nn_epoch_val_mees) + 1), nn_epoch_val_mees, label='Test MEE', linestyle='--')
        plt.xlabel('Epoch')
        plt.ylabel('MEE')
        plt.title('NN Learning Curve - Best Model')
        plt.legend()
        plt.grid()
        # Save the plot to a file
        plt.savefig(f'{outputDir}/{fold}/nn_learning_curve.png')
        #plt.show()
    else:
        nn_train_mees = float('inf') # Assign infinity if no NN model
        nn_val_mees = float('inf')

    if svr_index != -1:
        # Our best SVR model, refit on the full tval split.
        svr_model, svr_train_mees, svr_val_mees, svr_epoch_train_mees, svr_epoch_val_mees = train_model(
            tval_X_torch, tval_X_np,
            tval_y_torch, tval_y_np,
            test_X_torch, test_X_np,
            test_y_torch, test_y_np,
            randomSearch_parameters[svr_index])

        # Print results for our best SVR model
        print(f"Best refit SVR Training MEE: {svr_train_mees}")
        print(f"Best refit SVR Testing MEE: {svr_val_mees}")
    else:
        svr_train_mees = float('inf') # Assign infinity if no SVR model
        svr_val_mees = float('inf')

    if linearsvr_index != -1:
        # Our best LinearSVR model, refit on the full tval split.
        linearsvr_model, linearsvr_train_mees, linearsvr_val_mees, linearsvr_epoch_train_mees, linearsvr_epoch_val_mees = train_model(
            tval_X_torch, tval_X_np,
            tval_y_torch, tval_y_np,
            test_X_torch, test_X_np,
            test_y_torch, test_y_np,
            randomSearch_parameters[linearsvr_index])

        # Print results for our best LinearSVR model
        print(f"Best refit LinearSVR Training MEE: {linearsvr_train_mees}")
        print(f"Best refit LinearSVR Testing MEE: {linearsvr_val_mees}")
    else:
        linearsvr_train_mees = float('inf') # Assign infinity if no LinearSVR model
        linearsvr_val_mees = float('inf')


    # Dump the full (sorted) random-search history for this fold.
    df = pd.DataFrame({
        'avg_train_mee': randomSearch_avg_train_mees,
        'avg_val_mee': randomSearch_avg_val_mees,
        'parameters': randomSearch_parameters,
    })

    # Map a grid key to its value inside a sampled params tuple, or None
    # when the key does not apply to that model family. Relies on the
    # per-model grid's dict insertion order matching the tuple packing
    # order used by the random-search loop above.
    def unpackParams(key,x):
        if (x[0] == 'nn' and key in param_grid_nn):
            return x[list(param_grid_nn).index(key)]
        elif (x[0] == 'svr' and key in param_grid_svr):
            return x[list(param_grid_svr).index(key)]
        elif (x[0] == 'linearsvr' and key in param_grid_linearsvr):
            return x[list(param_grid_linearsvr).index(key)]
        else:
            return None

    # Explode the params tuple into one column per hyperparameter.
    for i, key in enumerate(param_grid.keys()):
        df[key] = df['parameters'].apply(lambda x: unpackParams(key,x))

    df.drop('parameters', axis=1, inplace=True)

    # Print the DataFrame to a file
    df.to_csv(f'{outputDir}/{fold}/results.csv', index=False)

    # Select rows in the dataframe where 'modeltype' is 'nn'
    nn_df = df[df['modeltype'] == 'nn']

    # Plot statistics for each NN parameter
    for column in param_grid_nn.keys():

        if column == 'modeltype' or column == 'gamma':
            continue; # we have nothing to do with modeltype and we don't know how to handle mixed data

        # Calculate the average of avg_val_mee for each unique value in the column
        average_values = nn_df.groupby(column)['avg_val_mee'].mean().reset_index()
        #pprint(average_values)

        # Plotting
        plt.figure(figsize=(8, 5))
        plt.plot(average_values[column], average_values['avg_val_mee'], marker='o', linestyle='-', color='blue')

        # Add labels and title
        plt.xlabel(column)

        # Log-scale the x axis for wide-ranging numeric parameters.
        if pd.api.types.is_numeric_dtype(nn_df[column]) and (max(average_values[column])/min(average_values[column]) > 20):
            plt.xscale('log')

        plt.ylabel('Average Mean Test Error')
        plt.title(f'Effect of NN {column} on Average Mean Test Error')
        #plt.legend()
        #plt.grid(True)

        # Save the plot to a file
        plt.savefig(f'{outputDir}/{fold}/nn_{column}.png')
        #plt.show()

    # Select rows in the dataframe where 'modeltype' is 'svr'
    nn_svr = df[df['modeltype'] == 'svr']

    # Plot statistics for each SVR parameter
    for column in param_grid_svr.keys():

        if column == 'modeltype' or column == 'gamma':
            continue; # we have nothing to do with modeltype and we don't know how to handle mixed data

        # Calculate the average of avg_val_mee for each unique value in the column
        average_values = nn_svr.groupby(column)['avg_val_mee'].mean().reset_index()
        #pprint(average_values)

        # Plotting
        plt.figure(figsize=(8, 5))
        plt.plot(average_values[column], average_values['avg_val_mee'], marker='o', linestyle='-', color='blue')

        # Add labels and title
        plt.xlabel(column)

        if pd.api.types.is_numeric_dtype(nn_svr[column]) and (max(average_values[column])/min(average_values[column]) > 20):
            plt.xscale('log')

        plt.ylabel('Average Mean Test Error')
        plt.title(f'Effect of SVR {column} on Average Mean Test Error')
        #plt.legend()
        #plt.grid(True)

        # Save the plot to a file
        plt.savefig(f'{outputDir}/{fold}/svr_{column}.png')
        #plt.show()

    # Select rows in the dataframe where 'modeltype' is 'linearsvr'
    nn_linearsvr = df[df['modeltype'] == 'linearsvr']

    # Plot statistics for each LinearSVR parameter
    for column in param_grid_linearsvr.keys():

        if column == 'modeltype': # or column == 'gamma': LinearSVR doesn't have gamma
            continue; # we have nothing to do with modeltype and we don't know how to handle mixed data

        # Calculate the average of avg_val_mee for each unique value in the column
        average_values = nn_linearsvr.groupby(column)['avg_val_mee'].mean().reset_index()
        #pprint(average_values)

        # Plotting
        plt.figure(figsize=(8, 5))
        plt.plot(average_values[column], average_values['avg_val_mee'], marker='o', linestyle='-', color='blue')

        # Add labels and title
        plt.xlabel(column)

        if pd.api.types.is_numeric_dtype(nn_linearsvr[column]) and (max(average_values[column])/min(average_values[column]) > 20):
            plt.xscale('log')

        plt.ylabel('Average Mean Test Error')
        plt.title(f'Effect of LinearSVR {column} on Average Mean Test Error')
        #plt.legend()
        #plt.grid(True)

        # Save the plot to a file
        plt.savefig(f'{outputDir}/{fold}/linearsvr_{column}.png')
        #plt.show()


    # Pick the family whose refit model had the lowest test-set MEE;
    # families with no candidate were assigned +inf above.
    best_model_index = np.argmin([nn_val_mees, svr_val_mees, linearsvr_val_mees]) # Find the index of the best model
    best_val_mees_list = [nn_val_mees, svr_val_mees, linearsvr_val_mees]
    best_train_mees_list = [nn_train_mees, svr_train_mees, linearsvr_train_mees]
    model_types = ['nn', 'svr', 'linearsvr']

    if best_model_index == 0 and nn_index != -1:
        testFold_best_parameters.append(randomSearch_parameters[nn_index])
        testFold_best_train_mees.append(nn_train_mees)
        testFold_best_val_mees.append(nn_val_mees)
        testFold_best_model.append(nn_model)
    elif best_model_index == 1 and svr_index != -1:
        testFold_best_parameters.append(randomSearch_parameters[svr_index])
        testFold_best_train_mees.append(svr_train_mees)
        testFold_best_val_mees.append(svr_val_mees)
        testFold_best_model.append(svr_model)
    elif best_model_index == 2 and linearsvr_index != -1:
        testFold_best_parameters.append(randomSearch_parameters[linearsvr_index])
        testFold_best_train_mees.append(linearsvr_train_mees)
        testFold_best_val_mees.append(linearsvr_val_mees)
        testFold_best_model.append(linearsvr_model)
    else: # Fallback to SVR if something went wrong and no better model is found.
        if svr_index != -1:
            testFold_best_parameters.append(randomSearch_parameters[svr_index])
            testFold_best_train_mees.append(svr_train_mees)
            testFold_best_val_mees.append(svr_val_mees)
            testFold_best_model.append(svr_model)
        elif nn_index != -1:
            testFold_best_parameters.append(randomSearch_parameters[nn_index])
            testFold_best_train_mees.append(nn_train_mees)
            testFold_best_val_mees.append(nn_val_mees)
            testFold_best_model.append(nn_model)
        elif linearsvr_index != -1:
            testFold_best_parameters.append(randomSearch_parameters[linearsvr_index])
            testFold_best_train_mees.append(linearsvr_train_mees)
            testFold_best_val_mees.append(linearsvr_val_mees)
            testFold_best_model.append(linearsvr_model)
        else:
            print("No model found in this fold!")
            continue # skip to the next fold if no model was trained in this iteration.


# Sort the per-fold winners by validation (outer-test) MEE, best first;
# all four parallel lists are reordered with the same permutation.
sorted_indices = np.argsort(testFold_best_val_mees)

testFold_best_train_mees = [testFold_best_train_mees[i] for i in sorted_indices]
testFold_best_val_mees = [testFold_best_val_mees[i] for i in sorted_indices]
testFold_best_parameters = [testFold_best_parameters[i] for i in sorted_indices]
testFold_best_model = [testFold_best_model[i] for i in sorted_indices]

# Print results of the test fold
# (the 'parameters' column holds raw tuples, serialized as their repr).
df = pd.DataFrame({
    'testFold_best_train_mees': testFold_best_train_mees,
    'testFold_best_val_mees': testFold_best_val_mees,
    'testFold_best_parameters': testFold_best_parameters,
})

# Print the DataFrame to a file
df.to_csv(f'{outputDir}/results.csv', index=False)

# Evaluate the best model
def bestModelEval(X):
    """Run the overall best model (lowest outer-fold MEE) on input X.

    `X` must match the winning model family: a torch tensor for the NN,
    a NumPy array for the sklearn/cuML regressors. Aborts the process on
    an unrecognized model type.
    """
    bestModelType = testFold_best_parameters[0][0]
    bestModel = testFold_best_model[0]
    if (bestModelType == 'nn'):
        # PyTorch modules are callable.
        return bestModel(X)
    elif (bestModelType == 'svr' or bestModelType == 'linearsvr'):
        # BUG FIX: sklearn/cuML estimators are not callable — the original
        # `bestModel(X)` raised TypeError; they expose .predict() instead.
        return bestModel.predict(X)
    else:
        print("Unknown model type?")
        os.abort()
Editor is loading...
Leave a Comment