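# Compare tree-based multi-output regressors (Random Forest, Gradient
# Boosting, XGBoost, LightGBM, CatBoost) via GridSearchCV and report
# test-set MSE, MAE, and R^2 for each.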
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# Common hyperparameters
n_estimators = [50, 100]
learning_rates = [0.01, 0.1]
max_depths = [3, 5, 10, None]
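# Note: max_depth=None lets Random Forest grow trees to full depth; the
# XGBoost sklearn wrapper treats None as "use the library default".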

# Define models and parameter grids
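# The "estimator__" prefix is scikit-learn's nested-parameter syntax: it
# routes each grid value through MultiOutputRegressor to the wrapped model.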
models_params = {
    "Random Forest": (MultiOutputRegressor(RandomForestRegressor(random_state=42)), 
                      {"estimator__n_estimators": n_estimators, "estimator__max_depth": max_depths}),
    "Gradient Boosting": (MultiOutputRegressor(GradientBoostingRegressor(random_state=42)), 
                          {"estimator__n_estimators": n_estimators, "estimator__learning_rate": learning_rates}),
    "XGBoost": (MultiOutputRegressor(XGBRegressor(random_state=42, verbosity=0)), 
                {"estimator__n_estimators": n_estimators, "estimator__max_depth": max_depths}),
    "LightGBM": (MultiOutputRegressor(LGBMRegressor(random_state=42)), 
                 {"estimator__n_estimators": n_estimators, "estimator__learning_rate": learning_rates}),
    "CatBoost": (MultiOutputRegressor(CatBoostRegressor(random_state=42, verbose=0)), 
                 {"estimator__iterations": n_estimators, "estimator__learning_rate": learning_rates})
}

def evaluate_models_with_grid_search(X, y):
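    """Grid-search each candidate model, then report hold-out MSE, MAE,
    and R^2, each averaged uniformly across the target columns."""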
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    metrics = []

    # Train and evaluate each model using GridSearchCV
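    # (3-fold CV; the neg-MSE scorer averages uniformly across targets)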
    for name, (model, params) in models_params.items():
        grid_search = GridSearchCV(model, params, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(X_test)

        # Calculate metrics
        mse = mean_squared_error(y_test, y_pred, multioutput='uniform_average')
        mae = mean_absolute_error(y_test, y_pred, multioutput='uniform_average')
        r2 = r2_score(y_test, y_pred, multioutput='uniform_average')

        metrics.append({"Model": name, "MSE": mse, "MAE": mae, "R2": r2})

    # Collect results into a DataFrame, print it, and return it
    metrics_df = pd.DataFrame(metrics)
    print(metrics_df)
    return metrics_df

# Example usage: replace `modelling_data` with your own DataFrame and
# uncomment the block below. The targets ('E', 'a') are dropped from X
# along with the other non-feature columns to avoid target leakage.
# X = modelling_data.drop(['strain', 'stress', 'Material', 'TestMethod', 'Grade Name',
#                          'Filler Type', 'Parent Identifier', 'Instance Number',
#                          'strain_at_break', 'stress_at_break', 'E', 'a', 'b', 'c',
#                          'd', 'f', 'Base Resin1', 'Base Resin2',
#                          'Reinforce Fiber-Glass', 'base resin 3'], axis=1)
# y = modelling_data[['E', 'a']]
# metrics_df = evaluate_models_with_grid_search(X, y)
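
# A minimal, self-contained smoke test on synthetic data (an illustrative
# assumption: `make_regression` stands in for the real dataset here).
from sklearn.datasets import make_regression

X_demo, y_demo = make_regression(n_samples=200, n_features=10, n_targets=2, random_state=42)
X_demo = pd.DataFrame(X_demo, columns=[f"f{i}" for i in range(10)])
y_demo = pd.DataFrame(y_demo, columns=["E", "a"])
metrics_df = evaluate_models_with_grid_search(X_demo, y_demo)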