import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
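# Note: xgboost, lightgbm, and catboost are third-party packages; install them
# first if missing (pip install xgboost lightgbm catboost).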
# Common hyperparameters
n_estimators = [50, 100]
learning_rates = [0.01, 0.1]
max_depths = [3, 5, 10, None]
# Define models and parameter grids
models_params = {
    "Random Forest": (MultiOutputRegressor(RandomForestRegressor(random_state=42)),
                      {"estimator__n_estimators": n_estimators, "estimator__max_depth": max_depths}),
    "Gradient Boosting": (MultiOutputRegressor(GradientBoostingRegressor(random_state=42)),
                          {"estimator__n_estimators": n_estimators, "estimator__learning_rate": learning_rates}),
    "XGBoost": (MultiOutputRegressor(XGBRegressor(random_state=42, verbosity=0)),
                {"estimator__n_estimators": n_estimators, "estimator__max_depth": max_depths}),
    "LightGBM": (MultiOutputRegressor(LGBMRegressor(random_state=42)),
                 {"estimator__n_estimators": n_estimators, "estimator__learning_rate": learning_rates}),
    "CatBoost": (MultiOutputRegressor(CatBoostRegressor(random_state=42, verbose=0)),
                 {"estimator__iterations": n_estimators, "estimator__learning_rate": learning_rates})
}
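# The "estimator__" prefix is scikit-learn's convention for routing grid
# parameters through MultiOutputRegressor to the wrapped regressor; the valid
# parameter names for any wrapper can be listed with, e.g.:
# MultiOutputRegressor(RandomForestRegressor()).get_params().keys()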
def evaluate_models_with_grid_search(X, y):
    """Grid-search each model on a train split and report test-set metrics."""
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    metrics = []
    # Train and evaluate each model using GridSearchCV
    for name, (model, params) in models_params.items():
        grid_search = GridSearchCV(model, params, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(X_test)
        # Calculate metrics, averaged uniformly across the output targets
        mse = mean_squared_error(y_test, y_pred, multioutput='uniform_average')
        mae = mean_absolute_error(y_test, y_pred, multioutput='uniform_average')
        r2 = r2_score(y_test, y_pred, multioutput='uniform_average')
        metrics.append({"Model": name, "MSE": mse, "MAE": mae, "R2": r2})
    # Return metrics as a DataFrame
    metrics_df = pd.DataFrame(metrics)
    print(metrics_df)
    return metrics_df
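# Optional sanity check (illustrative, not part of the original workflow):
# uncomment to verify the pipeline end-to-end on synthetic two-target data
# before plugging in your own dataset.
# from sklearn.datasets import make_regression
# X_demo, y_demo = make_regression(n_samples=200, n_features=10, n_targets=2, random_state=42)
# evaluate_models_with_grid_search(pd.DataFrame(X_demo), pd.DataFrame(y_demo, columns=['E', 'a']))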
# Example usage
# Replace `modelling_data` with your dataset
X = modelling_data.drop(['strain', 'stress', 'Material', 'TestMethod', 'Grade Name',
                         'Filler Type', 'Parent Identifier', 'Instance Number',
                         'strain_at_break', 'stress_at_break', 'E', 'a', 'b', 'c',
                         'd', 'f', 'Base Resin1', 'Base Resin2',
                         'Reinforce Fiber-Glass', 'base resin 3'], axis=1)
y = modelling_data[['E', 'a']]
metrics_df = evaluate_models_with_grid_search(X, y)
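# The result has one row per model; rank them with e.g. metrics_df.sort_values('MSE')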