import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# Common hyperparameter grids shared across the models
n_estimators = [50, 100]
learning_rates = [0.01, 0.1]
max_depths = [3, 5, 10, None]

# Define models and parameter grids. Each base regressor is wrapped in
# MultiOutputRegressor so it can predict both targets; the "estimator__"
# prefix routes each grid parameter through the wrapper to the base model.
models_params = {
    "Random Forest": (
        MultiOutputRegressor(RandomForestRegressor(random_state=42)),
        {"estimator__n_estimators": n_estimators, "estimator__max_depth": max_depths},
    ),
    "Gradient Boosting": (
        MultiOutputRegressor(GradientBoostingRegressor(random_state=42)),
        {"estimator__n_estimators": n_estimators, "estimator__learning_rate": learning_rates},
    ),
    "XGBoost": (
        MultiOutputRegressor(XGBRegressor(random_state=42, verbosity=0)),
        {"estimator__n_estimators": n_estimators, "estimator__max_depth": max_depths},
    ),
    "LightGBM": (
        MultiOutputRegressor(LGBMRegressor(random_state=42)),
        {"estimator__n_estimators": n_estimators, "estimator__learning_rate": learning_rates},
    ),
    "CatBoost": (
        MultiOutputRegressor(CatBoostRegressor(random_state=42, verbose=0)),
        # CatBoost names its tree count "iterations" rather than "n_estimators"
        {"estimator__iterations": n_estimators, "estimator__learning_rate": learning_rates},
    ),
}


def evaluate_models_with_grid_search(X, y):
    # Hold out 20% of the data for final evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    metrics = []

    # Tune and evaluate each model with 3-fold GridSearchCV on the training set
    for name, (model, params) in models_params.items():
        grid_search = GridSearchCV(
            model, params, cv=3, scoring='neg_mean_squared_error', n_jobs=-1
        )
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(X_test)

        # Calculate test-set metrics, averaged uniformly across the targets
        mse = mean_squared_error(y_test, y_pred, multioutput='uniform_average')
        mae = mean_absolute_error(y_test, y_pred, multioutput='uniform_average')
        r2 = r2_score(y_test, y_pred, multioutput='uniform_average')

        metrics.append({"Model": name, "MSE": mse, "MAE": mae, "R2": r2})

    # Return metrics as a DataFrame
    metrics_df = pd.DataFrame(metrics)
    print(metrics_df)
    return metrics_df


# Example usage -- replace `modelling_data` with your dataset.
# The drop list removes identifier columns and all target-related columns
# so that none of them leak into the feature matrix.
X = modelling_data.drop(
    ['strain', 'stress', 'Material', 'TestMethod', 'Grade Name', 'Filler Type',
     'Parent Identifier', 'Instance Number', 'strain_at_break', 'stress_at_break',
     'E', 'a', 'b', 'c', 'd', 'f', 'Base Resin1', 'Base Resin2',
     'Reinforce Fiber-Glass', 'base resin 3'],
    axis=1,
)
y = modelling_data[['E', 'a']]
metrics_df = evaluate_models_with_grid_search(X, y)
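
# --- Optional smoke test ---
# A minimal sketch for verifying the pipeline end to end when `modelling_data`
# is not available. Everything below (the frame names, the column labels, and
# the make_regression settings) is illustrative and not part of the original
# dataset; comment out the `modelling_data` block above to run this standalone.
from sklearn.datasets import make_regression

X_demo, y_demo = make_regression(
    n_samples=200, n_features=10, n_targets=2, noise=0.1, random_state=42
)
X_demo = pd.DataFrame(X_demo, columns=[f"x{i}" for i in range(10)])
y_demo = pd.DataFrame(y_demo, columns=["E", "a"])  # mirrors the two real targets
demo_metrics = evaluate_models_with_grid_search(X_demo, y_demo)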