Hyperparameter Optimization using hyperopt
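The snippet below tunes a LightGBM regressor with hyperopt's TPE algorithm, scoring each candidate configuration by its mean absolute error under walk-forward (season-based) cross-validation.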
import numpy as np
import lightgbm as lgb
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from hyperopt import fmin, hp, tpe, Trials, STATUS_OK
from hyperopt.pyll import scope

# X, y, exp_X_columns and the season-based splitter `sscv` are assumed to be
# defined earlier in the notebook.

def walk_forward_val_mean_score(input_model):
    """Mean MAE of `input_model` over the walk-forward folds."""
    metric_results = []
    for train_idx, test_idx in folds:
        X_train, X_test = X.loc[train_idx], X.loc[test_idx]
        y_train, y_test = y.loc[train_idx], y.loc[test_idx]
        pipeline = Pipeline(steps=[('model', input_model)])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        metric_results.append(mean_absolute_error(y_test, y_pred))
    return np.mean(metric_results)

def scaled_walk_forward_val_mean_score(input_model):
    """Like walk_forward_val_mean_score, but min-max scales the numerical
    columns (exp_X_columns) before fitting."""
    metric_results = []
    for train_idx, test_idx in folds:
        X_train, X_test = X.loc[train_idx], X.loc[test_idx]
        y_train, y_test = y.loc[train_idx], y.loc[test_idx]
        num_pipeline = Pipeline([('minmax_scaler', MinMaxScaler())])
        preprocessor = ColumnTransformer(
            [('numerical', num_pipeline, exp_X_columns)],
            remainder='passthrough')
        pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('model', input_model)])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        metric_results.append(mean_absolute_error(y_test, y_pred))
    return np.mean(metric_results)

def walk_forward_sklearn(estim):
    """Walk-forward evaluation for a hyperopt-sklearn HyperoptEstimator,
    printing the fold score and the best model found on each fold."""
    for train_idx, test_idx in folds:
        X_train, X_test = X.loc[train_idx], X.loc[test_idx]
        y_train, y_test = y.loc[train_idx], y.loc[test_idx]
        estim.fit(X_train, y_train)
        print(estim.score(X_test, y_test))
        print(estim.best_model())

def _random_state(name, random_state):
    """Draw a seed from the search space when no random state is given."""
    if random_state is None:
        return hp.randint(name, 5)
    return random_state

space = {
    'max_depth': scope.int(hp.uniform('max_depth', 1, 11)),
    'num_leaves': scope.int(hp.uniform('num_leaves', 2, 121)),
    # Shifted by 0.0001 so the sampled range effectively starts at 0.
    'learning_rate': hp.loguniform('learning_rate',
                                   np.log(0.0001), np.log(0.5)) - 0.0001,
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 6000, 200)),
    #'gamma': hp.loguniform('gamma', np.log(0.0001), np.log(5)) - 0.0001,
    'min_child_weight': scope.int(hp.loguniform('min_child_weight',
                                                np.log(1), np.log(100))),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    #'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
    'reg_alpha': hp.loguniform('reg_alpha', np.log(0.0001), np.log(1)) - 0.0001,
    'reg_lambda': hp.loguniform('reg_lambda', np.log(1), np.log(4)),
    #'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
    'boosting_type': hp.choice('boosting_type', ['gbdt']),
    'seed': hp.randint('seed', 5),
}

# Walk-forward splits: train on two seasons, test on the following one.
folds, train_seasons, test_seasons = sscv.split(train_size=2, test_size=1)

default_hyperparameters = {
    'objective': 'regression',
    #'random_state': 0
}

def objective(params):
    hyperparameters = {**default_hyperparameters, **params}
    model = lgb.LGBMRegressor(**hyperparameters)
    mae = walk_forward_val_mean_score(model)
    # fmin minimizes the loss; the constant -100 offset does not change the argmin.
    return {'loss': mae - 100, 'status': STATUS_OK}

#spark_trials = SparkTrials()
trials = Trials()

best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=150,
    #trials=spark_trials,
    trials=trials,
    # Newer hyperopt releases expect a Generator, e.g. np.random.default_rng(0).
    rstate=np.random.RandomState(seed=0),
    # Return the actual parameter values rather than raw search-space indices.
    return_argmin=False,
)
best
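With `return_argmin=False`, `best` holds the actual parameter values rather than search-space indices, so it can be merged with the defaults to refit a final model. A minimal sketch, assuming `X` and `y` are the full training frames used above:

# Refit on all available data with the tuned configuration.
final_params = {**default_hyperparameters, **best}
final_model = lgb.LGBMRegressor(**final_params)
final_model.fit(X, y)

Refitting on all seasons is a common last step once the walk-forward MAE has been used to pick the configuration.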