Hyperparameter Optimization using hyperopt

 avatar
unknown
python
4 years ago
3.6 kB
1
Indexable
def walk_forward_val_mean_score(input_model):
    """Return the mean absolute error of ``input_model`` averaged over all folds.

    For every ``(train_idx, test_idx)`` pair in the module-level ``folds``,
    the model is wrapped in a single-step pipeline, fitted on the training
    rows of the module-level ``X`` / ``y`` and scored on the test rows.

    Args:
        input_model: Estimator placed as the ``'model'`` step of the pipeline.
            The same instance is fitted again on every fold.

    Returns:
        ``np.mean`` of the per-fold ``mean_absolute_error`` values.
    """
    fold_errors = []
    for train_idx, test_idx in folds:
        # Label-based selection of the rows belonging to this fold.
        train_features, test_features = X.loc[train_idx], X.loc[test_idx]
        train_target, test_target = y.loc[train_idx], y.loc[test_idx]

        estimator = Pipeline(steps=[('model', input_model)])
        estimator.fit(train_features, train_target)
        predictions = estimator.predict(test_features)

        fold_errors.append(mean_absolute_error(test_target, predictions))

    return np.mean(fold_errors)

def scaled_walk_forward_val_mean_score(input_model):
    """Return the fold-averaged mean absolute error with min-max scaled inputs.

    Same walk-forward loop as the unscaled variant, except that the columns
    listed in the module-level ``exp_X_columns`` go through a
    ``MinMaxScaler`` before reaching the model; every other column is passed
    through unchanged.

    Args:
        input_model: Estimator placed as the final ``'model'`` step. The same
            instance is fitted again on every fold.

    Returns:
        ``np.mean`` of the per-fold ``mean_absolute_error`` values.
    """
    fold_errors = []
    for train_idx, test_idx in folds:
        # Label-based selection of the rows belonging to this fold.
        train_features, test_features = X.loc[train_idx], X.loc[test_idx]
        train_target, test_target = y.loc[train_idx], y.loc[test_idx]

        # A fresh scaler/transformer is built per fold, so scaling is fitted
        # on that fold's training rows only.
        scaling_steps = Pipeline([('minmax_scaler', MinMaxScaler())])
        column_prep = ColumnTransformer(
            [('numerical', scaling_steps, exp_X_columns)],
            remainder='passthrough',
        )
        estimator = Pipeline(steps=[
            ('preprocessor', column_prep),
            ('model', input_model),
        ])

        estimator.fit(train_features, train_target)
        predictions = estimator.predict(test_features)
        fold_errors.append(mean_absolute_error(test_target, predictions))

    return np.mean(fold_errors)

def walk_forward_sklearn(estim):
    """Fit ``estim`` on each fold and print its test score and best model.

    Iterates over the module-level ``folds``; for each one the estimator is
    fitted on the training rows of ``X`` / ``y``, then ``estim.score`` on the
    test rows and ``estim.best_model()`` are printed. Nothing is returned.

    Args:
        estim: Object exposing ``fit``, ``score`` and ``best_model``
            (presumably an hpsklearn ``HyperoptEstimator`` -- TODO confirm).
    """
    for train_idx, test_idx in folds:
        # Label-based selection of the rows belonging to this fold.
        train_features, test_features = X.loc[train_idx], X.loc[test_idx]
        train_target, test_target = y.loc[train_idx], y.loc[test_idx]

        estim.fit(train_features, train_target)
        fold_score = estim.score(test_features, test_target)
        print(fold_score)
        print(estim.best_model())

def _random_state(name, random_state):
    if random_state is None:
        return hp.randint(name, 5)
    else:
        return random_state


# hyperopt search space for the LightGBM regressor. Each key is a keyword
# argument that objective() unpacks into lgb.LGBMRegressor; each value is a
# hyperopt expression whose first argument is the label recorded in the trials.
space = {
    # Tree-shape parameters: sampled uniformly, then cast to int.
    'max_depth': scope.int(hp.uniform('max_depth', 1, 11)),
    'num_leaves': scope.int(hp.uniform('num_leaves', 2, 121)),
    # Log-uniform over [0.0001, 0.5], shifted down by 0.0001 so the lower end
    # of the range sits at (approximately) zero.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.0001), np.log(0.5)) - 0.0001,
    # Quantized with a step of 200 between the given bounds, then cast to int.
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 6000, 200)),
    #'gamma': hp.loguniform('gamma', np.log(0.0001), np.log(5)) - 0.0001,
    # Log-uniform over [1, 100], cast to int.
    'min_child_weight': scope.int(hp.loguniform('min_child_weight', np.log(1), np.log(100))),
    # Row / column sampling fractions, uniform over [0.5, 1].
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    #'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
    # Regularisation terms; reg_alpha uses the same "minus lower bound" shift
    # as learning_rate, reg_lambda is log-uniform over [1, 4].
    'reg_alpha': hp.loguniform('reg_alpha', np.log(0.0001), np.log(1)) - 0.0001,
    'reg_lambda': hp.loguniform('reg_lambda', np.log(1), np.log(4)),
    #'boosting_type': hp.choice(name, ['gbdt', 'dart', 'goss']),
    # Single-option choice: the boosting type is effectively fixed to 'gbdt'.
    'boosting_type': hp.choice('boosting_type', ['gbdt']),
    # The model seed is itself searched, drawn from hp.randint with bound 5.
    'seed': hp.randint('seed', 5)
}

# Walk-forward splits used by the validation helpers: each element of `folds`
# is unpacked there as a (train_idx, test_idx) pair.
folds, train_seasons, test_seasons = sscv.split(train_size=2, test_size=1)

# Fixed LightGBM settings; objective() merges the sampled values on top of
# these. NOTE: a 'random_state': 0 entry is left disabled here.
default_hiperparameters = dict(objective='regression')

def objective(space):
    """Evaluate one sampled hyperparameter set for hyperopt.

    Args:
        space: Mapping of sampled hyperparameter values; entries override
            those in ``default_hiperparameters`` on key collisions.

    Returns:
        A dict with ``'loss'`` (the walk-forward mean absolute error shifted
        by -100) and ``'status'`` set to ``STATUS_OK``.
    """
    # Defaults first, sampled values second, so the sample wins on conflicts.
    params = dict(default_hiperparameters)
    params.update(space)

    regressor = lgb.LGBMRegressor(**params)

    # The score is a mean absolute error, so lower is better.
    mean_error = walk_forward_val_mean_score(regressor)

    return {'loss': mean_error - 100, 'status': STATUS_OK}

# Trial history is kept in a plain in-memory Trials object. A SparkTrials()
# store (passed as trials=spark_trials) is the disabled alternative.
trials = Trials()

# Run 150 TPE evaluations of `objective` over `space` with a fixed RNG seed.
# NOTE(review): return_argmin=False presumably makes fmin return the evaluated
# parameter values rather than raw choice indices -- confirm against the
# installed hyperopt version.
best = fmin(
    objective,
    space,
    algo=tpe.suggest,
    max_evals=150,
    rstate=np.random.RandomState(seed=0),
    trials=trials,
    return_argmin=False,
)
best