import numpy as np
import lightgbm as lgb
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from hyperopt.pyll import scope


def walk_forward_val_mean_score(input_model):
    """Mean walk-forward MAE for a model fitted on the raw features."""
    metric_results = []
    for train_idx, test_idx in folds:
        X_train, X_test = X.loc[train_idx], X.loc[test_idx]
        y_train, y_test = y.loc[train_idx], y.loc[test_idx]
        pipeline = Pipeline(steps=[('model', input_model)])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        metric_results.append(mean_absolute_error(y_test, y_pred))
    return np.mean(metric_results)
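
# Usage sketch (hypothetical call; needs `X`, `y` and `folds`, which are
# defined further down):
#   baseline_mae = walk_forward_val_mean_score(lgb.LGBMRegressor(objective='regression'))
#   print(f'Baseline walk-forward MAE: {baseline_mae:.4f}')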

def scaled_walk_forward_val_mean_score(input_model):
    """Mean walk-forward MAE, min-max scaling the numerical columns first."""
    metric_results = []
    for train_idx, test_idx in folds:
        X_train, X_test = X.loc[train_idx], X.loc[test_idx]
        y_train, y_test = y.loc[train_idx], y.loc[test_idx]
        num_pipeline = Pipeline([
            ('minmax_scaler', MinMaxScaler())
        ])
        preprocessor = ColumnTransformer([
            ('numerical', num_pipeline, exp_X_columns)
        ], remainder='passthrough')
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('model', input_model)])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        metric_results.append(mean_absolute_error(y_test, y_pred))
    return np.mean(metric_results)
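
# `exp_X_columns` is assumed to be the list of numerical feature names defined
# earlier (e.g. X.select_dtypes('number').columns.tolist()); all other columns
# pass through the ColumnTransformer unscaled. For a tree model like LightGBM,
# min-max scaling is usually a no-op, so this variant matters mainly when
# `input_model` is a linear or distance-based estimator.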

def walk_forward_sklearn(estim):
    """Fit a search estimator (e.g. GridSearchCV) per fold and print its
    test score and best_estimator_."""
    for train_idx, test_idx in folds:
        X_train, X_test = X.loc[train_idx], X.loc[test_idx]
        y_train, y_test = y.loc[train_idx], y.loc[test_idx]
        estim.fit(X_train, y_train)
        print(estim.score(X_test, y_test))
        print(estim.best_estimator_)
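
# Usage sketch (hypothetical): only fitted search objects expose
# `best_estimator_`, so `estim` should be something like
#   from sklearn.model_selection import GridSearchCV
#   search = GridSearchCV(lgb.LGBMRegressor(), {'num_leaves': [31, 63]}, cv=3)
#   walk_forward_sklearn(search)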

def _random_state(name, random_state):
    # Either sample a seed from the search space or keep the fixed one.
    if random_state is None:
        return hp.randint(name, 5)
    else:
        return random_state
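
# Usage sketch for the helper above (hypothetical space entries):
#   'seed': _random_state('seed', None)  -> sampled via hp.randint
#   'seed': _random_state('seed', 42)    -> pinned to 42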

space = {
    'max_depth': scope.int(hp.uniform('max_depth', 1, 11)),
    'num_leaves': scope.int(hp.uniform('num_leaves', 2, 121)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.0001), np.log(0.5)) - 0.0001,
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 6000, 200)),
    # 'gamma': hp.loguniform('gamma', np.log(0.0001), np.log(5)) - 0.0001,
    'min_child_weight': scope.int(hp.loguniform('min_child_weight', np.log(1), np.log(100))),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    # 'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
    'reg_alpha': hp.loguniform('reg_alpha', np.log(0.0001), np.log(1)) - 0.0001,
    'reg_lambda': hp.loguniform('reg_lambda', np.log(1), np.log(4)),
    # 'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
    'boosting_type': hp.choice('boosting_type', ['gbdt']),
    'seed': hp.randint('seed', 5)
}
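
# Sanity check: draw one raw configuration from the space to confirm the
# scope.int casts and log ranges behave as intended (this uses hyperopt's
# stochastic sampler and does not run the objective).
from hyperopt.pyll.stochastic import sample as sample_space
print(sample_space(space))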

# Season-based walk-forward split (sscv is the custom splitter defined earlier).
folds, train_seasons, test_seasons = sscv.split(train_size=2, test_size=1)
default_hyperparameters = {
    'objective': 'regression',
    # 'random_state': 0
}

def objective(params):
    hyperparameters = {**default_hyperparameters, **params}
    model = lgb.LGBMRegressor(**hyperparameters)
    # Mean absolute error is already a minimization target, so it serves as the loss.
    mae = walk_forward_val_mean_score(model)
    return {'loss': mae, 'status': STATUS_OK}

# spark_trials = SparkTrials()
trials = Trials()
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=150,
    # trials=spark_trials,
    trials=trials,
    # Note: hyperopt >= 0.2.7 expects np.random.default_rng(0) here instead.
    rstate=np.random.RandomState(seed=0),
    return_argmin=False
)
best
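
# With return_argmin=False, fmin returns the evaluated space point (hp.choice
# entries resolved to values, scope.int already applied), so `best` can be fed
# straight back into LGBMRegressor to refit and re-score the tuned model:
final_model = lgb.LGBMRegressor(**{**default_hyperparameters, **best})
print(f'Tuned walk-forward MAE: {walk_forward_val_mean_score(final_model):.4f}')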