Untitled
unknown
python
2 years ago
3.2 kB
9
Indexable
%%time
# Model initialisation.
# Commented-out keyword arguments are alternative settings kept for reference.
model = CatBoostClassifier(
    # --- boosting schedule and tree shape ---
    iterations=1088,
    learning_rate=0.03,
    max_depth=8,
    # grow_policy='SymmetricTree',  # Lossguide SymmetricTree Depthwise
    # min_data_in_leaf=4,
    # max_leaves=22,
    # --- sampling / randomisation / weighting ---
    # bagging_temperature=1.1,  # 0.029831
    # l2_leaf_reg=1,
    # bootstrap_type='Poisson',  # Bayesian, Bernoulli, Poisson, MVS
    # subsample=5,
    # leaf_estimation_backtracking='No',  # 'Armijo' 'AnyImprovement'
    random_strength=10,
    auto_class_weights='None',  # 'SqrtBalanced'
    # --- features ---
    cat_features=categorical_features,
    # border_count=254,  # The number of splits for numerical features.
    # --- objective / metric ---
    # loss_function='Logloss',
    # eval_metric='AUC',
    # --- reproducibility, hardware, logging ---
    random_seed=RANDOM_STATE,
    task_type='GPU',
    devices='0:1',
    metric_period=10,
    verbose=100,
)
# Stratified K-fold cross-validation setup.
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)
# Non-stratified alternative:
# kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)
roc_auc_scores = []

# Train and evaluate on each fold.
# NOTE: the same `model` object is passed to fit() on every fold, so after the
# loop `model` refers to whatever the last fit() call left in it.
for split, (train_idx, val_idx) in enumerate(kf.split(X_train_, y_train_)):
    # Take explicit copies: the fold frames are modified below (scaling), and
    # writing into a bare .iloc slice triggers pandas' SettingWithCopyWarning.
    X_train = X_train_.iloc[train_idx].copy()
    X_val = X_train_.iloc[val_idx].copy()
    y_train, y_val = y_train_.iloc[train_idx], y_train_.iloc[val_idx]
    print(f'\nTraining on split {split+1}/{n_splits}, Years: {sorted(X_train.year.unique())}')

    # ---------------- Normalisation ---------------- #
    # The scaler is fitted on the training fold only and then applied to the
    # validation fold, so no validation statistics leak into training.
    # Columns are replaced whole (not written through .loc) so the scaled
    # float values do not depend on the original column dtype.
    scaler = StandardScaler()
    X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
    X_val[numerical_features] = scaler.transform(X_val[numerical_features])
    # ----------------------------------------------- #

    # NOTE(review): the validation fold is used both as the early-stopping
    # eval_set and for the score below, so the per-fold ROC-AUC may be
    # optimistic — confirm this is intended.
    model.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=20, verbose=100)

    # Predicted probability of the positive class on the validation fold.
    y_pred = model.predict_proba(X_val)[:, 1]

    # ROC-AUC for this fold; GINI is derived as 2 * AUC - 1.
    roc_auc = roc_auc_score(y_val, y_pred)
    roc_auc_scores.append(roc_auc)
    print(f'Split {split+1}, ROC-AUC : {roc_auc}, GINI : {round((2*roc_auc) - 1, 4)}')

# Mean ROC-AUC over the folds that were actually scored.
rauc = sum(roc_auc_scores) / len(roc_auc_scores)
# NOTE: no test-set GINI is computed in this cell, so the summary reports only
# the cross-validation averages (the former trailing 'Test Set GINI:' label
# had no value attached to it).
print("Average ROC AUC:", rauc, "Average GINI:", rauc*2-1)
Editor is loading...