Untitled

mail@pastecode.io avatar
unknown
python
a year ago
3.2 kB
2
Indexable
Never
%%time


# Model initialisation.
# The active hyper-parameters are collected in one mapping and unpacked into the
# CatBoostClassifier constructor, so the configuration can be inspected or logged.
#
# Settings that are listed for reference only and are NOT passed to CatBoost:
#   bagging_temperature=1.1 (0.029831), l2_leaf_reg=1,
#   bootstrap_type='Poisson' (Bayesian / Bernoulli / Poisson / MVS), subsample=5,
#   grow_policy='SymmetricTree' (Lossguide / SymmetricTree / Depthwise),
#   min_data_in_leaf=4, max_leaves=22,
#   leaf_estimation_backtracking='No' ('Armijo' / 'AnyImprovement'),
#   eval_metric='AUC', loss_function='Logloss',
#   border_count=254 (the number of splits for numerical features)
catboost_params = dict(
    # Boosting schedule and tree shape.
    iterations=1088,
    learning_rate=0.03,
    max_depth=8,
    random_strength=10,
    # Data description and reproducibility.
    cat_features=categorical_features,
    random_seed=RANDOM_STATE,
    auto_class_weights='None',  # alternative kept from the original note: 'SqrtBalanced'
    # Hardware selection.
    task_type='GPU',
    devices='0:1',
    # Logging cadence: metrics every 10 iterations, a log line every 100.
    metric_period=10,
    verbose=100,
)

model = CatBoostClassifier(**catboost_params)

# Cross-validation setup.
# StratifiedKFold is used so the folds are built from both X_train_ and the labels
# in y_train_; a plain KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)
# is the unstratified alternative.
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)

# One ROC-AUC value per validation fold, in fold order.
roc_auc_scores = []

# Train and score the model once per fold.
for split, (train_idx, val_idx) in enumerate(kf.split(X_train_, y_train_)):
    # Explicit copies: the scaling below writes into these frames, and writing into
    # an un-copied slice of X_train_ is the chained-assignment pattern that pandas
    # reports with SettingWithCopyWarning.
    X_train = X_train_.iloc[train_idx].copy()
    X_val = X_train_.iloc[val_idx].copy()
    y_train, y_val = y_train_.iloc[train_idx], y_train_.iloc[val_idx]

    print(f'\nTraining on split {split+1}/{n_splits}, Years: {sorted(X_train.year.unique())}')

    # ---------------- Standardisation ---------------- #
    # The scaler is fitted on the training fold only and then applied to the
    # validation fold, so no validation statistics leak into training.
    # Whole-column assignment (instead of .loc[:, cols] = ...) replaces the columns,
    # so integer-typed features become float without an in-place upcast.
    scaler = StandardScaler()
    X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
    X_val[numerical_features] = scaler.transform(X_val[numerical_features])
    # -------------------------------------------------- #

    # NOTE(review): the validation fold drives early stopping here and is also the
    # fold scored below, so the reported ROC-AUC is likely somewhat optimistic.
    model.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=20, verbose=100)

    # Probability of the positive class (second column) on the validation fold.
    y_pred = model.predict_proba(X_val)[:, 1]

    # ROC-AUC for this fold; GINI is reported as 2 * AUC - 1.
    roc_auc = roc_auc_score(y_val, y_pred)
    roc_auc_scores.append(roc_auc)

    print(f'Split {split+1}, ROC-AUC : {roc_auc}, GINI : {round((2*roc_auc) - 1, 4)}')

    
# Mean ROC-AUC across the validation folds. Dividing by the number of collected
# scores keeps the mean correct even if it ever differs from n_splits.
rauc = sum(roc_auc_scores) / len(roc_auc_scores)

# Report the cross-validation averages (GINI = 2 * AUC - 1).
# No test-set score is computed in this cell, so the former trailing
# 'Test Set GINI:' label, which printed without any value, is not emitted.
print("Average ROC AUC:", rauc, "Average GINI:", rauc*2-1)