%%time
# Assumes X_train_, y_train_, categorical_features, numerical_features and
# RANDOM_STATE are defined in earlier cells.
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

# Model initialization
model = CatBoostClassifier(iterations=1088,
                           max_depth=8,
                           #bagging_temperature=1.1,           # 0.029831
                           learning_rate=0.03,
                           #l2_leaf_reg=1,
                           #bootstrap_type='Poisson',          # Bayesian, Bernoulli, Poisson, MVS
                           #subsample=5,
                           #grow_policy='SymmetricTree',       # Lossguide, SymmetricTree, Depthwise
                           #min_data_in_leaf=4,
                           #max_leaves=22,
                           #leaf_estimation_backtracking='No', # 'Armijo', 'AnyImprovement'
                           #eval_metric='AUC',
                           #loss_function='Logloss',
                           cat_features=categorical_features,
                           #border_count=254,                  # The number of splits for numerical features.
                           random_strength=10,
                           random_seed=RANDOM_STATE,
                           auto_class_weights='None',          # 'SqrtBalanced'
                           #eval_metric='AUC',
                           task_type='GPU',
                           devices='0:1',
                           metric_period=10,
                           verbose=100)

# Initialize stratified KFold cross-validation
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)
#kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)

roc_auc_scores = []

# Training with KFold cross-validation
for split, (train_idx, val_idx) in enumerate(kf.split(X_train_, y_train_)):
    print(f'\nTraining on split {split+1}/{n_splits}, '
          f'Years: {sorted(X_train_.iloc[train_idx].year.unique())}')

    # .copy() avoids pandas SettingWithCopyWarning when scaling in place below
    X_train, X_val = X_train_.iloc[train_idx].copy(), X_train_.iloc[val_idx].copy()
    y_train, y_val = y_train_.iloc[train_idx], y_train_.iloc[val_idx]

    #---------------- Normalization ----------------#
    scaler = StandardScaler()
    X_train.loc[:, numerical_features] = scaler.fit_transform(X_train[numerical_features])
    X_val.loc[:, numerical_features] = scaler.transform(X_val[numerical_features])
    #------------------------------------------------#

    model.fit(X_train, y_train,
              eval_set=(X_val, y_val),
              early_stopping_rounds=20,
              verbose=100)

    # Predictions on the validation fold
    y_pred = model.predict_proba(X_val)[:, 1]

    # Compute ROC-AUC on the validation fold
    roc_auc = roc_auc_score(y_val, y_pred)
    roc_auc_scores.append(roc_auc)
    print(f'Split {split+1}, ROC-AUC : {roc_auc}, GINI : {round((2*roc_auc) - 1, 4)}')

# Print the average ROC-AUC and GINI across all folds
rauc = sum(roc_auc_scores) / n_splits
print("Average ROC AUC:", rauc, "Average GINI:", rauc*2 - 1, 'Test Set GINI:')
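
# ---------------------------------------------------------------------------
# The final print above leaves 'Test Set GINI:' without a value. Below is a
# minimal sketch of that evaluation, assuming a held-out test set X_test_ /
# y_test_ exists (hypothetical names, not defined in this snippet). For
# simplicity it reuses the model and scaler left over from the last CV fold;
# refitting the scaler (or the model) on the full training data is an equally
# reasonable alternative.
# ---------------------------------------------------------------------------
X_test = X_test_.copy()
X_test.loc[:, numerical_features] = scaler.transform(X_test[numerical_features])

# Predicted probabilities for the positive class on the test set
y_test_pred = model.predict_proba(X_test)[:, 1]

test_auc = roc_auc_score(y_test_, y_test_pred)
print('Test Set ROC AUC:', test_auc, 'Test Set GINI:', 2*test_auc - 1)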