Untitled
unknown
plain_text
2 years ago
3.8 kB
4
Indexable
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import VotingClassifier
from matplotlib import pyplot
import scikitplot as skplt
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, average_precision_score
from sklearn.model_selection import cross_val_predict

# NOTE(review): `pd`, `MLPClassifier`, `ds` and the `treino_*` / `valid_*` /
# `teste_*` frames are not defined in this cell; presumably they come from
# earlier notebook cells -- confirm before running this standalone.

# Columns removed from every frame before it is used as model input.
_LABEL_COLUMNS = ['IND_BOM_1_2', 'IND_BOM_1_1']


def _concat_without_labels(frame_ina, frame_adi):
    """Stack the two frames (``ina`` rows first) after dropping the label columns."""
    return pd.concat([
        frame_ina.drop(_LABEL_COLUMNS, axis=1),
        frame_adi.drop(_LABEL_COLUMNS, axis=1),
    ])


ds_dropado = ds.drop(labels=_LABEL_COLUMNS, axis=1)

df_treino = _concat_without_labels(treino_ina, treino_adi)

# NOTE(review): 'INDEX' is dropped from the validation and test frames but not
# from the training frame -- confirm this asymmetry is intended.
df_validacao = _concat_without_labels(valid_ina, valid_adi)
df_validacao.drop(columns=['INDEX'], inplace=True)

df_teste = _concat_without_labels(teste_ina, teste_adi)
df_teste.drop(columns=['INDEX'], inplace=True)


def get_dataset():
    """Return the training features and their binary labels.

    Returns:
        A ``(X, y)`` pair where ``X`` is ``df_treino`` and ``y`` is a list
        with ``0`` for every row of ``treino_ina`` followed by ``1`` for every
        row of ``treino_adi`` (the same order used to build ``df_treino``).
    """
    X = df_treino
    y = [0] * len(treino_ina) + [1] * len(treino_adi)
    return X, y


def get_voting():
    """Build the soft-voting ensemble of three MLPs and fit it on the training set.

    Returns:
        The fitted ``VotingClassifier``.
    """
    # NOTE(review): per the scikit-learn docs, `early_stopping` and
    # `learning_rate_init` only take effect with solver='sgd' or 'adam', so
    # they are inert for 'mlp0' (lbfgs). Kept as in the original.
    models = [
        ('mlp0', MLPClassifier(verbose=True, max_iter=10000, early_stopping=True,
                               hidden_layer_sizes=(13,), solver='lbfgs',
                               learning_rate='constant', activation='logistic',
                               learning_rate_init=0.0319077297544169)),
        ('mlp1', MLPClassifier(verbose=True, max_iter=10000, early_stopping=True,
                               hidden_layer_sizes=(3,), solver='sgd',
                               learning_rate='adaptive', activation='tanh',
                               learning_rate_init=0.058684739035340376)),
        ('mlp2', MLPClassifier(verbose=True, max_iter=10000, early_stopping=True,
                               hidden_layer_sizes=(5,), solver='sgd',
                               learning_rate='constant', activation='tanh',
                               learning_rate_init=0.010432296668493837)),
    ]
    # Fetch the training data once instead of once per argument.
    X, y = get_dataset()
    ensemble = VotingClassifier(verbose=True, estimators=models, voting='soft')
    return ensemble.fit(X, y)


def get_models():
    """Return the models to evaluate, keyed by name.

    Returns:
        A dict with three unfitted default ``MLPClassifier`` instances
        ('mlp0'..'mlp2') and the fitted soft-voting ensemble ('soft_voting').
    """
    # NOTE(review): these stand-alone MLPs use default hyper-parameters, not
    # the tuned ones inside get_voting() -- confirm that is intended.
    models = {
        name: MLPClassifier(max_iter=10000, early_stopping=True)
        for name in ('mlp0', 'mlp1', 'mlp2')
    }
    models['soft_voting'] = get_voting()
    return models


def compute_performance_metrics_sem_plot2(y, y_pred_class, y_pred_scores, rede_trial):
    """Compute classification metrics and, when scores are given, save a KS plot.

    Args:
        y: True labels.
        y_pred_class: Predicted class labels.
        y_pred_scores: Per-class scores indexed as ``[:, 1]`` for the positive
            class, or ``None`` to skip score-based metrics and plotting.
        rede_trial: Used both as the plot title and as the path passed to
            ``savefig``.

    Returns:
        ``(accuracy, recall, precision, f1)`` when ``y_pred_scores`` is
        ``None``; otherwise the same tuple extended with ``(auroc, aupr)``.
    """
    accuracy = accuracy_score(y, y_pred_class)
    recall = recall_score(y, y_pred_class)
    precision = precision_score(y, y_pred_class)
    f1 = f1_score(y, y_pred_class)
    performance_metrics = (accuracy, recall, precision, f1)

    # Without scores there is no KS figure to annotate and no AUROC/AUPR to
    # report, so return before anything references those values.
    if y_pred_scores is None:
        return performance_metrics

    skplt.metrics.plot_ks_statistic(y, y_pred_scores)

    positive_scores = y_pred_scores[:, 1]
    auroc = roc_auc_score(y, positive_scores)
    aupr = average_precision_score(y, positive_scores)
    performance_metrics = performance_metrics + (auroc, aupr)

    # `pyplot` is the name this file imports; annotate and save the KS figure.
    pyplot.title(label=rede_trial, y=0.9)
    pyplot.suptitle('Acurácia: {:3.3f}\nRecall: {:3.3f}\nPrecision: {:3.3f}\nF1: {:3.3f}\nAUROC: {:3.3f}\nAURP: {:3.3f}'.format(accuracy, recall, precision, f1, auroc, aupr), x=0.25, y=0.8)
    pyplot.savefig(rede_trial, dpi=100)
    pyplot.close()
    return performance_metrics


def evaluate_model(model, X, y):
    """Cross-validate ``model`` and save its metrics plot.

    Args:
        model: Estimator supporting ``predict`` and ``predict_proba``.
        X: Feature matrix.
        y: Target labels.

    Returns:
        The out-of-fold class predictions from ``cross_val_predict``.
    """
    # cross_val_predict requires every sample to land in exactly one test
    # fold; a repeated splitter violates that and raises ValueError, so a
    # single shuffled stratified 10-fold split is used. The fixed seed makes
    # both calls below use identical folds.
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    scores = cross_val_predict(model, X, y, cv=cv, n_jobs=-1, method='predict')
    scores_proba = cross_val_predict(model, X, y, cv=cv, n_jobs=-1, method='predict_proba')
    compute_performance_metrics_sem_plot2(y, scores, scores_proba, 'Ensamble/Ensamble')
    return scores


# Training dataset
X, y = get_dataset()

# Ensemble models
models = get_models()

# Evaluate each model and store its results
results, names = [], []
Editor is loading...