import pandas as pd
from matplotlib import pyplot
import matplotlib.pyplot as plt  # the plotting helpers below use the plt alias
import scikitplot as skplt
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, cross_val_predict
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (accuracy_score, recall_score, precision_score,
                             f1_score, roc_auc_score, average_precision_score)
# Feature frames: drop the target indicator columns; the validation and test
# frames also drop the INDEX identifier column.
ds_dropado = ds.drop(labels=['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1)
df_treino = pd.concat([treino_ina.drop(['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1),
                       treino_adi.drop(['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1)])
df_validacao = pd.concat([valid_ina.drop(['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1),
                          valid_adi.drop(['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1)])
df_validacao.drop(columns=['INDEX'], inplace=True)
df_teste = pd.concat([teste_ina.drop(['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1),
                      teste_adi.drop(['IND_BOM_1_2', 'IND_BOM_1_1'], axis=1)])
df_teste.drop(columns=['INDEX'], inplace=True)
def get_dataset():
    # Training features with binary labels: 0 for the treino_ina rows, 1 for the treino_adi rows.
    X, y = df_treino, [0] * len(treino_ina) + [1] * len(treino_adi)
    return X, y
def get_voting():
    # Three tuned MLPs combined by soft voting (averaged class probabilities).
    models = [
        ('mlp0', MLPClassifier(verbose=True, max_iter=10000, early_stopping=True, hidden_layer_sizes=(13,),
                               solver='lbfgs', learning_rate='constant', activation='logistic', learning_rate_init=0.0319077297544169)),
        ('mlp1', MLPClassifier(verbose=True, max_iter=10000, early_stopping=True, hidden_layer_sizes=(3,),
                               solver='sgd', learning_rate='adaptive', activation='tanh', learning_rate_init=0.058684739035340376)),
        ('mlp2', MLPClassifier(verbose=True, max_iter=10000, early_stopping=True, hidden_layer_sizes=(5,),
                               solver='sgd', learning_rate='constant', activation='tanh', learning_rate_init=0.010432296668493837)),
    ]
    ensemble = VotingClassifier(estimators=models, voting='soft', verbose=True).fit(*get_dataset())
    return ensemble
def get_models():
    # Default-parameter MLP baselines plus the fitted soft-voting ensemble, keyed by name.
    models = dict()
    models['mlp0'] = MLPClassifier(max_iter=10000, early_stopping=True)
    models['mlp1'] = MLPClassifier(max_iter=10000, early_stopping=True)
    models['mlp2'] = MLPClassifier(max_iter=10000, early_stopping=True)
    models['soft_voting'] = get_voting()
    return models
def compute_performance_metrics_sem_plot2(y, y_pred_class, y_pred_scores, rede_trial):
    # Threshold-based metrics on the predicted classes.
    accuracy = accuracy_score(y, y_pred_class)
    recall = recall_score(y, y_pred_class)
    precision = precision_score(y, y_pred_class)
    f1 = f1_score(y, y_pred_class)
    performance_metrics = (accuracy, recall, precision, f1)
    if y_pred_scores is not None:
        # Score-based metrics and the KS plot, using the positive-class probabilities.
        skplt.metrics.plot_ks_statistic(y, y_pred_scores)
        y_pred_scores = y_pred_scores[:, 1]
        auroc = roc_auc_score(y, y_pred_scores)
        aupr = average_precision_score(y, y_pred_scores)
        performance_metrics = performance_metrics + (auroc, aupr)
        plt.title(label=rede_trial, y=0.9)
        plt.suptitle('Accuracy: {:3.3f}\nRecall: {:3.3f}\nPrecision: {:3.3f}\nF1: {:3.3f}\nAUROC: {:3.3f}\nAUPR: {:3.3f}'.format(
            accuracy, recall, precision, f1, auroc, aupr), x=0.25, y=0.8)
        plt.savefig(rede_trial, dpi=100)
        plt.close()
    return performance_metrics
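# Hedged usage sketch (not part of the original pipeline): fit one default MLP on the
# training set and report its in-sample metrics, just to illustrate the arguments that
# compute_performance_metrics_sem_plot2 expects. The mlp_demo/X_demo/y_demo names and
# the 'Ensamble/mlp_demo' output path are hypothetical.
X_demo, y_demo = get_dataset()
mlp_demo = MLPClassifier(max_iter=10000, early_stopping=True).fit(X_demo, y_demo)
compute_performance_metrics_sem_plot2(y_demo, mlp_demo.predict(X_demo),
                                      mlp_demo.predict_proba(X_demo), 'Ensamble/mlp_demo')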
def evaluate_model(model, X, y):
    # cross_val_predict requires each sample to appear in exactly one test fold, so a
    # single stratified 10-fold split is used (RepeatedStratifiedKFold is not a partition
    # and would make cross_val_predict raise a ValueError).
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    scores = cross_val_predict(model, X, y, cv=cv, n_jobs=-1, method='predict')
    scores_proba = cross_val_predict(model, X, y, cv=cv, n_jobs=-1, method='predict_proba')
    compute_performance_metrics_sem_plot2(y, scores, scores_proba, 'Ensamble/Ensamble')
    return scores
# Training dataset
X, y = get_dataset()
# Individual MLPs and the soft-voting ensemble
models = get_models()
# Evaluate each model and store its results
results, names = list(), list()
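# Hedged sketch of the evaluation loop implied by `results` and `names` (the original
# loop is not shown in this excerpt): cross-validate each model, keep its out-of-fold
# predictions, and print the resulting accuracy.
for name, model in models.items():
    y_pred = evaluate_model(model, X, y)
    results.append(y_pred)
    names.append(name)
    print('%s: accuracy=%.3f' % (name, accuracy_score(y, y_pred)))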