Untitled
unknown
plain_text
3 years ago
2.3 kB
2
Indexable
"""Benchmark two scikit-learn classifiers on a collection of CSV datasets.

For every dataset listed in ``datasets`` the script runs repeated k-fold
cross-validation with Gaussian Naive Bayes and Logistic Regression and
prints the mean accuracy and its standard deviation for each classifier.
"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RepeatedKFold

# Classifiers
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

# Column labels of the score matrix built by ``test``; the order must match
# the order in which the per-fold accuracies are appended there.
matrix_labels = ['gaussian_nb', 'logistic_regression']

datasets = [
    "bupa", "coil2000", "cryotherapy", "ecoli4", "german", "glass2",
    "glass4", "glass5", "haberman", "heart", "ionosphere", "iris", "liver",
    "mammographic", "monk-2", "phoneme", "pima", "popfailures", "ring",
    "sonar", "soybean", "spambase", "spectfheart", "titanic", "twonorm",
    "vowel0", "waveform", "wine", "wisconsin",
]


def prepare_dataset(set_name):
    """Load ``dataset/<set_name>.csv`` and split it into features and labels.

    Every column except the last is returned as the feature matrix; the last
    column is cast to ``int`` and returned as the label vector.
    """
    # NOTE(review): assumes the CSV has no header row and that the last
    # column holds integer class labels -- confirm against the data files.
    dataset = np.genfromtxt(f"dataset/{set_name}.csv", delimiter=",")
    x = dataset[:, :-1]
    y = dataset[:, -1].astype(int)
    return x, y


def split_data(x, y):
    """Return a fixed-seed 70/30 hold-out split as (x_train, x_test, y_train, y_test)."""
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=.3, random_state=42
    )
    return x_train, x_test, y_train, y_test


def get_gaussian(x_train, y_train, x_test):
    """Fit Gaussian Naive Bayes on the training data and predict ``x_test``."""
    clf = GaussianNB()
    clf.fit(x_train, y_train)
    return clf.predict(x_test)


def get_logistic_regression(x_train, y_train, x_test):
    """Fit Logistic Regression on the training data and predict ``x_test``."""
    logreg_clf = LogisticRegression()
    logreg_clf.fit(x_train, y_train)
    return logreg_clf.predict(x_test)


def test(x, y):
    """Cross-validate both classifiers on ``(x, y)``.

    Uses ``RepeatedKFold`` with 5 splits repeated twice (seeded).

    Returns:
        A list whose first row is a copy of ``matrix_labels`` and whose
        remaining rows hold, per fold, the accuracy of each classifier in
        the same column order.
    """
    kf = RepeatedKFold(n_splits=5, n_repeats=2, random_state=1234)
    # Copy the labels so callers mutating the result cannot alter the
    # module-level constant.
    results = [list(matrix_labels)]
    for train_index, test_index in kf.split(x):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        results.append([
            accuracy_score(y_test, get_gaussian(x_train, y_train, x_test)),
            accuracy_score(
                y_test, get_logistic_regression(x_train, y_train, x_test)
            ),
        ])
    return results


def get_score(numpy):
    """Format mean and standard deviation of accuracy scores.

    Args:
        numpy: Either purely numeric scores (any shape ``np.mean`` accepts),
            or a list/tuple whose first row is a sequence of string column
            labels followed by numeric rows, as produced by ``test``.

    Returns:
        For purely numeric input, ``"Accuracy score: <mean> (<std>)"`` over
        all values. For labelled input, one such line per column, prefixed
        with the column label and joined by newlines.
    """
    labels = None
    scores = numpy
    if isinstance(numpy, (list, tuple)) and numpy:
        first = numpy[0]
        if (
            isinstance(first, (list, tuple))
            and first
            and all(isinstance(value, str) for value in first)
        ):
            # Strip the header row: averaging it together with the floats
            # would coerce everything to strings and make np.mean raise.
            labels = list(first)
            scores = numpy[1:]

    if labels is None:
        return "Accuracy score: %.3f (%.3f)" % (np.mean(scores), np.std(scores))

    table = np.asarray(scores, dtype=float)
    if table.ndim != 2 or table.shape[1] != len(labels):
        # Labels do not line up with the columns; fall back to one overall
        # figure rather than guessing which label belongs to which value.
        return "Accuracy score: %.3f (%.3f)" % (np.mean(table), np.std(table))

    means = table.mean(axis=0)
    stds = table.std(axis=0)
    return "\n".join(
        "%s: Accuracy score: %.3f (%.3f)" % (label, mean, std)
        for label, mean, std in zip(labels, means, stds)
    )


def main():
    """Evaluate every dataset in ``datasets`` and print its scores."""
    for dataset in datasets:
        x, y = prepare_dataset(dataset)
        print(f"{dataset}:")
        print(get_score(test(x, y)))


if __name__ == "__main__":
    main()
Editor is loading...