Untitled
unknown
plain_text
a year ago
1.5 kB
6
Indexable
# Train a decision-tree classifier on a breast-cancer CSV file and print its
# accuracy and per-class classification report on a 30 % hold-out split.
import os

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import tree
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# Read the data from file.
# NOTE(review): the label is selected below by the column name 'lop', so the
# file is assumed to carry a header row containing that name -- confirm.
data = pd.read_csv('Z:\\nDP\\breast-cancer-wisconsin.data')

# Replace every non-numeric column with integer codes so the tree can consume
# it. Testing "not numeric" (instead of `dtype == object`) also covers columns
# that pandas stores with its dedicated string dtype.
le = preprocessing.LabelEncoder()
for column_name in data.columns:
    if not pd.api.types.is_numeric_dtype(data[column_name]):
        data[column_name] = le.fit_transform(data[column_name])

# Target is the 'lop' column; features are the columns at positions 1..9
# (position 0 is skipped).
y = data['lop']
X = data.iloc[:, 1:10]

# Hold out 30 % of the rows for evaluation. No random_state is given, so the
# split (and therefore the reported scores) varies from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
print(f"Training size: {len(y_train)}")
print(f"Test size : {len(y_test)}")

clf = DecisionTreeClassifier(criterion="entropy")
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Show a sample window of predictions next to the true labels. The banner
# reports the number of points actually printed rather than a fixed figure.
sample_pred = y_pred[15:35]
sample_true = y_test[15:35]
print(f"Print results for {len(sample_pred)} test data points:")
print("Predicted labels: ", sample_pred)
print("Ground truth : ", sample_true)

print(f"Accuracy of Decision tree: {100 * accuracy_score(y_test, y_pred):.2f} %")
# Reuse y_pred instead of predicting on X_test a second time.
print(f"Classification Report:\n{classification_report(y_test, y_pred)}\n")
Editor is loading...
Leave a Comment