# Untitled
# unknown
# plain_text
# 2 years ago
# 1.5 kB
# 7
# Indexable
import pandas as pd
import numpy as np
import os
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
# Module-level encoder instance. Nothing in the visible part of the script
# references it; it is kept so the name stays defined for any later code.
label_encoder = LabelEncoder()

# Read the data set from disk. read_csv runs with its defaults here, so the
# first line of the file is interpreted as the header row.
# NOTE(review): the path is a hard-coded Windows drive location -- confirm it
# exists on the machine that runs this script.
data = pd.read_csv('Z:\\nDP\\breast-cancer-wisconsin.data')

# Show the first rows. A bare ``data.head()`` expression is silently discarded
# when the file runs as a script, so the preview is printed explicitly.
print(data.head())
from sklearn import preprocessing
# Replace every object-dtype column with integer codes so the classifier
# receives numeric input. The same encoder object is re-fitted for each
# column, so after the loop it only holds the classes of the last one encoded.
# NOTE(review): LabelEncoder orders classes by sorted value, so numeric strings
# such as '10' would be coded between '1' and '2' -- confirm that is acceptable
# for the object columns in this file.
le = preprocessing.LabelEncoder()
for column_name in data.columns:
    if data[column_name].dtype == object:
        data[column_name] = le.fit_transform(data[column_name])

# Target is the 'lop' column; features are the columns at positions 1..9.
# NOTE(review): this requires the file's header row to contain 'lop', and
# presumably column 0 is an identifier that is skipped on purpose -- confirm,
# and check that 'lop' itself does not fall inside positions 1..9.
y = data['lop']
X = data.iloc[:, 1:10]

# Hold out 30% of the rows for testing. A fixed random_state makes the split,
# and therefore every score reported afterwards, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print("Training size: %d" % len(y_train))
print("Test size : %d" % len(y_test))
from sklearn.tree import DecisionTreeClassifier
# Fit a decision tree that scores candidate splits with the entropy criterion.
# random_state is fixed so repeated runs on the same split build the same tree.
clf = DecisionTreeClassifier(criterion="entropy", random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Print a window of predictions (test positions 15..34) next to the true
# labels. The count in the heading is taken from the slice itself so the
# message always matches what is actually printed.
preview = slice(15, 35)
print("Print results for %d test data points:" % len(y_pred[preview]))
print("Predicted labels: ", y_pred[preview])
print("Ground truth : ", y_test[preview])

# Overall accuracy as a percentage, then the per-class report. Both reuse
# y_pred instead of running the model over X_test a second time.
print("Accuracy of Decision tree: %.2f %%" % (100 * accuracy_score(y_test, y_pred)))
print('Classification Report:\n{}\n'.format(classification_report(y_test, y_pred)))
# Leave a Comment