import pandas as pd
import seaborn as sb
import numpy as np
# Column labels applied to the loaded table: four measurements plus the class.
colums_name = ['Sepal length', 'Sepal width', 'Petal length', 'Petal width', 'class']
# header=None keeps the first line of the file as a data row. With the default
# (header=0) that line would be consumed as column names and then discarded
# when the labels below overwrite them, silently dropping one sample.
# NOTE(review): assumes "iris.data" has no header row -- confirm for this copy.
iris_data = pd.read_csv("iris.data", header=None)
# Raises if the file does not contain exactly len(colums_name) columns.
iris_data.columns = colums_name
# iris_data.head()
# iris_data.isnull().sum()
# iris_data.value_counts("class")
# sb.countplot(x="class", data = iris_data)
# Scatter plot of sepal width against sepal length, one hue per class value.
fig = sb.scatterplot(
    data=iris_data,
    x="Sepal length",
    y="Sepal width",
    hue="class",
)
fig.set_title("Sepal Length and Width")
# The first four columns are the model inputs; the fifth column is the target.
features = iris_data.iloc[:, :4]
label = iris_data.iloc[:, 4]
# Print each part under its own heading (heading and value on separate lines).
print("Features : ", features, sep="\n")
print("Label : ", label, sep="\n")
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    label,
    test_size=0.2,
    random_state=0,
)
# Report how many rows ended up in each part of the split.
print('Number of train data', len(X_train))
print('Number of data test', len(y_test))
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier using 8 neighbours with uniform weights.
knn = KNeighborsClassifier(weights="uniform", n_neighbors=8)
knn.fit(X_train, y_train)
# Score on the same rows the model was fitted on (training score only).
train_acc = knn.score(X_train, y_train)
print(
    "The accuracy Of KNN classifier on training data is : {:.3f}".format(
        train_acc
    )
)