Untitled
# K-means clustering of the Iris dataset.
#
# Steps performed by this script:
#   1. Load ``Iris.csv`` and drop the ``Id`` column.
#   2. Fit K-means for k = 1..10 and plot the within-cluster sum of squares
#      (WCSS) against k -- the "elbow method".
#   3. Fit a 3-cluster model on the four feature columns, print the first
#      rows with their cluster assignment and the average silhouette score.
#   4. Scatter-plot the clusters and their centroids over the first two
#      feature columns (sepal length / sepal width).

import pandas as pd  # Pandas (version : 1.1.5)
import numpy as np  # Numpy (version : 1.19.2)
import matplotlib.pyplot as plt  # Matplotlib (version : 3.3.2)
from sklearn.cluster import KMeans  # Scikit Learn (version : 0.23.2)
import seaborn as sns  # Seaborn (version : 0.11.1)
from sklearn.metrics import silhouette_score

# Number of clusters used for the final model (also marked on the elbow plot).
N_CLUSTERS = 3
# One colour per cluster id, in cluster-id order.
CLUSTER_COLORS = ['teal', 'lime', 'magenta']
# Fixed seed so the elbow curve, cluster ids and silhouette score are
# reproducible from run to run.
RANDOM_STATE = 42
# Explicit number of K-means restarts; the library default has changed
# between scikit-learn releases, so pin it rather than rely on the default.
N_INIT = 10

# The bundled seaborn style is named 'seaborn' in the Matplotlib version this
# script targets; newer releases ship it as 'seaborn-v0_8' instead.
plt.style.use(
    'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
)

data = pd.read_csv('Iris.csv')

for number, col in enumerate(data.columns, start=1):
    print(f'Column number {number} is {col}')

# 'Id' is a row identifier, not a feature.
data.drop('Id', axis=1, inplace=True)

# After dropping 'Id': columns 0-3 are used as features, column 4 as target.
target_data = data.iloc[:, 4]
clustering_data = data.iloc[:, [0, 1, 2, 3]]

# --- Elbow method: WCSS (model inertia) for k = 1..10 -----------------------
k_values = range(1, 11)
wcss = []
for k in k_values:
    km = KMeans(n_clusters=k, n_init=N_INIT, random_state=RANDOM_STATE)
    km.fit(clustering_data)
    wcss.append(km.inertia_)

fig, ax = plt.subplots(figsize=(15, 7))
# Draw on the Axes object directly; rebinding ``ax`` to the return value of
# ``plt.plot`` would replace it with a list of Line2D objects.
ax.plot(k_values, wcss, linewidth=2, color="red", marker="8")
ax.axvline(x=N_CLUSTERS, ls='--')
ax.set_ylabel('WCSS')
ax.set_xlabel('No. of Clusters (k)')
ax.set_title('The Elbow Method', fontsize=20)
plt.show()

# --- Final model -------------------------------------------------------------
kms = KMeans(
    n_clusters=N_CLUSTERS,
    init='k-means++',
    n_init=N_INIT,
    random_state=RANDOM_STATE,
)
clusters = clustering_data.copy()
# fit_predict fits the model and returns the labels in one step, so a
# separate kms.fit() call beforehand is unnecessary.
clusters['Cluster_Prediction'] = kms.fit_predict(clustering_data)
print(clusters.head())

silhouette_avg = silhouette_score(
    clustering_data, clusters['Cluster_Prediction']
)
print(f"The average silhouette_score is : {silhouette_avg}")

# K-means cluster ids are arbitrary, so a fixed id -> species mapping can put
# the wrong name in the legend. Label each cluster with the species that is
# most frequent among its members instead.
majority_species = target_data.groupby(clusters['Cluster_Prediction']).agg(
    lambda species: species.value_counts().idxmax()
)

# --- Cluster scatter plot ----------------------------------------------------
fig, ax = plt.subplots(figsize=(15, 7))
for cluster_id, color in zip(range(N_CLUSTERS), CLUSTER_COLORS):
    members = clusters[clusters['Cluster_Prediction'] == cluster_id]
    ax.scatter(
        x=members['SepalLengthCm'],
        y=members['SepalWidthCm'],
        s=70,
        edgecolor=color,
        linewidth=0.3,
        c=color,
        label=majority_species.loc[cluster_id],
    )

# Centroid coordinates along the first two feature columns.
# NOTE(review): assumes those are SepalLengthCm and SepalWidthCm, matching
# the axes used for the points above -- confirm against the CSV column order.
ax.scatter(
    x=kms.cluster_centers_[:, 0],
    y=kms.cluster_centers_[:, 1],
    s=170,
    c='yellow',
    label='Centroids',
    edgecolor='black',
    linewidth=0.3,
)
ax.legend(loc='upper right')
ax.set_xlim(4, 8)
ax.set_ylim(1.8, 4.5)
ax.set_ylabel('Sepal Width (in cm)')
ax.set_xlabel('Sepal Length (in cm)')
ax.set_title('Clusters', fontsize=20)
plt.show()
Leave a Comment