k_mean_clustering
unknown
python
2 years ago
1.5 kB
14
Indexable
"""Cluster stores by average sales with one-variable K-Means and plot them.

The script reads a spreadsheet, standardizes the ``Average Sales`` column,
fits K-Means on that single feature, and draws every store on a horizontal
strip coloured by cluster, together with the cluster centroids and the
rows whose sales exceed ``OUTLIER_THRESHOLD``.
"""

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Input workbook; must contain a column named SALES_COLUMN.
EXCEL_FILE = r"C:\Users\andyl\Documents\UTCC\All_stores_kmean.xlsx"
SALES_COLUMN = 'Average Sales'

N_CLUSTERS = 4

# Rows strictly above this value are highlighted as outliers.
OUTLIER_THRESHOLD = 3000000

# Visible x-range and tick positions of the plot.
X_LIMITS = (10000, 3000000)
X_TICKS = [10000, 500000, 1000000, 1500000, 2000000, 2500000, 3000000]


def cluster_sales(df, n_clusters=N_CLUSTERS):
    """Fit K-Means on the standardized sales column.

    Args:
        df: DataFrame containing ``SALES_COLUMN`` with no missing values.
        n_clusters: Number of clusters to fit.

    Returns:
        A ``(labels, centroids)`` pair: ``labels`` is the cluster index of
        each row of ``df``; ``centroids`` is the array of cluster centers
        mapped back from standardized values to the original sales scale.
    """
    sales = df[[SALES_COLUMN]]
    scaler = StandardScaler()
    normalized_sales = scaler.fit_transform(sales)

    # Fixed random_state keeps the cluster assignment reproducible.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(normalized_sales)

    # Centers are found in standardized space; undo the scaling so they
    # can be drawn on the same axis as the raw sales figures.
    centroids = scaler.inverse_transform(kmeans.cluster_centers_)
    return labels, centroids


def plot_clusters(df, centroids, outliers):
    """Draw stores, centroids and outliers on a single horizontal strip.

    Args:
        df: DataFrame with ``SALES_COLUMN`` and a ``'Cluster'`` column.
        centroids: Cluster centers in original sales units, one per row,
            with the sales value in column 0.
        outliers: Subset of ``df`` to highlight in orange.
    """
    plt.figure(figsize=(10, 6))
    plt.axhline(y=0, xmin=0, xmax=1, color='black', linestyle='--',
                label='Average Sales Range')

    # Everything is plotted at y=0: there is only one variable, so the
    # y-axis carries no information (its ticks are hidden below).
    plt.scatter(df[SALES_COLUMN], [0] * len(df), c=df['Cluster'],
                cmap='viridis', marker='o', edgecolors='k', s=50, alpha=0.7,
                label='Data Points')
    plt.scatter(centroids[:, 0], [0] * len(centroids), marker='x', s=200,
                linewidths=3, color='red', label='Centroids')
    # NOTE(review): outliers are strictly above OUTLIER_THRESHOLD, which
    # equals the upper x-limit set below, so with the current limits these
    # markers fall outside the visible range. Widen X_LIMITS if they are
    # meant to be seen.
    plt.scatter(outliers[SALES_COLUMN], [0] * len(outliers), c='orange',
                marker='o', edgecolors='k', s=50, alpha=0.7,
                label='Outliers')

    plt.title('K-Means Clustering with Centroids and Outliers (1 Variable)')
    plt.xlabel('Average Sales')
    plt.ylabel('Cluster')
    plt.xlim(*X_LIMITS)
    plt.ylim(-1, 1)
    plt.yticks([])
    # Thousands-separated labels, e.g. 1500000 -> '1,500,000'.
    plt.xticks(X_TICKS, [f'{tick:,}' for tick in X_TICKS])
    plt.legend()
    plt.show()


def main():
    """Load the workbook, cluster the stores and show the plot."""
    df = pd.read_excel(EXCEL_FILE)

    # The scaler and K-Means reject missing values, so rows without a
    # sales figure are dropped before clustering.
    df = df.dropna(subset=[SALES_COLUMN]).copy()

    df['Cluster'], centroids = cluster_sales(df)
    outliers = df[df[SALES_COLUMN] > OUTLIER_THRESHOLD]
    plot_clusters(df, centroids, outliers)


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment