Untitled
unknown
plain_text
a year ago
1.4 kB
4
Indexable
# Elbow-method plot for K-means on a DataFrame with mixed feature types.
#
# Categorical columns are one-hot encoded, numeric columns are standardized,
# the two matrices are concatenated, and the K-means inertia is plotted for
# k = 1..MAX_K so the "elbow" can be read off the curve.

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import matplotlib.pyplot as plt

# Assuming df1 is your DataFrame containing both categorical and numeric features.
# NOTE: KMeans rejects NaN values, so impute or drop missing numeric values
# in df1 before running this script.

MAX_K = 10         # largest number of clusters to try
N_INIT = 10        # pinned explicitly: the library default differs between scikit-learn releases
RANDOM_STATE = 42  # fixed seed so the elbow curve is reproducible

# Separate categorical and numeric features.
# 'category' is included so pandas Categorical columns are not silently dropped;
# 'number' covers every numeric width/signedness, not only the default int/float.
categorical_features = df1.select_dtypes(include=['object', 'category'])
numeric_features = df1.select_dtypes(include='number')

n_samples = len(df1)
if n_samples == 0:
    raise ValueError("df1 has no rows; nothing to cluster.")
if categorical_features.shape[1] == 0 and numeric_features.shape[1] == 0:
    raise ValueError("df1 has no categorical or numeric columns to cluster on.")

# One-hot encode categorical features (skipped when there are none, because
# the encoder refuses to fit on a frame with zero columns).
encoder = OneHotEncoder()
if categorical_features.shape[1] > 0:
    categorical_encoded = encoder.fit_transform(categorical_features).toarray()
else:
    categorical_encoded = np.empty((n_samples, 0))

# Scale numeric features to zero mean / unit variance so that no single
# column dominates the Euclidean distances K-means relies on.
scaler = StandardScaler()
if numeric_features.shape[1] > 0:
    numeric_scaled = scaler.fit_transform(numeric_features)
else:
    numeric_scaled = np.empty((n_samples, 0))

# Combine encoded categorical and scaled numeric features.
X = np.concatenate((categorical_encoded, numeric_scaled), axis=1)

# Range of k values to experiment with. K-means cannot fit more clusters
# than there are samples, so the upper bound is capped at the row count.
k_values = range(1, min(MAX_K, n_samples) + 1)

# Inertia (within-cluster sum of squared distances) for each k.
inertia = []

# Perform K-means clustering for each value of k.
for k in k_values:
    kmeans = KMeans(n_clusters=k, n_init=N_INIT, random_state=RANDOM_STATE)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)

# Plot the elbow graph.
plt.figure(figsize=(10, 6))
plt.plot(k_values, inertia, marker='o', color='b', linestyle='-')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
plt.xticks(k_values)
plt.grid(True)
plt.show()
Editor is loading...
Leave a Comment