Untitled

mail@pastecode.io avatar
unknown
plain_text
a month ago
1.4 kB
2
Indexable
Never
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import matplotlib.pyplot as plt

# Assuming df1 is your DataFrame containing both categorical and numeric features

# Separate categorical and numeric features.
# 'category' is selected alongside 'object' so pandas Categorical columns are
# encoded as well, and 'number' picks up every integer/float width rather than
# only the ones matched by the 'int'/'float' aliases. Timedelta columns are
# excluded explicitly because pandas treats them as a subtype of 'number'.
categorical_features = df1.select_dtypes(include=['object', 'category'])
numeric_features = df1.select_dtypes(include=['number'], exclude=['timedelta'])

# One-hot encode categorical features.
# scikit-learn estimators reject input with zero columns, so the encoder is
# only fitted when at least one categorical column exists; otherwise an empty
# (n_rows, 0) block keeps the concatenation below valid.
encoder = OneHotEncoder()
if categorical_features.shape[1] > 0:
    categorical_encoded = encoder.fit_transform(categorical_features).toarray()
else:
    categorical_encoded = np.empty((len(df1), 0))

# Scale numeric features (same zero-column guard as above).
scaler = StandardScaler()
if numeric_features.shape[1] > 0:
    numeric_scaled = scaler.fit_transform(numeric_features)
else:
    numeric_scaled = np.empty((len(df1), 0))

# Combine encoded categorical and scaled numeric features
X = np.concatenate((categorical_encoded, numeric_scaled), axis=1)

# Fail early with a clear message instead of a less obvious error from KMeans.
if X.shape[1] == 0:
    raise ValueError("df1 has no categorical or numeric columns to cluster on")

# Define a range of k values to experiment with.
# KMeans requires n_clusters <= n_samples, so the upper bound (10 by default)
# is capped at the number of rows in X to avoid a ValueError on small datasets.
max_k = 10
k_values = range(1, min(max_k, X.shape[0]) + 1)

# Initialize list to store inertia values (one entry per k, in k_values order)
inertia = []

# Perform K-means clustering for each value of k.
# n_init is pinned explicitly: its default differs between scikit-learn
# releases, so leaving it implicit makes the inertia curve version-dependent
# (and triggers a FutureWarning on some releases).
for k in k_values:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)

# Draw the elbow curve: inertia as a function of the number of clusters.
ax = plt.figure(figsize=(10, 6)).add_subplot(111)
ax.plot(k_values, inertia, 'bo-')  # blue solid line with circle markers
ax.set_xlabel('Number of clusters (k)')
ax.set_ylabel('Inertia')
ax.set_title('Elbow Method for Optimal k')
ax.set_xticks(k_values)  # one tick per evaluated k
ax.grid(True)
plt.show()
Leave a Comment