Untitled
unknown
python
a year ago
2.1 kB
8
Indexable
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
np.random.seed(42)
n_samples = 100
# Create two features with different scales
X = np.zeros((n_samples, 2))
X[:, 0] = np.random.normal(0, 1, n_samples)
X[:, 1] = np.random.normal(0, 5, n_samples)
# Convert to DataFrame for better visualization
df = pd.DataFrame(X, columns=['Feature1', 'Feature2'])
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Perform hierarchical clustering
n_clusters = 3
clustering = AgglomerativeClustering(n_clusters=n_clusters)
clusters = clustering.fit_predict(X_scaled)
# Create visualization function
def plot_clusters_and_dendrogram(X, labels):
fig = plt.figure(figsize=(12, 5))
# Plot 1: Scatter plot of clusters
ax1 = fig.add_subplot(121)
scatter = ax1.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
ax1.set_title('Hierarchical Clustering Results')
ax1.set_xlabel('Feature 1')
ax1.set_ylabel('Feature 2')
plt.colorbar(scatter)
# Plot 2: Dendrogram
ax2 = fig.add_subplot(122)
linkage_matrix = linkage(X, method='ward')
dendrogram(linkage_matrix)
ax2.set_title('Dendrogram')
ax2.set_xlabel('Sample Index')
ax2.set_ylabel('Distance')
plt.tight_layout()
return plt
# Plot results
plot = plot_clusters_and_dendrogram(X_scaled, clusters)
print("\nCluster Statistics:")
for i in range(n_clusters):
cluster_size = np.sum(clusters == i)
cluster_data = X_scaled[clusters == i]
cluster_mean = np.mean(cluster_data, axis=0)
cluster_std = np.std(cluster_data, axis=0)
print(f"\nCluster {i}:")
print(f"Size: {cluster_size}")
print(f"Mean: Feature1={cluster_mean[0]:.2f}, Feature2={cluster_mean[1]:.2f}")
print(f"Std: Feature1={cluster_std[0]:.2f}, Feature2={cluster_std[1]:.2f}")
plt.show()Editor is loading...
Leave a Comment