# Question 2
# Cluster shopping malls by total quantity and total price using K-means.
#
# Pipeline: load the CSV, aggregate per mall, standardize the two features,
# plot an elbow curve, fit K-means, then plot and print the assignments.

# Import necessary libraries
import pandas as pd

# NOTE: scikit-learn and matplotlib are imported inside the functions that use
# them so the pandas-only helpers stay importable without those packages.

# Location of the dataset
FILE_PATH = '/mnt/data/customer_shopping_data.csv'
# Columns summed per mall and used as clustering features
FEATURE_COLUMNS = ['quantity', 'price']
# The elbow curve tries k = 1..MAX_ELBOW_CLUSTERS
MAX_ELBOW_CLUSTERS = 9
# Number of clusters used for the final model
N_CLUSTERS = 3
RANDOM_STATE = 42
# Pinned explicitly: the scikit-learn default for n_init differs by version.
N_INIT = 10


def aggregate_by_mall(data: pd.DataFrame) -> pd.DataFrame:
    """Return per-mall sums of the feature columns.

    Args:
        data: Transaction rows with a ``shopping_mall`` column and the
            columns listed in ``FEATURE_COLUMNS``.

    Returns:
        A frame indexed by ``shopping_mall`` with one summed column per
        feature.
    """
    return data.groupby('shopping_mall')[FEATURE_COLUMNS].sum()


def elbow_cluster_range(
    n_samples: int, max_clusters: int = MAX_ELBOW_CLUSTERS
) -> range:
    """Return the cluster counts to try for the elbow curve.

    K-means cannot fit more clusters than there are samples, so the upper
    bound is capped at ``n_samples``.

    Args:
        n_samples: Number of rows (malls) that will be clustered.
        max_clusters: Largest cluster count to try when enough samples exist.

    Returns:
        ``range(1, min(max_clusters, n_samples) + 1)``; empty when there are
        no samples.
    """
    return range(1, min(max_clusters, n_samples) + 1)


def scale_features(mall_data: pd.DataFrame):
    """Standardize the feature columns and return the scaled array."""
    from sklearn.preprocessing import StandardScaler

    return StandardScaler().fit_transform(mall_data[FEATURE_COLUMNS])


def compute_inertia(scaled_data, k_values) -> list:
    """Fit K-means once per value in ``k_values`` and collect each inertia."""
    from sklearn.cluster import KMeans

    inertia = []
    for k in k_values:
        kmeans = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=N_INIT)
        kmeans.fit(scaled_data)
        inertia.append(kmeans.inertia_)
    return inertia


def plot_elbow(k_values, inertia) -> None:
    """Plot inertia against the number of clusters."""
    import matplotlib.pyplot as plt

    plt.figure(figsize=(8, 5))
    plt.plot(list(k_values), inertia, marker='o', linestyle='--')
    plt.title('Elbow Method for Optimal Clusters')
    plt.xlabel('Number of Clusters')
    plt.ylabel('Inertia')
    plt.show()


def assign_clusters(
    mall_data: pd.DataFrame, scaled_data, n_clusters: int
) -> pd.DataFrame:
    """Return a copy of ``mall_data`` with a ``Cluster`` label column added."""
    from sklearn.cluster import KMeans

    kmeans = KMeans(
        n_clusters=n_clusters, random_state=RANDOM_STATE, n_init=N_INIT
    )
    return mall_data.assign(Cluster=kmeans.fit_predict(scaled_data))


def plot_clusters(mall_data: pd.DataFrame, n_clusters: int) -> None:
    """Scatter-plot total quantity against total price, one series per cluster."""
    import matplotlib.pyplot as plt

    plt.figure(figsize=(8, 5))
    for cluster in range(n_clusters):
        cluster_data = mall_data[mall_data['Cluster'] == cluster]
        plt.scatter(
            cluster_data['quantity'],
            cluster_data['price'],
            label=f'Cluster {cluster}',
        )
    plt.title('Shopping Mall Clusters')
    plt.xlabel('Total Quantity')
    plt.ylabel('Total Price')
    plt.legend()
    plt.show()


def main() -> None:
    """Run the full analysis: load, aggregate, scale, cluster, plot, print."""
    # Load the dataset
    data = pd.read_csv(FILE_PATH)

    # Step 1: Aggregate data by shopping mall
    mall_data = aggregate_by_mall(data)

    # Step 2: Normalize the data for clustering
    scaled_data = scale_features(mall_data)

    # Step 3: Determine the optimal number of clusters using the elbow method
    k_values = elbow_cluster_range(len(mall_data))
    plot_elbow(k_values, compute_inertia(scaled_data, k_values))

    # Step 4: Apply K-means clustering (never more clusters than malls)
    n_clusters = min(N_CLUSTERS, len(mall_data))
    mall_data = assign_clusters(mall_data, scaled_data, n_clusters)

    # Step 5: Visualize the clustering results
    plot_clusters(mall_data, n_clusters)

    # Display the resulting cluster assignments
    print(mall_data.reset_index())


if __name__ == "__main__":
    main()
Leave a Comment