# Question 2

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Read the raw customer shopping records from disk
file_path = '/mnt/data/customer_shopping_data.csv'
data = pd.read_csv(file_path)

# Step 1: Collapse the records to one row per shopping mall, summing the
# 'quantity' and 'price' columns within each mall
feature_columns = ['quantity', 'price']
mall_data = data.groupby('shopping_mall')[feature_columns].sum()

# Step 2: Standardize the two aggregated features before clustering
# (learn the per-column scaling on the mall totals, then apply it to them)
scaler = StandardScaler()
scaled_data = scaler.fit(mall_data).transform(mall_data)

# Step 3: Determine the optimal number of clusters using the elbow method.
# K-means cannot fit more clusters than there are samples (scikit-learn raises
# ValueError), so the sweep is capped at the number of rows in scaled_data.
# With nine or more rows this is the same k = 1..9 sweep as before.
k_values = range(1, min(9, len(scaled_data)) + 1)

# For each candidate k, fit a model and record its inertia_ value
inertia = [
    KMeans(n_clusters=k, random_state=42).fit(scaled_data).inertia_
    for k in k_values
]

# Plot the elbow curve; the x-axis reuses k_values so it always matches
# the number of inertia points actually computed
plt.figure(figsize=(8, 5))
plt.plot(k_values, inertia, marker='o', linestyle='--')
plt.title('Elbow Method for Optimal Clusters')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.show()

# Step 4: Apply K-means clustering with 3 clusters.
# The cluster count is named once so the model and the plotting loop below
# cannot drift apart if the value is changed.
N_CLUSTERS = 3
kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=42)
mall_data['Cluster'] = kmeans.fit_predict(scaled_data)

# Step 5: Visualize the clustering results.
# One scatter call per cluster label so each cluster gets its own legend
# entry; the axes use the unscaled totals stored in mall_data.
plt.figure(figsize=(8, 5))
for cluster in range(N_CLUSTERS):
    cluster_data = mall_data[mall_data['Cluster'] == cluster]
    plt.scatter(
        cluster_data['quantity'],
        cluster_data['price'],
        label=f'Cluster {cluster}',
    )

plt.title('Shopping Mall Clusters')
plt.xlabel('Total Quantity')
plt.ylabel('Total Price')
plt.legend()
plt.show()

# Turn the current index of mall_data back into an ordinary column
# (modifying the frame in place), then print the full table
mall_data.reset_index(drop=False, inplace=True)
print(mall_data)
# Editor is loading...