Untitled

mail@pastecode.io avatar
unknown
python
a year ago
1.4 kB
1
Indexable
Never
def objective(X, y, cen):
    """Return the total squared distance from each point to its assigned center.

    This is the quantity k-means minimizes (the within-cluster sum of squares).

    Args:
        X: Points, one per row (anything ``np.asarray`` accepts).
        y: Integer cluster label for each row of ``X``. Only the first
            ``len(X)`` labels are used.
        cen: Cluster centers, indexed by label. Each center is expected to
            have the same shape as a row of ``X``.

    Returns:
        The sum over all points and features of the squared difference
        between a point and its center; ``0`` when ``X`` is empty.

    Raises:
        IndexError: If ``y`` has fewer labels than ``X`` has points, or a
            label does not index into ``cen``.
    """
    n = len(X)
    if n == 0:
        # Nothing to sum; also avoids indexing ``cen`` with an empty,
        # possibly non-integer label array.
        return 0

    labels = np.asarray(y)[:n]
    if labels.shape[0] < n:
        # Without this check a single label would silently broadcast
        # against every point instead of failing.
        raise IndexError("y has fewer labels than X has points")

    # Integer-array indexing gathers every point's center in one step, so the
    # whole objective is a single vectorized expression instead of a
    # Python-level loop over rows.
    residuals = np.asarray(X) - np.asarray(cen)[labels]
    return np.sum(residuals ** 2)

def find_clusters(X, n_clusters, rseed=2, *, verbose=True):
    """Cluster the rows of ``X`` into ``n_clusters`` groups with k-means.

    Initial centers are ``n_clusters`` distinct rows of ``X`` picked from a
    ``RandomState(rseed)`` permutation. Each iteration assigns every row to
    its nearest center (Euclidean distance) and then moves each center to the
    mean of the rows assigned to it, until the centers stop changing.

    Args:
        X: NumPy array with one sample per row (assumed 2-D, as required by
            ``pairwise_distances``).
        n_clusters: Number of clusters; must be between 1 and ``X.shape[0]``.
        rseed: Seed for the random choice of initial centers.
        verbose: When true (the default), print per-iteration progress, the
            time spent computing distances, and the final centers and
            objective value.

    Returns:
        A ``(centers, labels)`` tuple: the converged cluster centers and the
        index of the nearest center for every row of ``X``.

    Raises:
        ValueError: If ``n_clusters`` is not between 1 and ``X.shape[0]``.
    """
    n_samples = X.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and {n_samples}, got {n_clusters}"
        )

    # 1. Randomly choose distinct rows of X as the initial centers.
    rng = np.random.RandomState(rseed)
    centers = X[rng.permutation(n_samples)[:n_clusters]]

    iteration = 0
    while True:
        if verbose:
            print("Loop no: ", iteration)
        iteration += 1

        # 2a. Assign labels based on closest center. The timer wraps the
        # distance computation itself so the reported duration is meaningful.
        if verbose:
            print("Pairwise euclidean start")
        started = datetime.now()
        labels = pairwise_distances(X, centers, metric='euclidean').argmin(axis=1)
        elapsed = datetime.now() - started
        if verbose:
            print("Pairwise euclidean end")
            print(labels)

        # 2b. Find new centers from means of points. A cluster that received
        # no points keeps its previous center: the mean of an empty selection
        # is NaN, which would poison the next distance computation and can
        # never satisfy the convergence test below.
        updated = []
        for k in range(n_clusters):
            members = X[labels == k]
            updated.append(members.mean(0) if len(members) else centers[k])
        new_centers = np.array(updated)
        if verbose:
            print(new_centers)
            print(elapsed)

        # 2c. Check for convergence: stop once an update leaves every center
        # exactly where it was.
        if np.array_equal(centers, new_centers):
            break
        centers = new_centers

    if verbose:
        print(centers)
        print(objective(X, labels, centers))

    return centers, labels