Untitled
unknown
python
2 years ago
2.2 kB
6
Indexable
def fit(self, data: pd.DataFrame, animate=False, colour_map=None):
    """Cluster the rows of ``data`` into ``self.k`` groups.

    The initial cluster centres are ``self.k`` rows sampled from ``data``
    (seeded with ``self.seed``). The method then alternates between
    assigning every row to its closest centre, as measured by
    ``self.distance(row, centre)``, and moving each centre to the mean of
    the rows assigned to it. It stops once a full pass changes no
    assignment.

    Side effects: stores ``data`` on ``self.data`` and the cluster centres
    on ``self.c`` (a float DataFrame indexed ``0..k-1``).

    Args:
        data: One row per instance. Rows are tracked by index label.
        animate: When true, ``self._savefig`` is called to save a snapshot
            of the current assignments after each assignment pass and once
            more at the end.
        colour_map: Passed through to ``self._savefig``; required (truthy)
            when ``animate`` is set.

    Returns:
        A ``pd.Series`` indexed like ``data`` holding the cluster id
        assigned to each row.

    Raises:
        ValueError: If ``animate`` is set without a ``colour_map``.
    """
    if animate and not colour_map:
        raise ValueError('Argument colourmap has to be specified if animate is set to true.')
    self.data = data
    # Maps index label -> cluster id; -1 means "not assigned yet".
    assignments = defaultdict(lambda: -1)
    # Randomly pick the initial centres and re-index them 0..k-1. They are
    # cast to float up front because they will be overwritten with means,
    # which are generally not representable in an integer column.
    self.c: pd.DataFrame = (
        data.sample(self.k, random_state=self.seed)
        .reset_index(drop=True)
        .astype(float)
    )
    converged = False
    loops = 1
    while not converged:
        converged = True
        # Step 1: assign every instance to its closest cluster.
        for i, instance in data.iterrows():
            prev_assignment = assignments[i]
            closest_cluster, min_d = None, float('inf')
            for j, cluster in self.c.iterrows():
                d = self.distance(instance, cluster)
                # Strict '<' means ties go to the lowest-numbered cluster.
                if d < min_d:
                    min_d = d
                    closest_cluster = j
            assignments[i] = closest_cluster
            if closest_cluster != prev_assignment:
                converged = False
        if animate:
            self._savefig(data, assignments, f'{loops}_{time.time()}.png', colour_map)
        # Step 2: recentre each cluster on the mean of its assigned points.
        # Iterate over the index (not iterrows) because self.c is modified
        # inside the loop.
        for j in self.c.index:
            members = [i for i, assigned in assignments.items() if assigned == j]
            if not members:
                # An empty cluster has no mean; keep its previous centre
                # instead of overwriting it with NaN.
                continue
            # assignments is keyed by index *label*, so select with .loc.
            self.c.loc[j] = data.loc[members].mean()
        loops += 1
    # NOTE(review): the source this was recovered from had lost its
    # indentation; this final snapshot is assumed to belong after the loop
    # rather than at the end of every iteration -- confirm.
    if animate:
        self._savefig(data, assignments, f'{loops}_{time.time()}.png', colour_map)
    return pd.Series([assignments[x] for x in data.index], index=data.index)
Leave a Comment