Untitled
unknown
python
2 years ago
2.2 kB
4
Indexable
def fit(self, data: pd.DataFrame, animate=False, colour_map=None):
    """Cluster the rows of ``data`` into ``self.k`` groups using k-means.

    Initial centres are ``self.k`` rows sampled from ``data`` (seeded with
    ``self.seed``). The method then alternates between assigning every row
    to its nearest centre, as measured by ``self.distance``, and moving
    each centre to the mean of the rows assigned to it, until no
    assignment changes during a full pass.

    Side effects: stores ``data`` on ``self.data`` and the centres on
    ``self.c`` (a DataFrame indexed ``0 .. k-1``).

    Args:
        data: One row per instance. Rows are tracked by index label, so
            labels are expected to be unique.
        animate: When true, ``self._savefig`` is called after each
            assignment step and after each recentring step.
        colour_map: Forwarded to ``self._savefig``; required when
            ``animate`` is true.

    Returns:
        A Series aligned with ``data.index`` holding the cluster id
        assigned to each row.

    Raises:
        ValueError: If ``animate`` is true and ``colour_map`` is falsy.
    """
    if animate and not colour_map:
        raise ValueError('Argument colourmap has to be specified if animate is set to true.')

    self.data = data

    # Row label -> cluster id; -1 marks "not assigned yet" so the first
    # pass always registers as a change.
    assignments = defaultdict(lambda: -1)

    # Randomly pick k rows as the initial centres, relabelled 0..k-1.
    self.c = data.sample(self.k, random_state=self.seed).reset_index(drop=True)

    converged = False
    loops = 1
    while not converged:
        converged = True

        # Step 1: assign every instance to its closest centre.
        for label, instance in data.iterrows():
            previous = assignments[label]
            closest_cluster, min_d = None, float('inf')
            for j, centre in self.c.iterrows():
                d = self.distance(instance, centre)
                # Strict '<' keeps the first centre on ties and skips NaN.
                if d < min_d:
                    min_d = d
                    closest_cluster = j
            assignments[label] = closest_cluster
            if closest_cluster != previous:
                converged = False

        if animate:
            self._savefig(data, assignments, f'{loops}_{time.time()}.png', colour_map)

        # Step 2: move each centre to the mean of its assigned points.
        # Membership is resolved positionally (in data.index order) so it
        # works for any index, not only the default 0..n-1 RangeIndex.
        current = pd.Series([assignments[label] for label in data.index], index=data.index)
        for j in range(self.k):
            members = data[(current == j).to_numpy()]
            if members.empty:
                # An empty cluster has no mean; keep its previous centre
                # instead of overwriting it with NaN.
                continue
            self.c.iloc[j] = members.mean()

        loops += 1
        if animate:
            self._savefig(data, assignments, f'{loops}_{time.time()}.png', colour_map)

    return pd.Series([assignments[x] for x in data.index], index=data.index)
Editor is loading...
Leave a Comment