def cluster(self):
    """Fit a MiniBatchKMeans model on the stored data and return its centroids.

    ``self.data`` is read as a mapping whose values are either ``None`` or
    arrays that ``np.vstack`` can stack; ``None`` values are skipped.

    When ``self.subsample`` is ``None`` every entry is used.  Otherwise it
    is treated as a fraction: ``int(self.subsample * len(self.data))`` keys
    are drawn at random (without replacement) and only their entries are
    clustered.  NOTE(review): the keys are presumably file names -- confirm
    against the code that fills ``self.data``.

    Returns:
        The ``cluster_centers_`` attribute of the fitted model.

    Raises:
        ValueError: If no non-``None`` entry is left to cluster, or if the
            requested sample is larger than the number of keys.
    """
    # Materialise the keys as a list: random.sample() needs a sequence, and
    # passing a dict view raises TypeError on Python 3.11+.
    fnames = list(self.data.keys())
    if self.subsample is not None:
        # Restrict clustering to a random subset of the keys.
        fnames = random.sample(fnames, int(self.subsample * len(fnames)))

    entries = (self.data[k] for k in fnames)
    chunks = [entry for entry in entries if entry is not None]
    if not chunks:
        # np.vstack([]) would fail with an unhelpful message; be explicit.
        raise ValueError(
            "cluster(): no data to cluster (all selected entries are None "
            "or the subsample selected no keys)"
        )

    mbk = MiniBatchKMeans(
        n_clusters=self.K,
        batch_size=self.K * 2,
        verbose=self.verbose,
        compute_labels=False,
    )
    mbk.fit(np.vstack(chunks))
    return mbk.cluster_centers_
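# 评论列表  (web-page residue: "comment list" navigation label, not code)
# 文章目录  (web-page residue: "article table of contents" label, not code)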