def compute_centroid_set(self):
    """Cluster each subset of ``self.docv`` and collect the cluster centroids.

    For every subset yielded by ``subset_iterator`` the all-zero rows are
    dropped, a cosine affinity matrix is built and symmetrized, and
    spectral clustering with ``self.subcluster_kn`` clusters is run on it.
    The mean vector of each cluster is scaled to unit L2 norm and kept as
    that cluster's centroid.

    Clusters that received no members, or whose mean vector has a zero or
    non-finite norm, are skipped: normalizing them would otherwise put
    NaN/inf rows into the result.

    Reads ``self.docv``, ``self.subcluster_m``, ``self.subcluster_repeats``
    and ``self.subcluster_kn``.
    NOTE(review): each subset is assumed to be a 2-D array-like with one
    vector per row, based on how it is indexed below -- confirm against
    ``subset_iterator``.

    Returns:
        A 2-D array stacking the unit-norm centroids from every subset,
        with at most ``self.subcluster_kn`` rows per subset.

    Raises:
        ValueError: from ``np.vstack`` if the iterator yields no subsets.
    """
    INPUT_ITR = subset_iterator(
        X=self.docv,
        m=self.subcluster_m,
        repeats=self.subcluster_repeats,
    )

    kn = self.subcluster_kn
    clf = SpectralClustering(
        n_clusters=kn,
        affinity="precomputed",
    )

    C = []

    for X in INPUT_ITR:
        # Remove any rows that have zero vectors
        bad_row_idx = (X ** 2).sum(axis=1) == 0
        X = X[~bad_row_idx]

        A = cosine_affinity(X)

        # "Force" symmetry due to rounding errors
        A = np.maximum(A, A.transpose())

        labels = clf.fit_predict(A)

        # Compute the centroids
        dim = X.shape[1]
        centroids = np.zeros((kn, dim))
        # Tracks which of the kn slots hold a usable centroid.
        valid = np.zeros(kn, dtype=bool)

        for i in range(kn):
            idx = labels == i
            if not idx.any():
                # Empty cluster: the mean of zero rows would be NaN.
                continue

            mu = X[idx].mean(axis=0)
            norm = np.linalg.norm(mu)
            if not np.isfinite(norm) or norm == 0:
                # Cannot be scaled to unit length without producing NaN/inf.
                continue

            centroids[i] = mu / norm
            valid[i] = True

        # Keep only the well-defined centroids; this may be zero rows.
        C.append(centroids[valid])

    return np.vstack(C)
# Web-page residue from the original listing (not part of the code):
# "Comment list" / "Table of contents"