def ward_hc(self, n_clusters, n_neighbors=10):
    """
    Perform Ward hierarchical clustering

    The clustering is constrained by a k-nearest-neighbors connectivity
    graph built from ``self.pipeline.data``.

    Parameters
    ----------
    n_clusters : int
        number of clusters
    n_neighbors : int
        N nearest neighbors used for computing the connectivity matrix

    Returns
    -------
    Whatever ``self._cluster_func(n_clusters, km, pars)`` returns, where
    ``km`` is the configured ``AgglomerativeClustering`` estimator.

    Raises
    ------
    ValueError
        if ``'lsi'`` is not present in ``self.pipeline``
    """
    # Validate the pipeline first so a misconfiguration is reported before
    # any heavy import or graph construction takes place.
    if 'lsi' not in self.pipeline:
        raise ValueError("you must use lsi with Ward hierarchical "
                         "clustering for scaling reasons.")

    from sklearn.cluster import AgglomerativeClustering
    from sklearn.neighbors import kneighbors_graph

    # Parameters forwarded to _cluster_func alongside the estimator.
    pars = {'n_neighbors': n_neighbors, 'is_hierarchical': True,
            "metric": self.metric}

    # This is really not efficient as
    # it's done a second time in _cluster_func
    X = self.pipeline.data
    connectivity = kneighbors_graph(X, n_neighbors=n_neighbors,
                                    include_self=False)

    km = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward',
                                 connectivity=connectivity)
    return self._cluster_func(n_clusters, km, pars)
评论列表
文章目录