def dbscan(self, n_clusters=None, eps=0.5, min_samples=10,
           algorithm='auto', leaf_size=30):
    """
    Cluster the data with scikit-learn's DBSCAN.

    A ``sklearn.cluster.DBSCAN`` estimator is built from the arguments
    below and handed, together with a small options dict, to
    ``self._cluster_func``, which performs the actual clustering.

    The original note says this can also be used for duplicate detection;
    that sentence was truncated after "when ep" (presumably "when ``eps``
    is small" -- TODO confirm).

    Parameters
    ----------
    n_clusters : int, optional
        Number of clusters. Not used by DBSCAN; present only for
        compatibility (per the original note). It is forwarded unchanged
        to ``self._cluster_func``.
    eps : float
        The maximum distance between two samples for them to be
        considered as in the same neighborhood.
    min_samples : int
        The number of samples (or total weight) in a neighborhood for a
        point to be considered as a core point. This includes the point
        itself.
    algorithm : str
        Forwarded unchanged to ``DBSCAN(algorithm=...)``.
    leaf_size : int
        Forwarded unchanged to ``DBSCAN(leaf_size=...)``.

    Returns
    -------
    Whatever ``self._cluster_func`` returns for the given estimator and
    options (not visible from this method).
    """
    from sklearn.cluster import DBSCAN

    # The metric travels to _cluster_func through the options dict; it is
    # not passed to the DBSCAN constructor here.
    cluster_options = {"is_hierarchical": False, "metric": self.metric}

    estimator = DBSCAN(
        eps=eps,
        min_samples=min_samples,
        algorithm=algorithm,
        leaf_size=leaf_size,
    )

    return self._cluster_func(n_clusters, estimator, cluster_options)
# Comment list
# Table of contents