cluster.py 文件源码-python代码片段

cluster.py 文件源码

python

阅读 27 收藏 0 点赞 0 评论 0

项目：FreeDiscovery 作者: FreeDiscovery 项目源码文件源码

def dbscan(self, n_clusters=None, eps=0.5, min_samples=10,
               algorithm='auto', leaf_size=30):
        """
        Cluster with scikit-learn's DBSCAN estimator.

        A ``sklearn.cluster.DBSCAN`` instance is built from the given
        arguments and handed, together with a small parameters dict, to
        ``self._cluster_func``.

        This can also be used for duplicate detection (presumably with a
        small ``eps`` -- TODO confirm, the original sentence was cut off).

        Parameters
        ----------
        n_clusters : int, optional
            Number of clusters. Not used by DBSCAN; present for
            compatibility only. It is forwarded as-is to
            ``self._cluster_func``.
        eps : float
            The maximum distance between two samples for them to be
            considered as in the same neighborhood.
        min_samples : int
            The number of samples (or total weight) in a neighborhood
            for a point to be considered as a core point.
            This includes the point itself.
        algorithm : str
            Nearest-neighbors search algorithm, passed unchanged to
            ``sklearn.cluster.DBSCAN`` (default ``'auto'``).
        leaf_size : int
            Leaf size passed unchanged to ``sklearn.cluster.DBSCAN``,
            which uses it for its tree-based neighbor searches
            (default 30).

        Returns
        -------
        The value returned by
        ``self._cluster_func(n_clusters, estimator, parameters)``.
        """
        from sklearn.cluster import DBSCAN

        # NOTE(review): ``self.metric`` is only recorded in the parameters
        # dict below; the DBSCAN estimator itself is created without a
        # ``metric`` argument and so uses its own default -- confirm that
        # this is intended.
        cluster_params = {
            'is_hierarchical': False,
            "metric": self.metric,
        }

        estimator = DBSCAN(
            eps=eps,
            min_samples=min_samples,
            algorithm=algorithm,
            leaf_size=leaf_size,
        )

        return self._cluster_func(n_clusters, estimator, cluster_params)