_cluster.py 文件源码-python代码片段

def cluster_dbscan(matrix, distance_measure="sts", eps=1):
    """Clusters the distance matrix for a given epsilon value, if distance
    measure is sts. Other distance measures are: [‘cityblock’, ‘cosine’, 
    ‘euclidean’, ‘l1’, ‘l2’, ‘manhattan’, ‘braycurtis’, ‘canberra’, 
    ‘chebyshev’, ‘correlation’, ‘dice’, ‘hamming’, ‘jaccard’, ‘kulsinski’, 
    ‘mahalanobis’, ‘matching’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, 
    ‘seuclidean’, ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’]

    Parameters
    ----------
    matrix: np.matrix
        The input matrix. If distance measure is sts, this should be the sts
        distance matrix. If other distance, this should be the time-series
        matrix of size ngenes x nsamples.
    distance_measure: str
        The distance measure, default is sts, short time-series distance.
        Any distance measure available in scikit-learn is available here.
        Note: multiple time-series is NOT supported for distances other than    
        "sts".

    Returns
    -------
    cluster_labels: list of int
        A list of size ngenes that defines cluster membership.
    """
    if (distance_measure == "sts"):
        dbs = DBSCAN(eps=eps, metric='precomputed', min_samples=2)
    else:
        dbs = DBSCAN(eps=eps, metric=distance_measure, min_samples=2)
    cluster_labels = dbs.fit_predict(matrix)
    return cluster_labels