def cluster_dbscan(matrix, distance_measure="sts", eps=1, min_samples=2):
    """Cluster the rows of a matrix with DBSCAN.

    If ``distance_measure`` is "sts", ``matrix`` is handed to DBSCAN as an
    already-computed distance matrix (``metric='precomputed'``). For any
    other value, ``distance_measure`` is passed straight through as the
    DBSCAN metric and ``matrix`` is clustered as raw data.

    Other distance measures are: ['cityblock', 'cosine', 'euclidean', 'l1',
    'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation',
    'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching',
    'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
    'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']

    Parameters
    ----------
    matrix: np.matrix
        The input matrix. If distance measure is sts, this should be the sts
        distance matrix. If other distance, this should be the time-series
        matrix of size ngenes x nsamples.
    distance_measure: str
        The distance measure, default is sts, short time-series distance.
        Any distance measure available in scikit-learn is available here.
        Note: multiple time-series is NOT supported for distances other than
        "sts".
    eps: float
        The epsilon value forwarded to DBSCAN as ``eps`` (the neighbourhood
        radius in scikit-learn's DBSCAN). Default is 1.
    min_samples: int
        Forwarded to DBSCAN as ``min_samples``. Default is 2, which is the
        value this function always used before the parameter existed.

    Returns
    -------
    cluster_labels: array-like of int
        The result of ``DBSCAN.fit_predict(matrix)``: one cluster label per
        row of ``matrix`` (size ngenes). NOTE(review): assuming ``DBSCAN`` is
        ``sklearn.cluster.DBSCAN``, this is a NumPy array and noise points
        are labelled -1.
    """
    # "sts" distances are computed by the caller, so DBSCAN must not try to
    # recompute them; every other measure is resolved by DBSCAN itself.
    metric = "precomputed" if distance_measure == "sts" else distance_measure
    dbs = DBSCAN(eps=eps, metric=metric, min_samples=min_samples)
    return dbs.fit_predict(matrix)
# Comment list
# Table of contents