def get_clusters(self, k, criterion='maxclust', return_type='labels'):
    """Cut the stored linkage into flat clusters and group the leaves.

    Thin wrapper around ``scipy.cluster.hierarchy.fcluster`` applied to
    ``self.linkage``.

    Parameters
    ----------
    k : {int, float}
        Passed to ``fcluster`` as its threshold argument.
    criterion : {'maxclust', 'distance'}, optional
        If ``'maxclust'``, at most `k` clusters will be formed. If
        ``'distance'``, clusters will be created at threshold `k`.
    return_type : {'labels', 'indices', 'columns', 'rows'}
        Determines what the clusters are made of (case-insensitive):

        - ``'labels'``: entries of ``self.labels``; falls back to
          ``'indices'`` when no labels are available.
        - ``'rows'``: row labels (``self.mat.index``).
        - ``'columns'``: column labels (``self.mat.columns``).
        - ``'indices'`` or anything else: positional leaf indices.

        ``'rows'``/``'columns'`` require ``self.mat`` to be a pandas
        DataFrame.

    Returns
    -------
    list
        List of clusters ``[[leaf1, leaf5], [leaf2, ...], ...]``, ordered
        by ascending cluster id. Leaves within a cluster keep their
        original order.
    """
    cl = scipy.cluster.hierarchy.fcluster(self.linkage, k, criterion=criterion)

    kind = return_type.lower()
    labels = self.labels
    # Explicit None/length check: plain truthiness raises ValueError for
    # numpy arrays and pandas Index objects with more than one element.
    has_labels = labels is not None and len(labels) > 0

    # Resolve the per-leaf lookup once instead of once per leaf.
    if kind == 'labels' and has_labels:
        leaves = labels
    elif kind == 'rows':
        leaves = self.mat.index.tolist()
    elif kind == 'columns':
        leaves = self.mat.columns.tolist()
    else:
        leaves = range(len(cl))

    # Single pass over the assignments; one bucket per id between the
    # smallest and largest cluster id (inclusive).
    cluster_ids = cl.tolist()
    first = min(cluster_ids)
    clusters = [[] for _ in range(first, max(cluster_ids) + 1)]
    for j, cid in enumerate(cluster_ids):
        clusters[cid - first].append(leaves[j])
    return clusters
评论列表
文章目录