def cluster(self, method='ward'):
    """ Cluster distance matrix. This will automatically be called when
    attribute linkage is requested for the first time.

    Reads ``self.mat`` (a ``pandas.DataFrame`` or an array-like that
    supports ``.max()`` and arithmetic) and ``self.mat_type``. Anything
    other than ``'distance'`` is treated as a similarity matrix and is
    converted to distances as ``max(mat) - mat``.

    Sets the following attributes:

    - ``dist_mat``: square distance matrix as a plain array
    - ``condensed_dist_mat``: condensed form of ``dist_mat``
    - ``linkage``: linkage matrix returned by scipy
    - ``method``: the clustering method that was used

    Parameters
    ----------
    method :    str, optional
                Clustering method (see scipy.cluster.hierarchy.linkage
                for reference)
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields
    # the same underlying array and is available in every pandas version.
    is_frame = isinstance(self.mat, pd.DataFrame)
    scores = self.mat.values if is_frame else self.mat

    # First, convert similarity matrix to distance matrix
    if self.mat_type != 'distance':
        # For DataFrames keep pandas' column-wise max (it skips NaN)
        # so the reference maximum is the same as before.
        top_score = self.mat.max().max() if is_frame else self.mat.max()
        self.dist_mat = (scores - top_score) * -1
    else:
        self.dist_mat = scores

    # Second, convert into condensed distance matrix - otherwise clustering
    # thinks we are passing observations instead of final scores
    self.condensed_dist_mat = scipy.spatial.distance.squareform(
        self.dist_mat, checks=False)

    self.linkage = scipy.cluster.hierarchy.linkage(
        self.condensed_dist_mat, method=method)

    # Save method in case we want to look it up later
    self.method = method

    module_logger.info('Clustering done using method "{0}"'.format(method))
# (web-page residue, not code) Comment list
# (web-page residue, not code) Table of contents