def calc_agg_coeff(self):
    """Return the agglomerative coefficient of the linkage matrix.

    The coefficient measures how strong the clustering structure is.
    Because it grows with the number of observations, it should not be
    used to compare datasets of very different sizes.

    For each observation i, denote by m(i) the dissimilarity at which it
    is first merged into a cluster, divided by the dissimilarity of the
    final merger. The agglomerative coefficient is the average of
    1 - m(i) over all observations.

    Reads ``self.linkage`` (rows of ``obs1, obs2, dist, n_org``) and
    ``self.leafs`` (presumably the ids of the original observations --
    verify against where it is assigned). Neither is modified.

    Returns
    -------
    float
        Mean of ``1 - m(i)``.
    """
    # Turn into pandas DataFrame for label-based masking
    Z = pd.DataFrame(self.linkage, columns=['obs1', 'obs2', 'dist', 'n_org'])

    # Collect one merge distance per *observation*, not per linkage row:
    # a row that joins two original observations must contribute its
    # distance twice, otherwise the mean is taken over merges instead of
    # over observations.
    first_merge_dist = np.concatenate([
        Z.loc[Z.obs1.isin(self.leafs), 'dist'].values,
        Z.loc[Z.obs2.isin(self.leafs), 'dist'].values,
    ])

    # Normalise by the height of the final merger. Deliberately not an
    # in-place division, so no array handed out by pandas is written to.
    relative_dist = first_merge_dist / self.linkage[-1][2]

    # Calc final coefficient
    return np.mean(1 - relative_dist)
评论列表
文章目录