def compute_clustering_accuracy(label1, label2):
    """
    Return the clustering error rate, in percent, between two labelings.

    From clustering_on_transcript_compatibility_counts, see github for MIT license

    Despite the function name, the value returned is an *error* rate:
    ``100 * (1 - match_val / n)``, where ``n`` is the number of labelled
    items and ``match_val`` is whatever ``get_max_wt_matching`` returns for
    the label-overlap matrix (presumably the total weight of the best
    one-to-one pairing of labels -- that helper is defined elsewhere).

    Parameters
    ----------
    label1, label2 : array-like
        Cluster labels for the same items, in the same order. Any
        array-like is accepted (lists, tuples, ndarrays); inputs are
        flattened to one dimension before comparison.

    Returns
    -------
    float
        Error rate in percent.

    Raises
    ------
    ValueError
        If the two labelings differ in length, or if they are empty.
    """
    # Coerce to arrays first: comparing a plain list with ``==`` yields a
    # single bool rather than an element-wise mask, which would make every
    # overlap count zero.
    labels_a = np.asarray(label1).ravel()
    labels_b = np.asarray(label2).ravel()

    n_items = labels_a.size
    if n_items != labels_b.size:
        raise ValueError(
            "label1 and label2 must have the same number of entries "
            "(got %d and %d)" % (n_items, labels_b.size)
        )
    if n_items == 0:
        raise ValueError("label1 and label2 must not be empty")

    # The inverse indices map every item to the position of its label in
    # the sorted unique-label arrays.
    uniq1, idx1 = np.unique(labels_a, return_inverse=True)
    uniq2, idx2 = np.unique(labels_b, return_inverse=True)

    # Create an intersection matrix which counts the number of entries that
    # overlap for each label combination. np.add.at accumulates correctly
    # when the same (i, j) pair occurs many times.
    W = np.zeros((len(uniq1), len(uniq2)))
    np.add.at(W, (idx1.ravel(), idx2.ravel()), 1)

    # find the max weight matching
    match_val = get_max_wt_matching(uniq1, uniq2, W)

    # return the error rate
    return (1 - match_val / float(n_items)) * 100
评论列表
文章目录