def build_clusters(predicted_scores, method='centroid'):
    """Build one agglomerative clustering per document from pairwise scores.

    For every document the pairwise scores are made symmetric by averaging
    ``scores[i, j]`` and ``scores[j, i]``, flattened into a condensed
    distance vector (upper triangle, row by row) and passed to ``linkage``.

    Args:
        predicted_scores: sequence indexable by ``0 .. len - 1``; each item
            holds the scores of one document and is addressed as
            ``scores[i, j]`` (presumably a square mentions-by-mentions
            array -- confirm against the caller).
        method: method for calculating the distance between clusters,
            forwarded unchanged to ``linkage``; see the
            scipy.cluster.hierarchy.linkage documentation.

    Returns:
        A tuple ``(clustering, min_score, max_score)``:
            clustering: list of linkage results in document order, one per
                document that has at least two mentions. Documents with
                fewer mentions are skipped, so a position in this list is
                not necessarily the document index.
            min_score: smallest merge distance (column 2 of the linkage
                results) over all clustered documents; stays at ``1e10``
                when nothing was clustered.
            max_score: largest merge distance over all clustered documents;
                never below its starting value ``0``.

    NOTE(review): SciPy documents 'centroid', 'median' and 'ward' as
    correctly defined only for Euclidean distances -- verify that the
    predicted scores are suitable for the default method.
    """
    print('building clusters')
    min_score = 1e10
    max_score = 0
    clustering = []
    for doc_id in tqdm(range(len(predicted_scores))):
        scores = predicted_scores[doc_id]
        n_mentions = len(scores)
        # With fewer than two mentions there is no pair to link; `linkage`
        # raises ValueError on an empty distance vector, so skip the document
        # exactly like an empty one.
        if n_mentions < 2:
            continue
        # Condensed form of the symmetrised score matrix: pairs (i, j) with
        # i < j, in row-major order, as `linkage` expects.
        distances = [
            (scores[i, j] + scores[j, i]) / 2
            for i in range(n_mentions)
            for j in range(i + 1, n_mentions)
        ]
        linkage_matrix = linkage(distances, method=method)
        clustering.append(linkage_matrix)
        # Column 2 of a linkage matrix holds the distance of each merge.
        merge_distances = linkage_matrix[:, 2]
        min_score = min(merge_distances.min(), min_score)
        max_score = max(merge_distances.max(), max_score)
    print('clusters are built: min_score: {} max_score: {}'.format(min_score, max_score))
    return clustering, min_score, max_score
# Web-page navigation residue (not code): "评论列表" (comment list), "文章目录" (table of contents)