def finalize(self):
    """Merge overlapping clusters and return the document as a plain dict.

    Iterates over ``self.clusters.values()`` in order and groups together
    every cluster that shares at least one mention with another. Mentions
    must be hashable because they are stored in sets.

    A cluster may overlap several previously built groups at once (for
    example ``{a, b}``, ``{c, d}`` and then ``{b, c}``). In that case all
    of the overlapping groups are folded into the earliest one, so the
    resulting clusters are always pairwise disjoint.

    Side effects:
        Prints a notice to stdout each time a cluster has to be merged
        into an existing group.

    Returns:
        dict: with the keys ``"doc_key"``, ``"sentences"`` and
        ``"speakers"`` (taken unchanged from the same-named attributes of
        ``self``) and ``"clusters"``, a list of lists of mentions.
    """
    merged_clusters = []
    for cluster in self.clusters.values():
        # Every existing group that shares a mention with this cluster.
        overlapping = [
            group for group in merged_clusters if not group.isdisjoint(cluster)
        ]
        if not overlapping:
            merged_clusters.append(set(cluster))
            continue

        print("Merging clusters (shouldn't happen very often.)")
        target = overlapping[0]
        absorbed = overlapping[1:]
        target.update(cluster)
        if absorbed:
            # The new cluster bridges several groups: collapse them all into
            # the earliest one, otherwise the shared mentions would remain in
            # more than one output cluster.
            for group in absorbed:
                target.update(group)
            # Sets are unhashable and compare by content, so drop the
            # absorbed groups by identity.
            merged_clusters = [
                group
                for group in merged_clusters
                if not any(group is gone for gone in absorbed)
            ]

    # Sanity check: no mention may belong to more than one cluster.
    assert sum(len(group) for group in merged_clusters) == len(
        set().union(*merged_clusters)
    )
    return {
        "doc_key": self.doc_key,
        "sentences": self.sentences,
        "speakers": self.speakers,
        "clusters": [list(group) for group in merged_clusters],
    }
# (Web-page residue, not code) Comment list
# (Web-page residue, not code) Table of contents