clusters.py 文件源码-python代码片段

def compare_with_children(
            self, idea_id, post_ids, post_clusters, remainder, labels):
        # Compare to children classification
        compare_with_ideas = None
        all_idea_scores = []
        ideas_of_post = defaultdict(list)
        children_remainder = set(post_ids)
        children_ids = self.idea_children[idea_id]
        if len(children_ids):
            posts_of_children = {
                child_id: self.get_posts_of_idea(child_id)
                for child_id in children_ids}
            for idea_id, c_post_ids in posts_of_children.items():
                for post_id in c_post_ids:
                    ideas_of_post[post_id].append(idea_id)
                children_remainder -= set(c_post_ids)
            for post_id in children_remainder:
                ideas_of_post[post_id] = [idea_id]
            # if many ideas to a post, choose one with the most ideas in same cluster.
            # A bit arbitrary but I need a single idea.
            for cluster in chain(post_clusters, (remainder,)):
                idea_score = defaultdict(int)
                all_idea_scores.append(idea_score)
                for post_id in cluster:
                    for idea_id in ideas_of_post[post_id]:
                        idea_score[idea_id] += 1
                for post_id in cluster:
                    if len(ideas_of_post[post_id]) > 1:
                        scores = [(idea_score[idea_id], idea_id)
                                  for idea_id in ideas_of_post[post_id]]
                        scores.sort(reverse=True)
                        ideas_of_post[post_id] = [score[1] for score in scores]
            # index_by_post_id = {v: k for (k, v) in post_id_by_index.iteritems()}
            idea_of_index = [ideas_of_post[post_id][0] for post_id in post_ids]
            compare_with_ideas = {
                "Homogeneity": metrics.homogeneity_score(idea_of_index, labels),
                "Completeness": metrics.completeness_score(idea_of_index, labels),
                "V-measure": metrics.v_measure_score(idea_of_index, labels),
                "Adjusted Rand Index": metrics.adjusted_rand_score(
                    idea_of_index, labels),
                "Adjusted Mutual Information": metrics.adjusted_mutual_info_score(
                    idea_of_index, labels)}
        else:
            for post_id in children_remainder:
                ideas_of_post[post_id] = [idea_id]
            for cluster in chain(post_clusters, (remainder,)):
                all_idea_scores.append({idea_id: len(cluster)})
        return (compare_with_ideas, all_idea_scores, ideas_of_post,
                children_remainder)