word_coocurrence.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:guesswhat 作者: GuessWhatGame 项目源码 文件源码
def __init__(self, path, games, logger, suffix):
        super(WordCoocurence, self).__init__(path, self.__class__.__name__, suffix)

        questions = []
        word_counter = collections.Counter()

        NO_WORDS_TO_DISPLAY = 50

        for game in games:
            # split questions into words
            for q in game.questions:
                questions.append(q)
                q = re.sub('[?]', '', q)
                words = re.findall(r'\w+', q)

                for w in words:
                    word_counter[w.lower()] += 1


        # compute word co-coocurrence
        common_words = word_counter.most_common(NO_WORDS_TO_DISPLAY)
        common_words = [pair[0] for pair in common_words]
        corrmat = np.zeros((NO_WORDS_TO_DISPLAY, NO_WORDS_TO_DISPLAY))

        # compute the correlation matrices
        for i, question in enumerate(questions):
            for word in question:
                if word in common_words:
                    for other_word in question:
                        if other_word in common_words:
                            if word != other_word:
                                corrmat[common_words.index(word)][common_words.index(other_word)] += 1.

        # Display the cor matrix
        df = pd.DataFrame(data=corrmat, index=common_words, columns=common_words)
        f = sns.clustermap(df, standard_scale=0, col_cluster=False, row_cluster=True, cbar_kws={"label": "co-occurence"})
        f.ax_heatmap.xaxis.tick_top()

        plt.setp(f.ax_heatmap.get_xticklabels(), rotation=90)
        plt.setp(f.ax_heatmap.get_yticklabels(), rotation=0)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号