test_corpora_hashdictionary.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:topical_word_embeddings 作者: thunlp 项目源码 文件源码
def testFilter(self):
        """Check ``dfs`` after ``filter_extremes`` for three argument sets.

        Every scenario builds a fresh ``HashDictionary`` from ``self.texts``
        using ``zlib.adler32`` as the hash function, calls
        ``filter_extremes`` with the scenario's keyword arguments, and
        compares the resulting ``dfs`` mapping with the expected one.
        """
        # Each entry pairs the keyword arguments for ``filter_extremes`` with
        # the ``dfs`` mapping expected afterwards. Scenarios run in order.
        scenarios = (
            # Library defaults: ``dfs`` is expected to end up empty.
            (
                {},
                {},
            ),
            (
                {'no_below': 0, 'no_above': 0.3},
                {29104: 2, 31049: 2, 28591: 2, 5232: 2,
                 10608: 2, 12466: 2, 15001: 2, 31002: 2},
            ),
            (
                {'no_below': 3, 'no_above': 1.0, 'keep_n': 4},
                {5798: 3, 12736: 3, 18451: 3, 23844: 3},
            ),
        )

        for filter_kwargs, wanted_dfs in scenarios:
            # A new dictionary per scenario, so one filter call cannot affect
            # the next.
            dictionary = HashDictionary(self.texts, myhash=zlib.adler32)
            dictionary.filter_extremes(**filter_kwargs)
            self.assertEqual(dictionary.dfs, wanted_dfs)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号