test_util.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:redbiom 作者: biocore 项目源码 文件源码
def test_stems(self):
        """Check ``stems`` against a table of phrases and expected stems.

        Every case pairs an input phrase with the exact, ordered list of
        stems that ``stems`` should produce when given the NLTK English
        stopword set and a Porter stemmer. The expectations encode that
        numeric-looking tokens, single-character tokens and common words
        such as "is", "the", "then" and "all" (presumably as stopwords)
        do not appear in the output, and that trailing punctuation is not
        part of a stem.
        """
        import nltk

        porter = nltk.PorterStemmer(nltk.PorterStemmer.MARTIN_EXTENSIONS)
        stopword_set = frozenset(nltk.corpus.stopwords.words('english'))

        cases = (
            # plain words pass through unchanged
            ("foo bar", ["foo", "bar"]),
            ("ab cd", ["ab", "cd"]),
            # assume single char stems are useless
            ("a b c d", []),
            # prices, signed numbers, integers and clock times are dropped
            ("foo $1.23 is the bar", ["foo", "bar"]),
            ("-1.23 1.23 foo", ["foo"]),
            ("-123 foo 123", ["foo"]),
            ("8:12 12:34am foo", ["foo"]),
            ("drop 12 all 3.45 the 0.123 numbers", ["drop", "number"]),
            # trailing punctuation is not part of the stem
            ("ab. foo, then bar", ["ab", "foo", "bar"]),
            # words are reduced to their stems
            ("crying infants", ["cry", "infant"]),
        )

        for phrase, expected in cases:
            # materialise the result so it compares equal to a list
            observed = list(stems(stopword_set, porter, phrase))
            self.assertEqual(observed, expected)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号