def test_tweet_tokenizer(self):
"""
Test TweetTokenizer using words with special and accented characters.
"""
tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
s9 = "@myke: Let's test these words: resumé España München français"
tokens = tokenizer.tokenize(s9)
expected = [':', "Let's", 'test', 'these', 'words', ':', 'resumé',
'España', 'München', 'français']
self.assertEqual(tokens, expected)
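# A minimal companion sketch, not part of the original suite: it reuses the
# same TweetTokenizer settings and the handle-stripping / length-reduction
# example from the NLTK documentation, where reduce_len=True caps runs of
# three or more repeated characters at three and strip_handles=True drops
# the "@remy" mention.
def test_tweet_tokenizer_reduce_len(self):
    """
    Test TweetTokenizer handle stripping and repeated-character reduction.
    """
    tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
    s = "@remy: This is waaaaayyyy too much for you!!!!!!"
    tokens = tokenizer.tokenize(s)
    expected = [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for',
                'you', '!', '!', '!']
    self.assertEqual(tokens, expected)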