def test_chunkers_and_filters(self):
    """Test SpellChecker with the 'chunkers' and 'filters' arguments.

    The same text is run through three checker configurations:

    * URLFilter + HTMLChunker: no spelling error may be reported.
    * URLFilter only: the first reported error must be the word "html".
    * HTMLChunker only: the first reported error must be the word "http".

    After each run the checker's text must still equal the input text.
    """
    # NOTE(review): the href attribute opens with ' and closes with " --
    # this looks like a typo, but it is part of the fixture and is kept
    # as-is; confirm whether it is intentional.
    text = (
        "I contain <html a=xjvf>tags</html> that should be skipped\n"
        "along with a <a href='http://example.com/\">link to\n"
        "http://example.com/</a> that should also be skipped"
    )

    # There are no errors when things are correctly skipped
    chkr = SpellChecker(
        "en_US",
        text=text,
        filters=[enchant.tokenize.URLFilter],
        chunkers=[enchant.tokenize.HTMLChunker],
    )
    for _err in chkr:
        self.fail("Extraneous spelling errors were found")
    self.assertEqual(chkr.get_text(), text)

    # The "html" is an error when not using HTMLChunker
    chkr = SpellChecker(
        "en_US",
        text=text,
        filters=[enchant.tokenize.URLFilter],
    )
    for err in chkr:
        # Only the first reported error is inspected.
        self.assertEqual(err.word, "html")
        break
    else:
        # Without this branch the check would pass silently if the
        # checker reported no errors at all.
        self.fail("Expected 'html' to be reported without HTMLChunker")
    self.assertEqual(chkr.get_text(), text)

    # The "http" from the URL is an error when not using URLFilter
    chkr = SpellChecker(
        "en_US",
        text=text,
        chunkers=[enchant.tokenize.HTMLChunker],
    )
    for err in chkr:
        # Only the first reported error is inspected.
        self.assertEqual(err.word, "http")
        break
    else:
        # Same guard as above: an empty error stream must fail the test.
        self.fail("Expected 'http' to be reported without URLFilter")
    self.assertEqual(chkr.get_text(), text)
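# Scraped page-navigation residue (not part of the test code): "Comment list"
# Scraped page-navigation residue (not part of the test code): "Table of contents"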