def test_porter2():
    """Check the Porter2 ``stem`` function against known word/stem pairs."""
    from whoosh.lang.porter2 import stem

    # Each entry is (input word, stem the test expects back).
    cases = (
        ('caresses', 'caress'),
        ('flies', 'fli'),
        ('dies', 'die'),
        ('mules', 'mule'),
        ('denied', 'deni'),
        ('died', 'die'),
        ('agreed', 'agre'),
        ('owned', 'own'),
        ('humbled', 'humbl'),
        ('sized', 'size'),
        ('meeting', 'meet'),
        ('stating', 'state'),
        ('siezing', 'siez'),
        ('itemization', 'item'),
        ('sensational', 'sensat'),
        ('traditional', 'tradit'),
        ('reference', 'refer'),
        ('colonizer', 'colon'),
        ('plotted', 'plot'),
    )
    words = [word for word, _ in cases]
    expected = [root for _, root in cases]

    # Compare the whole list at once, as a single assertion.
    assert list(map(stem, words)) == expected

    # A trailing "'s" is expected to be removed from the word.
    for possessive, bare in (("bill's", "bill"), ("y's", "y")):
        assert stem(possessive) == bare
#def test_pystemmer():
#    Stemmer = pytest.importorskip("Stemmer")
#
#    ana = (analysis.RegexTokenizer()
#           | analysis.LowercaseFilter()
#           | analysis.PyStemmerFilter())
#    schema = fields.Schema(text=fields.TEXT(analyzer=ana))
#    st = RamStorage()
#
#    ix = st.create_index(schema)
#    with ix.writer() as w:
#        w.add_document(text=u("rains falling strangely"))
#
#    ix = st.open_index()
#    with ix.writer() as w:
#        w.add_document(text=u("pains stalling strongly"))
#
#    ix = st.open_index()
#    with ix.reader() as r:
#        assert (list(r.field_terms("text"))
#                == ["fall", "pain", "rain", "stall", "strang", "strong"])
# Non-code residue from the hosting web page: "Comment list" / "Article table of contents".