test_analysis.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:zippy 作者: securesystemslab 项目源码 文件源码
def test_porter2():
    """Check whoosh's Porter2 ``stem`` against a fixed table of known stems.

    Every word in the table is stemmed in order and the full list of
    results is compared to the expected stems in a single assertion.
    Two possessive forms are then checked individually.
    """
    from whoosh.lang.porter2 import stem

    # (input word, expected stem) pairs. The spelling of each input is
    # fixture data and is kept exactly as given (including 'siezing').
    word_to_stem = [
        ('caresses', 'caress'),
        ('flies', 'fli'),
        ('dies', 'die'),
        ('mules', 'mule'),
        ('denied', 'deni'),
        ('died', 'die'),
        ('agreed', 'agre'),
        ('owned', 'own'),
        ('humbled', 'humbl'),
        ('sized', 'size'),
        ('meeting', 'meet'),
        ('stating', 'state'),
        ('siezing', 'siez'),
        ('itemization', 'item'),
        ('sensational', 'sensat'),
        ('traditional', 'tradit'),
        ('reference', 'refer'),
        ('colonizer', 'colon'),
        ('plotted', 'plot'),
    ]
    inputs = [word for word, _ in word_to_stem]
    wanted = [expected for _, expected in word_to_stem]

    # Compare whole lists so a failure reports every stem at once.
    produced = list(map(stem, inputs))
    assert produced == wanted

    # Possessive forms: the expected stem has no trailing "'s".
    for possessive, bare in (("bill's", "bill"), ("y's", "y")):
        assert stem(possessive) == bare


#def test_pystemmer():
#    Stemmer = pytest.importorskip("Stemmer")
#
#    ana = (analysis.RegexTokenizer()
#           | analysis.LowercaseFilter()
#           | analysis.PyStemmerFilter())
#    schema = fields.Schema(text=fields.TEXT(analyzer=ana))
#    st = RamStorage()
#
#    ix = st.create_index(schema)
#    with ix.writer() as w:
#        w.add_document(text=u("rains falling strangely"))
#
#    ix = st.open_index()
#    with ix.writer() as w:
#        w.add_document(text=u("pains stalling strongly"))
#
#    ix = st.open_index()
#    with ix.reader() as r:
#        assert (list(r.field_terms("text"))
#                == ["fall", "pain", "rain", "stall", "strang", "strong"])
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号