porter.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:hate-to-hugs 作者: sdoran35 项目源码 文件源码
def __init__(self, mode=NLTK_EXTENSIONS):
        """Configure the stemmer for the requested ``mode``.

        :param mode: Must equal one of ``self.NLTK_EXTENSIONS``,
            ``self.MARTIN_EXTENSIONS`` or ``self.ORIGINAL_ALGORITHM``.
        :raises ValueError: If ``mode`` matches none of those three values.

        Sets ``self.mode`` and ``self.vowels`` unconditionally; ``self.pool``
        is created only when ``mode`` is ``NLTK_EXTENSIONS``.
        """
        supported_modes = (
            self.NLTK_EXTENSIONS,
            self.MARTIN_EXTENSIONS,
            self.ORIGINAL_ALGORITHM,
        )
        if mode not in supported_modes:
            message = (
                "Mode must be one of PorterStemmer.NLTK_EXTENSIONS, "
                "PorterStemmer.MARTIN_EXTENSIONS, or "
                "PorterStemmer.ORIGINAL_ALGORITHM"
            )
            raise ValueError(message)

        self.mode = mode

        if self.mode == self.NLTK_EXTENSIONS:
            # This is a table of irregular forms. It is quite short,
            # but still reflects the errors actually drawn to Martin
            # Porter's attention over a 20 year period!
            irregular_table = {
                "sky": ["sky", "skies"],
                "die": ["dying"],
                "lie": ["lying"],
                "tie": ["tying"],
                "news": ["news"],
                "inning": ["innings", "inning"],
                "outing": ["outings", "outing"],
                "canning": ["cannings", "canning"],
                "howe": ["howe"],
                "proceed": ["proceed"],
                "exceed": ["exceed"],
                "succeed": ["succeed"],
            }

            # Invert the table: every listed form becomes a key that maps
            # back to the table entry it was listed under.
            self.pool = {
                surface_form: target
                for target, surface_forms in irregular_table.items()
                for surface_form in surface_forms
            }

        # Immutable set of the five vowel letters.
        self.vowels = frozenset("aeiou")
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号