def __init__(self, mode=NLTK_EXTENSIONS):
if mode not in (
self.NLTK_EXTENSIONS,
self.MARTIN_EXTENSIONS,
self.ORIGINAL_ALGORITHM
):
raise ValueError(
"Mode must be one of PorterStemmer.NLTK_EXTENSIONS, "
"PorterStemmer.MARTIN_EXTENSIONS, or "
"PorterStemmer.ORIGINAL_ALGORITHM"
)
self.mode = mode
if self.mode == self.NLTK_EXTENSIONS:
# This is a table of irregular forms. It is quite short,
# but still reflects the errors actually drawn to Martin
# Porter's attention over a 20 year period!
irregular_forms = {
"sky" : ["sky", "skies"],
"die" : ["dying"],
"lie" : ["lying"],
"tie" : ["tying"],
"news" : ["news"],
"inning" : ["innings", "inning"],
"outing" : ["outings", "outing"],
"canning" : ["cannings", "canning"],
"howe" : ["howe"],
"proceed" : ["proceed"],
"exceed" : ["exceed"],
"succeed" : ["succeed"],
}
self.pool = {}
for key in irregular_forms:
for val in irregular_forms[key]:
self.pool[val] = key
self.vowels = frozenset(['a', 'e', 'i', 'o', 'u'])
评论列表
文章目录