def stem(words, stem_dic, mode="nltk", silent=1):
    """Return the stem of every word in ``words``, caching stems in ``stem_dic``.

    Args:
        words: Iterable of words (hashable; strings for the NLTK stemmer).
            It may be a one-shot iterator; it is materialized once.
        stem_dic: Mutable mapping ``word -> stem`` used as a cache. Words
            that are not present yet are stemmed and stored in it in place.
        mode: Stemming backend. Only ``"nltk"`` (NLTK's ``PorterStemmer``)
            is supported.
        silent: When equal to ``0`` a progress message is printed.

    Returns:
        A new list holding the stem of each input word, in input order.

    Raises:
        AssertionError: If ``mode`` is not ``"nltk"``.
    """
    if silent == 0:
        print("stem ...")

    if mode != "nltk":
        print("unknown mode", mode)
        # Raised explicitly instead of `assert 0`: an assert statement is
        # stripped under `python -O`, which would let execution continue
        # with no stemmer defined.
        raise AssertionError("unknown mode: {}".format(mode))

    # `words` is traversed twice below; materialize it so that generators
    # and other one-shot iterators are not exhausted after the first pass.
    words = list(words)

    missing = [word for word in set(words) if word not in stem_dic]
    if missing:
        # Import and build the stemmer only when there is something new to
        # stem, so fully cached calls do not pay for (or require) NLTK.
        from nltk.stem.porter import PorterStemmer
        stemmer = PorterStemmer()
        for word in missing:
            stem_dic[word] = stemmer.stem(word)

    return [stem_dic[word] for word in words]
评论列表
文章目录