def _lemma_(token):
    """Normalize ``token``: lemmatize verbs, stem everything else.

    Plain text tokens are passed straight to ``_stem_``.  Any other token is
    treated as a POS-tagged node: its first ``(word, tag)`` pair from
    ``token.pos()`` supplies the Treebank tag, and ``token[0]`` supplies the
    word.  Only tokens whose tag maps to ``wordnet.VERB`` go through the
    WordNet lemmatizer; every other part of speech is stemmed.

    Args:
        token: Either a text string, or an object exposing ``pos()`` and
            indexing (presumably an ``nltk.Tree`` leaf node -- TODO confirm
            against callers).

    Returns:
        The result of ``_stem_`` for text and non-verb tokens, otherwise the
        WordNet lemma of ``token[0]`` as a verb.
    """
    # `unicode` exists only on Python 2; on Python 3 the bare name raises
    # NameError, which previously broke every non-str token.
    try:
        text_types = (str, unicode)  # noqa: F821 - Python 2 only
    except NameError:
        text_types = (str,)

    if isinstance(token, text_types):
        return _stem_(token)

    # Imported lazily so the plain-string path above does not require nltk.
    from nltk.corpus import wordnet
    from nltk.stem import WordNetLemmatizer

    def get_wordnet_pos(treebank_tag):
        """Map a Treebank tag prefix to a WordNet POS constant ('' if none)."""
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            return ''

    # token.pos() yields (word, tag) pairs; only the first pair's tag is used.
    p = get_wordnet_pos(token.pos()[0][1])
    if p != wordnet.VERB:
        return _stem_(token[0])
    return WordNetLemmatizer().lemmatize(token[0], pos=p)
评论列表
文章目录