def get_lemmas(sent, lemmatizer):
    """Lemmatize every token of *sent*, replacing all-digit lemmas.

    Args:
        sent: Iterable of word strings (one token per element).
        lemmatizer: Object exposing ``lemmatize(word)`` and
            ``lemmatize(word, pos)``.
            NOTE(review): presumably an NLTK ``WordNetLemmatizer`` -- confirm
            against the caller.

    Returns:
        A list with one entry per input token, in order: the token's lemma,
        or the literal string ``'number'`` when the lemma consists only of
        digits.
    """
    res = []
    for word in sent:
        # Each token is tagged on its own (single-element list), so the
        # tagger is given no surrounding sentence context.
        tag = nltk.pos_tag([word])[0][1]
        # NOTE(review): get_wordnet_pos is defined elsewhere; it presumably
        # maps the tagger's tag to a WordNet POS and yields '' when there is
        # no mapping -- confirm.
        pos = get_wordnet_pos(tag)
        if pos == '':
            # No POS available: rely on the lemmatizer's default POS.
            lemma = lemmatizer.lemmatize(word)
        else:
            lemma = lemmatizer.lemmatize(word, pos)
        # Collapse every purely numeric lemma into one placeholder token.
        res.append('number' if lemma.isdigit() else lemma)
    return res
评论列表
文章目录