def normalize(self, words):
    """
    Normalize a list of word tokens, yielding the tokens that are kept.

    Each token is part-of-speech tagged with ``nltk.pos_tag``, then
    optionally lowercased (``self.lower``) and stripped of surrounding
    whitespace (``self.strip``). Tokens found in ``self.stopwords`` and
    tokens consisting solely of characters from ``self.punct`` (which
    includes the empty string) are dropped. When ``self.lemmatize`` is
    truthy, the surviving token is lemmatized with ``self.lemmatizer``
    before being yielded.

    This is a generator: tagging and filtering only run once the caller
    starts iterating.

    :param words: sequence of word tokens accepted by ``nltk.pos_tag``.
    :returns: generator of normalized word strings.
    """
    # nltk.pos_tag emits Penn Treebank tags by default ("NN", "VBD", "JJ",
    # "RB", ...), while a WordNet-style lemmatizer expects one of the
    # single-letter WordNet POS codes. Map on the first letter of the tag.
    # NOTE(review): assumes self.lemmatizer follows the
    # nltk.stem.WordNetLemmatizer.lemmatize(word, pos) contract -- confirm.
    treebank_to_wordnet = {
        'N': 'n',  # nouns
        'V': 'v',  # verbs
        'J': 'a',  # adjectives
        'R': 'r',  # adverbs
    }

    # Add part of speech tags to the words
    for word, tag in nltk.pos_tag(words):
        if self.lower:
            word = word.lower()
        if self.strip:
            word = word.strip()

        # Stopwords are checked after lower/strip so the cleaned form is
        # what gets compared against the stopword collection.
        if word in self.stopwords:
            continue

        # Drop tokens that are pure punctuation; all() over an empty
        # string is True, so empty tokens are dropped here as well.
        if all(c in self.punct for c in word):
            continue

        if self.lemmatize:
            # Unknown or empty tags fall back to noun, which is the
            # WordNet lemmatizer's own default part of speech.
            pos = treebank_to_wordnet.get(tag[:1], 'n')
            word = self.lemmatizer.lemmatize(word, pos)

        yield word
# Comment list
# Table of contents