def clean_review(review, stopwords):
    """Lemmatize, stopword-filter and strip non-letters from a review.

    Args:
        review: Iterable of words. Presumably a list of token strings --
            a plain string would be iterated one character at a time.
        stopwords: Container of words to drop. It is tested with ``in``
            against each lemmatized word, so a set gives the fastest lookups.

    Returns:
        The surviving words, each followed by a single space (a non-empty
        result therefore ends with a trailing space), or ``""`` when no
        word survives.
    """
    lemmatizer = WordNetLemmatizer()
    # Matches everything except ASCII letters and spaces; compiled once
    # here instead of being looked up again for every word.
    non_alpha = re.compile('[^A-Za-z ]')
    kept = []
    for word in review:
        # Reduce the word to its lemma form before the stopword test.
        lemma = lemmatizer.lemmatize(word)
        # The stopword test sees the lemma as-is: no case folding is done
        # and non-letter characters have not been stripped yet.
        if lemma in stopwords:
            continue
        cleaned = non_alpha.sub('', lemma)
        # Words made up only of non-letter characters disappear entirely.
        if cleaned:
            kept.append(cleaned)
    # Each kept word carries its own trailing space, matching the output
    # format of the original accumulate-by-concatenation loop.
    return "".join(item + " " for item in kept)
remove_stopwords_nltk.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录