def words_wo_stopwords(text):
    """
    Remove stop words from ``text`` and return the remaining tokens as a string.

    The stop-word set is built from three sources: ``stopwords.words('english')``,
    the entries returned by ``load_stop_words`` for
    ``<settings.BASE_DIR>/aggregator/data/stop_words.txt``, and the two
    contraction fragments ``'s`` and ``n't``.

    ``text`` is passed through ``strip_tags`` and then ``word_tokenize``. Each
    token is lower-cased before the stop-word lookup (the stop-word entries
    themselves are not normalised); tokens that survive keep their original
    casing.

    :param text: the text to clean (presumably may contain HTML markup, since
        it is run through ``strip_tags`` -- confirm against callers).
    :return: the surviving tokens joined by single spaces.
    """
    nltk_stopwords = stopwords.words('english')
    specifics = load_stop_words(
        stop_word_file=join(settings.BASE_DIR, "aggregator", 'data', 'stop_words.txt')
    )
    # Keep the stop words in a set: membership is tested once per token, and a
    # list would make every one of those tests a linear scan.
    stop_words = set(nltk_stopwords + specifics + ["'s", "n't"])

    tokens = word_tokenize(strip_tags(text))
    cleaned = [token for token in tokens if token.lower() not in stop_words]
    return " ".join(cleaned)
评论列表
文章目录