def rm_stop_words(data, mode="nltk", silent=1):
    """Remove English stop words from a collection of words.

    Args:
        data: The words to filter. A ``list`` is filtered into a new list
            (items are compared case-insensitively via ``str.lower``).
            A ``set``, ``dict`` or ``Counter`` is modified in place; in that
            case only exact matches against the stop-word list are removed
            (no lower-casing is applied to the keys/elements).
        mode: Source of the stop-word list. Only ``"nltk"`` is supported,
            which uses ``nltk.corpus.stopwords.words('english')``.
        silent: When equal to ``0`` a progress message is printed.

    Returns:
        The filtered list for list input, otherwise the same ``data``
        object after stop words have been removed from it.

    Raises:
        AssertionError: If ``mode`` is not a supported value.
    """
    if silent == 0:
        print("remove stop words ...")

    if mode == "nltk":
        # Imported lazily so that nltk is only required when this mode is used.
        # NOTE(review): presumably requires the NLTK "stopwords" corpus to be
        # installed locally -- confirm against the deployment environment.
        from nltk.corpus import stopwords
        stop_words = set(stopwords.words('english'))
    else:
        print("unknown mode", mode)
        # Raise explicitly instead of ``assert 0``: assert statements are
        # stripped under ``python -O``, which would let execution continue
        # with ``stop_words`` unbound.
        raise AssertionError("unknown mode {!r}".format(mode))

    if isinstance(data, list):
        return [word for word in data if word.lower() not in stop_words]

    if isinstance(data, (set, frozenset)):
        if isinstance(data, frozenset):
            # frozenset cannot be mutated; hand back a filtered copy.
            return data - stop_words
        # Sets do not support ``del data[word]``; remove in place instead.
        data.difference_update(stop_words)
        return data

    # Mapping-like input (dict, Counter, ...): drop matching keys in place.
    for word in stop_words:
        if word in data:
            del data[word]
    return data
评论列表
文章目录