def delNOTNeedWords(content,stopwords):
    """Segment *content* and return the kept words joined into one string.

    The text is tokenised with ``pseg.lcut`` into (word, flag) pairs.  A
    word is dropped when it appears in *stopwords* or when its
    part-of-speech flag is one of the excluded tags below.  Every kept
    word is UTF-8 encoded and the pieces are concatenated with no
    separator.

    Args:
        content: Text to segment.
        stopwords: Container of words to discard (tested with ``in``).

    Returns:
        The kept words, UTF-8 encoded and concatenated; ``''`` when
        nothing is kept.

    NOTE(review): joining encoded words onto a ``''`` literal relies on
    ``str`` being a byte string (Python 2) -- confirm the target
    interpreter before porting.
    """
    # jieba reports flags without a leading slash ("x", "uj", ...); the
    # slash only shows up in the "word/flag" string form.  The previous
    # list used "/x"-style entries, so the flag filter never matched.
    excluded_flags = frozenset(["x", "zg", "uj", "ul", "e", "d", "uz", "y"])

    kept = []
    for word, flag in pseg.lcut(content):
        if word in stopwords:
            continue
        # lstrip keeps any "/x"-style flag filtered exactly as before.
        if flag.lstrip("/") in excluded_flags:
            continue
        kept.append(word.encode('utf-8'))
    # Single join instead of repeated += inside the loop.
    return ''.join(kept)
评论列表
文章目录