ar.py 文件源码-python代码片段

ar.py 文件源码

python

阅读 20 收藏 0 点赞 0 评论 0

项目：recommended_system 作者：wac81 项目源码 | 文件源码

def delNOTNeedWords(content, stopwords):
    """Segment *content* and drop stop words and unwanted parts of speech.

    The text is split with ``pseg.lcut`` (presumably ``jieba.posseg``
    imported as ``pseg`` elsewhere in this file -- the import is not
    visible here), which yields ``(word, flag)`` pairs.  A word is kept
    only when it is not in *stopwords* and its part-of-speech flag is not
    one of the excluded tags.

    Args:
        content: The text to segment.
        stopwords: A container of words to discard; only ``in`` membership
            tests are performed on it.

    Returns:
        The kept words concatenated with no separator and encoded as a
        UTF-8 byte string.
    """
    # jieba reports bare tags such as "x" or "uj"; the slash only shows up
    # in the printed "word/flag" form.  The previous list used "/x", "/uj",
    # ... and therefore never matched, so no word was ever filtered by tag.
    excluded_flags = frozenset(["x", "zg", "uj", "ul", "e", "d", "uz", "y"])

    kept_words = [
        word
        for word, flag in pseg.lcut(content)
        if word not in stopwords and flag not in excluded_flags
    ]

    # Join once and encode once at the boundary instead of per word.
    return ''.join(kept_words).encode('utf-8')