tasks.py 文件源码-python代码片段

tasks.py 文件源码

python

阅读 40 收藏 0 点赞 0 评论 0

def keyword_extractor(data):
    try:
        #np_extractor = NPExtractor(words_wo_stopwords(strip_tags(data)))
        #result = np_extractor.extract()
        text = words_wo_stopwords(strip_tags(data))

        #TODO this is duplicated job, should be improved
        words = word_tokenize(strip_tags(text))
        taggged = pos_tag(words)
        cleaned = filter_insignificant(taggged)
        text = " ".join(cleaned)
        wc = WordCloudMod().generate(text)
        result = list(wc.keys())[:10]
    except Exception as err:
        print(colored.red("At keywords extraction {}".format(err)))
        result = []

    return result


# TODO definitely can be better if we knew where content is