def keyword_extractor(data):
    """Return up to ten keywords extracted from ``data``.

    The input is passed through ``strip_tags`` and ``words_wo_stopwords``,
    tokenized with ``word_tokenize``, tagged with ``pos_tag`` and reduced by
    ``filter_insignificant``. The surviving tokens are joined with single
    spaces and handed to ``WordCloudMod().generate``; the first ten keys of
    the object it returns are the result.

    This is a best-effort helper: any ``Exception`` raised along the way is
    printed through ``colored.red`` and an empty list is returned instead.
    """
    try:
        without_stopwords = words_wo_stopwords(strip_tags(data))
        # TODO: strip_tags runs a second time here on text that was already
        # stripped above; this duplicated work should be improved.
        tokens = word_tokenize(strip_tags(without_stopwords))
        significant = filter_insignificant(pos_tag(tokens))
        joined = " ".join(significant)
        cloud = WordCloudMod().generate(joined)
        return list(cloud.keys())[:10]
    except Exception as err:
        print(colored.red("At keywords extraction {}".format(err)))
        return []
# TODO: this could definitely be improved if we knew where the content is located
评论列表
文章目录