def extract_tags(sentence,topK=20):
words = jieba.cut(sentence)
freq = {}
for w in words:
if len(w.strip())<2: continue
if w.lower() in stop_words: continue
freq[w]=freq.get(w,0.0)+1.0
total = sum(freq.values())
freq = [(k,v/total) for k,v in freq.iteritems()]
tf_idf_list = [(v * idf_freq.get(k,median_idf),k) for k,v in freq]
st_list = sorted(tf_idf_list,reverse=True)
top_tuples= st_list[:topK]
tags = [a[1] for a in top_tuples]
return tags
评论列表
文章目录