def wordcloud_visualization(corpus, topics, num_docs=None, min_df=0.1,
                            ngrams=1, weighting='tf', max_df=0.7, mds='pcoa',
                            *args, **kwargs):
    """Render one word-cloud image per category of the prepared topic table.

    A model is built with ``build_model``, handed to ``prepare``, and the
    resulting ``topic_info`` table is split on its ``Category`` column.  For
    each category the 20 rows with the largest ``Total`` are drawn as a
    600x300 word cloud on a white background and written with
    ``plt.savefig`` into an in-memory buffer.

    Args:
        corpus: Forwarded to ``build_model``.
        topics: Forwarded to ``build_model``.
        num_docs: Forwarded to ``build_model``.
        min_df: Forwarded to ``build_model``.
        ngrams: Forwarded to ``build_model``.
        weighting: Forwarded to ``build_model``.
        max_df: Forwarded to ``build_model``.
        mds: Forwarded to ``prepare`` as its ``mds`` keyword argument.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.

    Returns:
        A list of ``(label, image_data)`` tuples, one per ``Category`` value,
        in the order produced by ``groupby(['Category']).groups.keys()``.
        ``image_data`` is whatever ``plt.savefig`` wrote into the buffer.
    """
    font = pkg_resources.resource_filename(__name__,
                                           "fonts/ZillaSlab-Medium.ttf")
    # Positional order below follows the original call, which differs from
    # the order of this function's own parameters.
    model, doc_term_matrix, vectorizer = build_model(
        corpus, topics, num_docs, ngrams, weighting, min_df, max_df
    )
    prep_data = prepare(model.model, doc_term_matrix, vectorizer, mds=mds)
    topic_info = prep_data.topic_info
    # Sample of the topic_info layout (kept from the original source):
    #
    #       Category        Freq    Term       Total  loglift  logprob
    # term
    # 478    Default  738.000000  specie  738.000000   1.0000   1.0000
    # ...        ...         ...     ...         ...      ...      ...
    # 191    Topic10   25.344278   space  145.983738   1.8935  -5.0376
    # 190    Topic10   32.076070   green  193.201661   1.8488  -4.8020
    # 319    Topic10   12.129367  aspect   73.063725   1.8488  -5.7745
    topic_labels = topic_info.groupby(['Category']).groups.keys()

    # Collected under a separate name so the ``topics`` argument is not
    # shadowed by the result list.
    rendered = []
    for label in topic_labels:
        top_terms = topic_info[topic_info.Category == label].sort_values(
            by='Total', ascending=False)[:20]
        # Map each term to its 'Total' value for the word-cloud layout.
        frequencies = dict(
            top_terms[['Term', 'Total']].to_dict('split')['data'])

        cloud = wordcloud.WordCloud(font_path=font, width=600, height=300,
                                    background_color='white')
        cloud.fit_words(frequencies)

        # Clear the current figure for every topic; otherwise each imshow()
        # stacks another image on the same axes and all of them are
        # re-rendered by every later savefig().
        plt.clf()
        plt.imshow(cloud)
        plt.axis('off')

        # NOTE(review): savefig() writes binary data for its default image
        # format; if ``StringIO`` here is ``io.StringIO`` this needs to be
        # ``io.BytesIO`` instead -- confirm against the file's imports.
        buffer = StringIO()
        plt.savefig(buffer)
        rendered.append((label, buffer.getvalue()))
    return rendered
# (Web-page navigation residue, not code: "Comment list" / "Article table of contents".)