def make_multi_hierarch_cluster_figs(model, field_input):
    """
    Build the hierarchical-clustering figure for probes of several categories.

    Args:
        model: project model object. This function reads
            ``model.term_doc_freq_dict``, ``model.probe_store.cat_probe_list_dict``,
            ``model.probe_store.probe_set`` and calls
            ``model.get_single_cat_acts_df(cat)``.
        field_input: iterable of categories; it is handed unchanged to the
            inner helper as ``cats``.

    Returns:
        A one-element list holding the figure produced by the inner helper.
    """

    def make_multi_cat_clust_fig(cats, freq_thr=500):  # TODO make into config
        """
        Returns fig showing hierarchical clustering of probes from multiple categories

        Only probes whose summed ``model.term_doc_freq_dict`` entry exceeds
        ``freq_thr`` are kept. The activation rows of the kept probes from
        every category are stacked and clustered together (euclidean
        distance, complete linkage).
        """
        start = time.time()
        sns.set_style('white')
        # make cat_acts_mat
        acts_mats = []
        cats_probe_list = []
        for cat in cats:
            # One plain-bool flag per probe of this category.
            # NOTE(review): assumes the rows of get_single_cat_acts_df(cat)
            # are in the same order as cat_probe_list_dict[cat] - confirm.
            bool_index = [bool(sum(model.term_doc_freq_dict[probe]) > freq_thr)
                          for probe in model.probe_store.cat_probe_list_dict[cat]]
            # Filter once and reuse for both the values and the row labels.
            filtered_acts_df = model.get_single_cat_acts_df(cat)[bool_index]
            acts_mats.append(filtered_acts_df.values)
            cats_probe_list.extend(model.probe_store.probe_set[probe_id]
                                   for probe_id in filtered_acts_df.index.tolist())
        # np.vstack needs a real sequence; a generator argument was deprecated
        # in NumPy 1.16 and is rejected by current releases.
        cat_acts_mat = np.vstack(acts_mats)
        # fig
        rcParams['lines.linewidth'] = 2.0  # note: changes the global rcParams
        fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 5 * len(cats)),
                               dpi=FigsConfigs.DPI)
        # dendrogram
        dist_matrix = pdist(cat_acts_mat, 'euclidean')
        linkages = linkage(dist_matrix, method='complete')
        dendrogram(linkages,
                   ax=ax,
                   labels=cats_probe_list,
                   orientation='right',
                   leaf_font_size=10)
        # Booleans, not the legacy 'off' strings: a non-empty string is truthy,
        # so 'off' leaves the ticks visible on current matplotlib.
        ax.tick_params(axis='both', which='both', top=False, right=False, left=False)
        ax.spines['right'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['top'].set_visible(False)
        print('{} completed in {:.1f} secs'.format(sys._getframe().f_code.co_name, time.time() - start))
        return fig

    figs = [make_multi_cat_clust_fig(field_input)]
    return figs
评论列表
文章目录