def cluster_scatter_plot(similarity_file, marker_size=100):
    """Show an interactive scatter plot of clustered documents.

    Loads a JSON file, embeds its precomputed similarity matrix with
    ``SpectralEmbedding`` and plots the first two embedding dimensions,
    coloring every point by its cluster and attaching a hover tooltip with
    a shortened model name. The figure is displayed through ``mpld3.show()``.

    The JSON document is read with these keys:

    * ``'model_names'`` -- iterable of model file paths, one per point.
    * ``'doc2cluster'`` -- maps a model's base name (directory and extension
      stripped) to its cluster label.
    * ``'cluster2doc'`` -- mapping whose number of entries is used as the
      number of distinct colors.
    * ``'similarity_matrix'`` -- affinity matrix passed to the embedding;
      presumably rows follow the order of ``'model_names'`` -- TODO confirm.

    Args:
        similarity_file: Path of the UTF-8 encoded JSON file to load.
        marker_size: Marker area handed to ``Axes.scatter`` (``s=``) for
            every point. Defaults to 100, the size used previously.

    Returns:
        None.
    """
    def get_cmap(N):
        """Return a function that maps each index in 0, 1, ... N-1 to a
        distinct RGB color."""
        color_norm = colors.Normalize(vmin=0, vmax=N - 1)
        scalar_map = cmx.ScalarMappable(norm=color_norm, cmap='hsv')

        def map_index_to_rgb_color(index):
            return scalar_map.to_rgba(index)

        return map_index_to_rgb_color

    # The encoding must be passed by keyword: the third positional argument
    # of the built-in open() is ``buffering``, not the encoding.
    with open(similarity_file, 'r', encoding='utf-8') as f:
        similarity_data = json.load(f)

    cmap = get_cmap(len(similarity_data['cluster2doc']))

    # Strips a trailing " (<digits>)-iter..." suffix from the displayed label.
    # Compiled once here instead of on every loop iteration.
    suffix_pattern = re.compile(r"\s\([0-9]*\)-iter.*", re.IGNORECASE)

    labels = []
    point_colors = []
    for model_name in similarity_data['model_names']:
        # 'doc2cluster' is keyed by the bare file name without extension.
        model_name = os.path.splitext(os.path.basename(model_name))[0]
        cluster_label = similarity_data['doc2cluster'][model_name]
        point_colors.append(cmap(cluster_label))
        labels.append(suffix_pattern.split(model_name, 1)[0])

    similarity_matrix = np.array(similarity_data['similarity_matrix'])
    embeddings = SpectralEmbedding(affinity='precomputed').fit_transform(
        similarity_matrix)

    fig, ax = plt.subplots()
    # A scalar size applies to every point; the previous fixed-length array
    # of 100 sizes only matched when there were exactly 100 points.
    scatter = ax.scatter(embeddings[:, 0], embeddings[:, 1],
                         c=point_colors, s=marker_size)
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
    mpld3.plugins.connect(fig, tooltip)
    mpld3.show()
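# Non-code residue from the web page this snippet was copied from
# (navigation headings: "Comment list" / "Table of contents").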