evaluate.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:w2vec-similarity 作者: jayantj 项目源码 文件源码
def cluster_scatter_plot(similarity_file):
  '''Show an interactive 2-D scatter plot of the models in a similarity file.

  The JSON file is expected to provide these keys (all read below):
    - 'model_names': list of model file paths/names, one per point.
    - 'doc2cluster': maps each model's base name (no directory, no
      extension) to its cluster label.
    - 'cluster2doc': one entry per cluster; only its size is used.
    - 'similarity_matrix': precomputed affinity matrix handed to
      SpectralEmbedding.

  Each model becomes one point, colored by its cluster label and given an
  mpld3 hover tooltip with a shortened model name. The figure is displayed
  with mpld3.show(); nothing is returned.

  NOTE(review): cluster labels are presumably integers in
  0 .. num_clusters-1 and the rows of 'similarity_matrix' presumably follow
  the order of 'model_names' -- confirm against the code that writes the
  file.
  '''
  def get_cmap(N):
    '''Returns a function that maps each index in 0, 1, ... N-1 to a distinct
    RGB color.'''
    color_norm = colors.Normalize(vmin=0, vmax=N-1)
    scalar_map = cmx.ScalarMappable(norm=color_norm, cmap='hsv')
    def map_index_to_rgb_color(index):
      return scalar_map.to_rgba(index)
    return map_index_to_rgb_color

  # Pass the encoding by keyword: positionally, 'utf-8' only works when
  # `open` is codecs.open; with the builtin it lands in the `buffering`
  # slot and raises TypeError.
  with open(similarity_file, 'r', encoding='utf-8') as f:
    similarity_data = json.load(f)

  num_clusters = len(similarity_data['cluster2doc'])
  cmap = get_cmap(num_clusters)

  # Compiled once, outside the loop. Drops a trailing " (<digits>)-iter..."
  # suffix from the model name to get a shorter tooltip label.
  suffix_pattern = re.compile(r"\s\([0-9]*\)-iter.*", re.IGNORECASE)

  labels = []
  point_colors = []
  for model_name in similarity_data['model_names']:
    model_name = os.path.splitext(os.path.basename(model_name))[0]
    cluster_label = similarity_data['doc2cluster'][model_name]
    point_colors.append(cmap(cluster_label))
    labels.append(suffix_pattern.split(model_name, 1)[0])

  similarity_matrix = np.array(similarity_data['similarity_matrix'])
  embeddings = SpectralEmbedding(affinity='precomputed').fit_transform(similarity_matrix)

  fig, ax = plt.subplots()
  x = embeddings[:, 0]
  y = embeddings[:, 1]
  # A scalar marker size applies to every point, so the plot no longer
  # depends on there being exactly 100 models.
  scatter = ax.scatter(x, y, c=point_colors, s=100)
  tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
  mpld3.plugins.connect(fig, tooltip)
  mpld3.show()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号