utils.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:FreeDiscovery 作者: FreeDiscovery 项目源码 文件源码
def centroid_similarity(X, internal_ids, nn_metric='cosine'):
    """Compute the similarity of a cluster's documents to their centroid.

    The centroid is the mean of the selected rows of ``X``. The cosine
    similarity between every selected row and that centroid is computed,
    then rescaled with ``_scale_cosine_similarity``.

    Parameters
    ----------
    X : array-like or sparse matrix
      2D document-feature matrix, indexed as ``X[internal_ids, :]``
    internal_ids : list
      a list of internal ids (row indices into ``X``) of the documents
      belonging to the cluster
    nn_metric : str
      forwarded as ``metric`` to ``_scale_cosine_similarity``; selects
      the rescaling of the cosine similarity, if needed

    Returns
    -------
    S_sim_mean : float
      mean of the rescaled similarities between the selected documents
      and the centroid
    S_sim : array
      first column of the rescaled similarity array, i.e. one value per
      selected document
    """
    import numpy as np
    from sklearn.metrics.pairwise import pairwise_distances

    from ..metrics import _scale_cosine_similarity

    X_sl = X[internal_ids, :]

    # Taking the mean of a scipy sparse matrix yields an ``np.matrix``,
    # which recent scikit-learn versions refuse as input; coerce it to a
    # plain ndarray before any further use.
    centroid = np.asarray(X_sl.mean(axis=0))

    # Dense input gives a 1D mean; pairwise_distances needs a 2D
    # (1, n_features) array.
    if centroid.ndim == 1:
        centroid = centroid[None, :]

    S_cos = 1 - pairwise_distances(X_sl, centroid, metric='cosine')
    S_sim = _scale_cosine_similarity(S_cos, metric=nn_metric)
    S_sim_mean = np.mean(S_sim)
    return float(S_sim_mean), S_sim[:, 0]
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号