# utils.py source file - Python code snippet

def check_similarity_match(X_embed, S):
    """
    Measure how well the linear kernel of an embedding reproduces target similarities.

    Since SimEcs are supposed to project the data into an embedding space where the target
    similarities can be linearly approximated, check if X_embed*X_embed^T = S
    (via mean squared error and Spearman / Pearson correlation coefficients).

    Inputs:
        - X_embed: Nxd matrix with coordinates in the embedding space
        - S: NxN matrix with target similarities (do whatever transformations were done before
             using this as input to the SimEc, e.g. centering, etc.)
    Returns:
        - msqe, rho, r: mean squared error, Spearman and Pearson correlation coefficient between
                        the linear kernel of the embedding and the target similarities
                        (mean squared error is more exact, the correlation coefficients are a
                        more relaxed error measure)
    """
    # compute linear kernel as approximated similarities
    S_approx = X_embed.dot(X_embed.T)
    # to get results that are comparable across similarity measures, we have to normalize them
    # somehow, in this case by dividing by the absolute max value of the target similarity matrix
    n = np.max(np.abs(S))
    if n == 0:
        # an all-zero target has no scale; dividing by 0 would only turn every value into NaN/inf,
        # so leave both matrices unscaled in that case
        n = 1
    S_norm = S / n
    # out-of-place division: an in-place `/=` fails for integer-valued embeddings because the
    # float result cannot be written back into an integer array
    S_approx = S_approx / n
    # compute mean squared error
    msqe = np.mean((S_norm - S_approx) ** 2)
    # flatten once and reuse the 1-D arrays for both correlation measures
    target_flat = np.ravel(S_norm)
    approx_flat = np.ravel(S_approx)
    # compute Spearman correlation coefficient
    rho = spearmanr(target_flat, approx_flat)[0]
    # compute Pearson correlation coefficient
    r = pearsonr(target_flat, approx_flat)[0]
    return msqe, rho, r