# nn_vec.py source file - Python code snippet

def nn_vec_basic(arr1, arr2, topn, sort=True, return_sims=False, nthreads=8):
    """
    For each row in arr1 (m1 x d) find the topn most similar rows from arr2 (m2 x d).

    Similarity is defined as the dot product. Note that when the rows of arr1 and arr2 are normalized, the dot
    product equals the cosine similarity and is a monotonically decreasing function of the Euclidean distance.

    The full (m1 x m2) similarity matrix is computed in one go, so memory usage grows with m1 * m2.

    :param arr1: array of vectors to find nearest neighbours for
    :param arr2: array of vectors to search for nearest neighbours in
    :param topn: number of nearest neighbours
    :param sort: if True, indices in the i-th row of the returned array should order the corresponding rows of arr2
    by descending similarity to the i-th row of arr1 (the flag is forwarded to argmaxk_rows)
    :param return_sims: return similarities along with indices of nearest neighbours
    :param nthreads: forwarded unchanged to argmaxk_rows
    :return: array (m1 x topn) where the i-th row contains indices of the rows in arr2 most similar to the i-th row
    of arr1, and, if return_sims=True, an array (m1 x topn) of the corresponding similarities.
    """
    sims = np.dot(arr1, arr2.T)
    best_inds = argmaxk_rows(sims, topn, sort=sort, nthreads=nthreads)
    if not return_sims:
        return best_inds

    # A column vector of row ids (m1 x 1) broadcasts against best_inds (m1 x topn) during fancy indexing, so every
    # entry of best_inds is paired with its own row id without materializing a full (m1 x topn) index matrix.
    row_ids = np.arange(best_inds.shape[0], dtype=np.intp)[:, np.newaxis]
    return best_inds, sims[row_ids, best_inds]