matutils.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:paragraph2vec 作者: thunlp 项目源码 文件源码
def cossim(vec1, vec2):
    """
    Return cosine similarity between two sparse vectors.
    The similarity is a number between <-1.0, 1.0>, higher is more similar.
    """
    vec1, vec2 = dict(vec1), dict(vec2)
    if not vec1 or not vec2:
        return 0.0
    vec1len = 1.0 * math.sqrt(sum(val * val for val in itervalues(vec1)))
    vec2len = 1.0 * math.sqrt(sum(val * val for val in itervalues(vec2)))
    assert vec1len > 0.0 and vec2len > 0.0, "sparse documents must not contain any explicit zero entries"
    if len(vec2) < len(vec1):
        vec1, vec2 = vec2, vec1 # swap references so that we iterate over the shorter vector
    result = sum(value * vec2.get(index, 0.0) for index, value in iteritems(vec1))
    result /= vec1len * vec2len # rescale by vector lengths
    return result
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号