clouds.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:KDDCUP2016 作者: hugochan 项目源码 文件源码
def random_similarity(terms1, terms2, freqs1, freqs2):
    """Sample similarities between randomly paired rows of two matrices.

    For each of ``freqs1.shape[0]`` iterations, one row index is drawn
    uniformly at random for ``freqs1`` and, independently, one for
    ``freqs2``. Both rows are converted to ``{term: freq}`` maps, projected
    onto the union of the two vocabularies, and compared with
    ``1.0 - cosine(vec1, vec2)``.

    Args:
        terms1: List of terms labelling the columns of ``freqs1``.
        terms2: List of terms labelling the columns of ``freqs2``.
        freqs1: 2-D frequency matrix; rows must support ``.sum()`` and
            ``.toarray()`` (presumably a scipy sparse matrix -- confirm
            against the caller).
        freqs2: Frequency matrix for the second collection. Row indices are
            drawn from ``range(freqs1.shape[0])``, so it is assumed to have
            at least as many rows as ``freqs1`` -- TODO confirm.

    Returns:
        A float array of length ``freqs1.shape[0]``. Slot ``i`` holds the
        similarity of the i-th sampled pair, or ``0.0`` when either sampled
        row sums to zero and the pair was skipped.
    """
    # Union of both vocabularies; it is loop-invariant, so build it once.
    terms = list(set(terms1 + terms2))

    npapers = freqs1.shape[0]

    # Zero-initialise: skipped iterations never write their slot, and
    # np.empty would hand the caller uninitialised memory for them.
    # The builtin ``float`` replaces the ``np.float`` alias, which no longer
    # exists in current NumPy releases.
    sims = np.zeros(npapers, dtype=float)

    for i in range(npapers):
        a = random.randint(0, npapers - 1)  # @UndefinedVariable
        b = random.randint(0, npapers - 1)  # @UndefinedVariable

        # An all-zero row cannot be compared; leave this slot at 0.0.
        if freqs1[a].sum() == 0.0 or freqs2[b].sum() == 0.0:
            continue

        # Change representation to a {term: freq} map.
        fmap1 = to_dict(terms1, freqs1[a].toarray()[0])
        fmap2 = to_dict(terms2, freqs2[b].toarray()[0])

        # Align both maps on the merged vocabulary before comparing them.
        vec1, vec2 = to_same_dimension(terms, fmap1, fmap2)

        # NOTE(review): ``cosine`` is presumably a cosine *distance* (e.g.
        # scipy.spatial.distance.cosine), hence the 1.0 - x conversion.
        sims[i] = 1.0 - cosine(vec1, vec2)

    return sims
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号