clouds.py 文件源码-python代码片段

clouds.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

项目：KDDCUP2016 作者: hugochan 项目源码文件源码

def texts_similarity(terms1, terms2, freqs1, freqs2):
    """Compute a row-by-row cosine similarity between two frequency matrices.

    Row ``i`` of ``freqs1`` is compared against row ``i`` of ``freqs2``. The
    two matrices may use different vocabularies (``terms1`` / ``terms2``), so
    each pair of rows is first projected onto the union of both vocabularies.

    Args:
        terms1: List of terms labelling the columns of ``freqs1``.
        terms2: List of terms labelling the columns of ``freqs2``.
        freqs1: Matrix with one row per paper. It must expose ``shape``, row
            indexing and ``getrow(i).toarray()`` (presumably a SciPy sparse
            matrix -- confirm against callers).
        freqs2: Matrix with the same number of rows as ``freqs1`` and the
            same interface.

    Returns:
        A 1-D float array with one entry per row of ``freqs1``. Entry ``i``
        is ``1.0 - cosine(vec1, vec2)`` for the aligned row vectors, or
        ``0.0`` when either row sums to zero and the pair is skipped.
    """
    # Union of both vocabularies. The ordering of a set is arbitrary, but the
    # same ordering is handed to both vectors of every pair.
    terms = list(set(terms1 + terms2))

    npapers = freqs1.shape[0]

    # Zero-initialised on purpose: rows skipped below are never written, so an
    # uninitialised buffer (np.empty) would leak arbitrary memory contents
    # into the result.
    sims = np.zeros(npapers, dtype=float)

    for i in range(npapers):

        # If one of the vectors is nil, skip it (similarity stays at 0.0).
        if (freqs1[i].sum() == 0.0) or (freqs2[i].sum() == 0.0):
            continue

        # Changes representation to a {term: freq} map
        fmap1 = to_dict(terms1, freqs1.getrow(i).toarray()[0])
        fmap2 = to_dict(terms2, freqs2.getrow(i).toarray()[0])

        # Align both maps on the merged vocabulary so the vectors are
        # comparable component by component.
        vec1, vec2 = to_same_dimension(terms, fmap1, fmap2)

        # `cosine` is presumably scipy.spatial.distance.cosine (a distance),
        # hence the conversion to a similarity -- confirm against the imports.
        sims[i] = 1.0 - cosine(vec1, vec2)

    return sims