def texts_similarity(terms1, terms2, freqs1, freqs2):
    """Compute a row-by-row similarity between two term-frequency matrices.

    Row ``i`` of ``freqs1`` is compared with row ``i`` of ``freqs2``. Each
    row is turned into a ``{term: freq}`` map with ``to_dict``, both maps are
    projected onto the merged vocabulary with ``to_same_dimension``, and the
    score stored is ``1.0 - cosine(vec1, vec2)``.

    Args:
        terms1: Terms labelling the columns of ``freqs1``. Must support ``+``
            with ``terms2`` (e.g. both lists).
        terms2: Terms labelling the columns of ``freqs2``.
        freqs1: Frequency matrix supporting ``.shape``, row indexing and
            ``.getrow(i).toarray()`` (presumably a SciPy sparse matrix --
            confirm against callers).
        freqs2: Same kind of matrix as ``freqs1``; it is indexed with the row
            numbers of ``freqs1``, so it needs at least as many rows.

    Returns:
        A 1-D float NumPy array with one entry per row of ``freqs1``. Rows
        where either matrix sums to zero are skipped and keep the value 0.0.
    """
    # Merge all terms into the shared vocabulary used for both vectors.
    terms = list(set(terms1 + terms2))
    npapers = freqs1.shape[0]

    # Zero-initialised on purpose: skipped rows below never get assigned, and
    # an uninitialised buffer would leak arbitrary memory values to the caller.
    sims = np.zeros(npapers, dtype=float)

    for i in range(npapers):
        # If one of the vectors is nil the score is undefined, so skip the
        # row and leave its entry at 0.0.
        if freqs1[i].sum() == 0.0 or freqs2[i].sum() == 0.0:
            continue

        # Change representation to a {term: freq} map.
        fmap1 = to_dict(terms1, freqs1.getrow(i).toarray()[0])
        fmap2 = to_dict(terms2, freqs2.getrow(i).toarray()[0])

        # Align both maps on the merged vocabulary before comparing them.
        vec1, vec2 = to_same_dimension(terms, fmap1, fmap2)

        # NOTE(review): `cosine` is presumably scipy.spatial.distance.cosine
        # (a distance), which makes this a similarity -- confirm the import.
        sims[i] = 1.0 - cosine(vec1, vec2)

    return sims
评论列表
文章目录