def get_sils_matrix(method, scores, wordlist):
    ''' Build a pairwise distance matrix over the words in wordlist.

    See get_sims_matrix for the argument definitions, which are the same
    here. The difference is that the resulting matrix contains distances
    instead of similarities. NaN entries are replaced via np.nan_to_num
    before returning.

    :param method: one of 'direct', 'dict_cosine', 'dict_JS', 'vec_cosine'
    :param scores: mapping keyed by word. For the dict_* methods a word
        missing from scores is replaced by an empty dict; for 'vec_cosine'
        it is replaced by a zero vector whose length is the first dimension
        of the first value in scores.
    :param wordlist: re-iterable sequence of words (it is iterated more than
        once); fixes the row and column order of the result
    :return: 2-dimensional np.ndarray of size len(wordlist) x len(wordlist),
        or None (after writing a message to stderr) for an unknown method
    :raises IndexError: if method is 'vec_cosine' and scores is empty, since
        the vector length cannot be inferred
    '''
    if method == 'direct':
        sims = get_sims_matrix(method, scores, wordlist)
        # Plain 2-D ndarray instead of the deprecated np.matrix: keeps the
        # 2-D promotion np.matrix gave, and the result is a real np.ndarray.
        # NOTE(review): preprocessing is presumably sklearn.preprocessing,
        # whose recent releases reject np.matrix input - confirm.
        sims = preprocessing.normalize(np.atleast_2d(np.asarray(sims)), norm='l2')
        sils = 1 - sims
    elif method == 'dict_cosine':  # cosine dist of word-PPDB2.0Score matrix
        sils = np.array([[dict_cosine_dist(scores.get(i, {}), scores.get(j, {}))
                          for j in wordlist]
                         for i in wordlist])
    elif method == 'dict_JS':  # JS divergence of word-PPDB2.0Score matrix
        # Only the first element of dict_js_divergence's result is used.
        sils = np.array([[dict_js_divergence(scores.get(i, {}), scores.get(j, {}))[0]
                          for j in wordlist]
                         for i in wordlist])
    elif method == 'vec_cosine':
        # dict views are not subscriptable on Python 3, so fetch the first
        # value through an iterator (works on Python 2 as well).
        first_vec = next(iter(scores.values()), None)
        if first_vec is None:
            # Same exception type that values()[0] raised on an empty dict.
            raise IndexError('scores is empty; cannot infer vector dimensionality')
        d = first_vec.shape[0]
        # Look every word up once instead of twice per matrix cell.
        # NOTE(review): sharing one zero vector assumes cosine does not
        # mutate its arguments (true if it is scipy's) - confirm.
        zero_vec = np.zeros(d)
        vecs = [scores.get(w, zero_vec) for w in wordlist]
        sils = np.array([[cosine(u, v) for v in vecs] for u in vecs])
    else:
        sys.stderr.write('Unknown sil method: %s' % method)
        return None
    # Undefined distances (e.g. involving an all-zero vector) come out as NaN.
    sils = np.nan_to_num(sils)
    return sils
# Web-page residue, not code: site navigation labels "comment list" and "article contents".