def _known_tokens(text, embedder):
    """Return the lowercased, punctuation-free tokens of *text* found in *embedder*."""
    return [tok for tok in remove_punctuation(text).lower().split() if tok in embedder]


def _idf_weighted_centroid(tokens, embedder, idf_dict, embedding_dim):
    """Return the IDF-weighted sum of token vectors divided by the token count.

    *tokens* must be non-empty; the caller is responsible for that check.
    """
    centroid = np.zeros(embedding_dim)
    for tok in tokens:
        # The fallback key is read lazily, so 'default_idf' only has to exist
        # when a token is actually missing from idf_dict.
        weight = idf_dict[tok] if tok in idf_dict else idf_dict['default_idf']
        centroid += weight * embedder[tok]
    # Normalised by the number of tokens, not by the sum of the IDF weights.
    return centroid / len(tokens)


def calc_glove_sim(row, embedder, idf_dict, embedding_dim=300):
    """Compare the IDF-weighted embedding centroids of two questions.

    Each of ``row['question1']`` and ``row['question2']`` is passed through
    ``remove_punctuation``, lowercased, split on whitespace and filtered down
    to the tokens contained in ``embedder``. For each question the token
    vectors are scaled by their IDF weight, summed, and divided by the number
    of tokens.

    Args:
        row: Mapping-like object indexable by ``'question1'`` and
            ``'question2'`` (presumably a pandas row -- confirm against the
            caller).
        embedder: Object supporting ``token in embedder`` and
            ``embedder[token]``, the latter returning a vector of length
            ``embedding_dim``.
        idf_dict: Mapping from token to IDF weight. Tokens that are absent
            use ``idf_dict['default_idf']``.
        embedding_dim: Length of the embedding vectors. Defaults to 300,
            the value that was previously hard-coded.

    Returns:
        A tuple ``(glove_sim, glove_vdiff_dist, vector_diff)``:

        * ``glove_sim`` -- the result of ``cosine(centroid1, centroid2)``.
          NOTE(review): if ``cosine`` is ``scipy.spatial.distance.cosine``
          this is a distance (1 - similarity), not a similarity -- confirm.
        * ``glove_vdiff_dist`` -- Euclidean norm of ``vector_diff``.
        * ``vector_diff`` -- ``centroid1 - centroid2``.

        If either question has no tokens known to ``embedder``, the sentinel
        ``(-1, -1, np.zeros(embedding_dim))`` is returned instead of dividing
        by zero and propagating NaN values.
    """
    tokens_q1 = _known_tokens(row['question1'], embedder)
    tokens_q2 = _known_tokens(row['question2'], embedder)

    # Without this guard the centroid computation divides a zero vector by
    # zero, which only emits a RuntimeWarning and yields NaN everywhere.
    if not tokens_q1 or not tokens_q2:
        return (-1, -1, np.zeros(embedding_dim))

    centroid_q1 = _idf_weighted_centroid(tokens_q1, embedder, idf_dict, embedding_dim)
    centroid_q2 = _idf_weighted_centroid(tokens_q2, embedder, idf_dict, embedding_dim)

    vector_diff = centroid_q1 - centroid_q2
    glove_sim = cosine(centroid_q1, centroid_q2)
    glove_vdiff_dist = np.sqrt(np.sum(vector_diff ** 2))
    return (glove_sim, glove_vdiff_dist, vector_diff)
generate_pretrained_glove_sim_dist_diff_idf.py 文件源码
python
阅读 18
收藏 0
点赞 0
评论 0
评论列表
文章目录