generate_pretrained_glove_sim_dist_diff_idf.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:kaggle-quora-solution-8th 作者: qqgeogor 项目源码 文件源码
def _idf_weighted_center(tokens, embedder, idf_dict, dim):
    """Return the IDF-weighted sum of token embeddings divided by the token count."""
    center = np.zeros(dim)
    for token in tokens:
        # The fallback key is looked up lazily, so 'default_idf' is only
        # required when a token is actually missing from idf_dict.
        weight = idf_dict[token] if token in idf_dict else idf_dict['default_idf']
        center += weight * embedder[token]
    # Normalised by the number of tokens, not by the sum of the IDF weights.
    return center / len(tokens)


def calc_glove_sim(row, embedder, idf_dict, dim=300):
    """Compare the IDF-weighted embedding centres of question1 and question2.

    Both questions are stripped of punctuation, lower-cased, split on
    whitespace and filtered down to the tokens present in ``embedder``.

    Args:
        row: Mapping-like object exposing ``'question1'`` and ``'question2'``.
        embedder: Object supporting ``token in embedder`` and
            ``embedder[token]``; each vector is assumed to have length
            ``dim`` (TODO confirm against the loaded embedding).
        idf_dict: Maps token -> IDF weight. Must contain ``'default_idf'``
            if any in-vocabulary token is missing from it.
        dim: Dimensionality of the embedding vectors (default 300).

    Returns:
        Tuple ``(glove_sim, glove_vdiff_dist, vector_diff)``: the value of
        ``cosine`` on the two centres, the Euclidean norm of their
        difference, and the difference vector itself. When either question
        has no in-vocabulary token, the sentinel ``(-1, -1, np.zeros(dim))``
        is returned instead of dividing by zero and propagating NaN.
    """
    tokens_q1 = [x for x in remove_punctuation(row['question1']).lower().split() if x in embedder]
    tokens_q2 = [x for x in remove_punctuation(row['question2']).lower().split() if x in embedder]

    # Without at least one known token on each side the centres are
    # undefined (0 / 0); return an explicit sentinel rather than NaN.
    if not tokens_q1 or not tokens_q2:
        return (-1, -1, np.zeros(dim))

    center_q1 = _idf_weighted_center(tokens_q1, embedder, idf_dict, dim)
    center_q2 = _idf_weighted_center(tokens_q2, embedder, idf_dict, dim)

    vector_diff = center_q1 - center_q2
    # NOTE(review): if `cosine` is scipy.spatial.distance.cosine, this value
    # is a cosine *distance* (1 - similarity), despite the name -- confirm.
    glove_sim = cosine(center_q1, center_q2)
    glove_vdiff_dist = np.sqrt(np.sum(vector_diff**2))
    return (glove_sim, glove_vdiff_dist, vector_diff)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号