scoreword2veckeras.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:word2vec-keras-in-gensim 作者: niitsuma 项目源码 文件源码
def build_keras_model_score_word_sg(
        index_size,
        vector_size,
        context_size,
        score_vector_size,
        sub_batch_size=256,
        word_vectors=None,
        score_vectors=None,
        hidden_vectors=None,
        model=None):
    """Build and compile a two-output Keras ``Graph`` ('code' and 'score').

    The graph has two integer inputs, 'index' and 'point', each of shape
    ``(sub_batch_size,)``.

    * Output 'code': the 'index' and 'point' embeddings are multiplied
      element-wise, summed over the vector axis and passed through a sigmoid.
    * Output 'score': every 'point' selects a row of width
      ``score_vector_size * vector_size`` which is reshaped to
      ``(score_vector_size, vector_size)``, multiplied element-wise with the
      'index' embedding and summed over the vector axis.

    Args:
        index_size: number of rows of the 'index' embedding table.
        vector_size: width of the 'index' and 'point' embedding vectors.
        context_size: number of rows of the 'point' and score embedding tables.
        score_vector_size: number of score components produced per position.
        sub_batch_size: length of each input sequence.
        word_vectors: optional initial weights for the 'index' embedding.
        score_vectors: optional initial weights for the score embedding.
        hidden_vectors: optional initial weights for the 'point' embedding.
        model: not used by this function.

    Returns:
        The ``Graph`` compiled with the 'rmsprop' optimizer and 'mse' loss
        on both outputs.

    >>> word_vectors=np.array([[1,2,-1,1],[3,4,-1,-2],[5,6,-2,-2]])
    >>> score_vectors=np.array([[10,20,11,21,5,6,7,8],[30,40,33,41,9,8,7,6]])
    >>> hidden_vectors=np.array([[1,0,1,1],[0,1,1,1]])
    >>> sub_batch_size=3
    >>> vector_size=4
    >>> score_vector_size=2
    >>> kerasmodel=build_keras_model_score_word_sg(index_size=3,vector_size=vector_size,context_size=2,score_vector_size=score_vector_size,sub_batch_size=sub_batch_size,word_vectors=word_vectors,score_vectors=score_vectors,hidden_vectors=hidden_vectors)
    >>> ind=[[0,1,2],[1,2,0]]
    >>> ipt=[[1,0,1],[0,1,0]]
    >>> tmp1=kerasmodel.predict({'index':np.array(ind),'point':np.array(ipt)})
    >>> tmp3=np.array([[score_vectors[ipt[i][j]].reshape((score_vector_size,vector_size)).dot(word_vectors[ind[i][j]]) for j in range(sub_batch_size) ] for i in range(2)])
    >>> tmp2=np.array([[word_vectors[ind[i][j]].dot(hidden_vectors[ipt[i][j]].T) for j in range(sub_batch_size) ] for i in range(2)])
    >>> np.linalg.norm(1/(1+np.exp(-tmp2))-tmp1['code'])+np.linalg.norm(tmp1['score']-tmp3) < 0.0001
    True
    """

    def make_embedding(rows, width, initial):
        # Hand `weights` to the layer only when initial values were supplied,
        # so the layer otherwise falls back to its own initialisation.
        options = {'input_length': sub_batch_size}
        if initial is not None:
            options['weights'] = [initial]
        return Embedding(rows, width, **options)

    graph = Graph()

    # Inputs are registered in the order 'point', then 'index'.
    for input_name in ('point', 'index'):
        graph.add_input(name=input_name,
                        input_shape=(sub_batch_size,),
                        dtype=int)

    # --- 'code' branch: sigmoid of the embedding dot product -------------
    graph.add_node(make_embedding(index_size, vector_size, word_vectors),
                   name='embedding', input='index')
    graph.add_node(make_embedding(context_size, vector_size, hidden_vectors),
                   name='embedpoint', input='point')
    # merge_mode='mul' multiplies both embeddings; summing axis 2 turns the
    # element-wise product into a per-position dot product.
    graph.add_node(Lambda(lambda x: x.sum(2)),
                   name='merge',
                   inputs=['embedding', 'embedpoint'],
                   merge_mode='mul')
    graph.add_node(Activation('sigmoid'), name='sigmoid', input='merge')
    graph.add_output(name='code', input='sigmoid')

    # --- 'score' branch ---------------------------------------------------
    score_width = score_vector_size * vector_size
    graph.add_node(make_embedding(context_size, score_width, score_vectors),
                   name='embedscore', input='point')
    # Each flat score row becomes a (score_vector_size, vector_size) matrix.
    graph.add_node(
        Reshape((sub_batch_size, score_vector_size, vector_size)),
        name='score1', input='embedscore')

    # Repeat the 'index' embeddings score_vector_size times and rearrange
    # them to (sub_batch_size, score_vector_size, vector_size) so they line
    # up with 'score1'.
    graph.add_node(Flatten(), name='index1', input='embedding')
    graph.add_node(RepeatVector(score_vector_size),
                   name='index2', input='index1')
    graph.add_node(
        Reshape((score_vector_size, sub_batch_size, vector_size)),
        name='index3', input='index2')
    graph.add_node(Permute((2, 1, 3)), name='index4', input='index3')

    # Multiply and sum over the last axis: one value per score component.
    graph.add_node(Lambda(lambda x: x.sum(-1)),
                   name='scorenode',
                   inputs=['score1', 'index4'],
                   merge_mode='mul')
    graph.add_output(name='score', input='scorenode')

    losses = {'code': 'mse', 'score': 'mse'}
    graph.compile('rmsprop', losses)
    return graph
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号