def build_keras_model_dbow(index_size, vector_size,
                           context_size,
                           sub_batch_size=1,
                           doctag_vectors=None,
                           hidden_vectors=None,
                           learn_doctags=True,
                           learn_hidden=True,
                           model=None,
                           ):
    """Build and compile the Keras ``Graph`` model for DBOW training.

    The graph has two integer inputs, ``'index'`` and ``'point'``, each of
    length ``sub_batch_size``.  Each input is looked up in its own embedding
    table, the two looked-up tensors are multiplied element-wise, summed over
    the last axis (a per-position dot product) and passed through a sigmoid.
    The result is exposed as the output named ``'code'``.

    Args:
        index_size: Number of rows of the ``'index'`` embedding table.
        vector_size: Width of both embedding tables.
        context_size: Number of rows of the ``'point'`` embedding table.
        sub_batch_size: Length of each input sequence.
        doctag_vectors: Optional initial weights for the ``'index'``
            embedding; when None the layer's default initialisation is used.
        hidden_vectors: Optional initial weights for the ``'point'``
            embedding; when None the layer's default initialisation is used.
        learn_doctags: When false, the ``'index'`` embedding is created with
            ``trainable=False`` so training leaves it untouched.
        learn_hidden: When false, the ``'point'`` embedding is created with
            ``trainable=False`` so training leaves it untouched.
        model: Unused; kept so existing callers that pass it keep working.

    Returns:
        The ``Graph`` instance, compiled with the ``'rmsprop'`` optimizer and
        an ``'mse'`` loss on the ``'code'`` output.

    >>> index_size=3
    >>> vector_size=2
    >>> context_size=3
    >>> sub_batch_size=2
    >>> doctag_vectors=np.array([[-1.1,2.2],[-3.2,-4.3],[-1.1,-1.4]],'float32')
    >>> hidden_vectors=np.array([[-1,2],[3,4],[5,6]],'float32')
    >>> kerasmodel=build_keras_model_dbow(index_size=3,vector_size=2,context_size=3,sub_batch_size=2,doctag_vectors=doctag_vectors,hidden_vectors=hidden_vectors)
    >>> ind=[[0,1],[1,0]]
    >>> ipt=[[0,1],[1,2]]
    >>> tmp1=kerasmodel.predict({'index':np.array(ind),'point':np.array(ipt)})['code']
    >>> tmp2=np.array([np.sum(doctag_vectors[ind[i]]*hidden_vectors[ipt[i]], axis=1) for i in range(2)])
    >>> np.linalg.norm(1/(1+np.exp(-tmp2))-tmp1) < 0.001
    True
    """
    def _embedding(input_dim, initial_weights, learn):
        # Optional keywords are only passed when they are actually needed, so
        # the default call is exactly Embedding(dim, vector_size, input_length=...).
        options = {'input_length': sub_batch_size}
        if initial_weights is not None:
            options['weights'] = [initial_weights]
        if not learn:
            # Honour learn_doctags / learn_hidden by freezing the table.
            options['trainable'] = False
        return Embedding(input_dim, vector_size, **options)

    kerasmodel = Graph()
    # The Graph API documents dtype as the string 'int' / 'float'.
    kerasmodel.add_input(name='point', input_shape=(sub_batch_size,), dtype='int')
    kerasmodel.add_input(name='index', input_shape=(sub_batch_size,), dtype='int')
    kerasmodel.add_node(_embedding(context_size, hidden_vectors, learn_hidden),
                        name='embedpoint', input='point')
    kerasmodel.add_node(_embedding(index_size, doctag_vectors, learn_doctags),
                        name='embedindex', input='index')
    # merge_mode='mul' multiplies the two embeddings element-wise; the Lambda
    # then sums over axis 2, giving one dot product per input position.
    kerasmodel.add_node(Lambda(lambda x: x.sum(2)), name='merge',
                        inputs=['embedindex', 'embedpoint'], merge_mode='mul')
    kerasmodel.add_node(Activation('sigmoid'), name='sigmoid', input='merge')
    kerasmodel.add_output(name='code', input='sigmoid')
    kerasmodel.compile('rmsprop', {'code': 'mse'})
    return kerasmodel
# Comment list
# Table of contents