def build_model(tparams, leavesList, ancestorsList, options):
    """Build the symbolic Theano graph for the attention-embedding GRU model.

    Args:
        tparams: Mapping of parameter names to Theano variables. This
            function reads ``'W_emb'``, ``'W_output'``, ``'W_attention'`` and
            ``'v_attention'`` directly; the helper layers it calls receive the
            whole mapping.
        leavesList: Sequence of leaf-index groups, paired element-wise with
            ``ancestorsList`` and passed to ``generate_attention``.
        ancestorsList: Sequence of ancestor-index groups used to index
            ``tparams['W_emb']``.
        options: Configuration dict. Reads ``'dropoutRate'`` and ``'L2'``
            here; also forwarded to ``gru_layer``.

    Returns:
        Tuple ``(use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat)``:

        * ``use_noise`` -- shared scalar initialised to 0 and handed to
          ``dropout_layer`` (presumably the train/test dropout switch --
          confirm against ``dropout_layer``).
        * ``x``, ``y`` -- 3-D symbolic inputs; axis 0 of ``x`` is timesteps
          and axis 1 is samples.
        * ``mask`` -- 2-D symbolic input, broadcast over the last axis of the
          softmax output.
        * ``lengths`` -- 1-D symbolic input used to normalise the
          per-sample loss.
        * ``cost`` -- ``cost_noreg`` plus the L2 penalty when
          ``options['L2'] > 0``; otherwise identical to ``cost_noreg``.
        * ``cost_noreg`` -- mean over samples of the length-normalised
          cross-entropy.
        * ``y_hat`` -- masked prediction tensor.
    """
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)  # fixed seed for the dropout random stream
    use_noise = theano.shared(numpy_floatX(0.))

    # Symbolic inputs.
    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)

    # For every (leaves, ancestors) group, build embeddings as the
    # attention-weighted sum of the ancestors' rows of W_emb.
    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:, :, None]).sum(axis=1)
        embList.append(tempEmb)
    emb = T.concatenate(embList, axis=0)

    # Project the inputs through the embedding matrix, then GRU -> dropout
    # -> softmax. The mask zeroes predictions along the first two axes.
    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:, :, None]

    # Element-wise binary cross-entropy; logEps keeps log() away from zero.
    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    # Sum over the last axis and over timesteps, then normalise per sample.
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)

    # Default to the unregularised cost so `cost` is always bound; previously
    # it was undefined (UnboundLocalError at return) whenever L2 <= 0.
    cost = cost_noreg
    if options['L2'] > 0.:
        l2_penalty = (
            (tparams['W_output'] ** 2).sum()
            + (tparams['W_attention'] ** 2).sum()
            + (tparams['v_attention'] ** 2).sum()
        )
        cost = cost_noreg + options['L2'] * l2_penalty

    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat
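# Web-page navigation residue (not part of the code): "Comment list"
# Web-page navigation residue (not part of the code): "Table of contents"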