gram.py 文件源码-python代码片段

def build_model(tparams, leavesList, ancestorsList, options):
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))

    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:,:,None]).sum(axis=1)
        embList.append(tempEmb)

    emb = T.concatenate(embList, axis=0)

    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:,:,None]

    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)

    if options['L2'] > 0.:
        cost = cost_noreg + options['L2'] * ((tparams['W_output']**2).sum() + (tparams['W_attention']**2).sum() + (tparams['v_attention']**2).sum())

    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat