import lasagne
from lasagne import nonlinearities
from lasagne.layers import Gate


def build_BiGRU(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, dropout=True, in_to_out=False):
    # Construct the forward and backward GRUs. For now, the weight matrices are
    # initialized with the Glorot uniform initializer (default arguments);
    # other initializers may be worth trying for specific tasks.
    # Dropout on the incoming layer.
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)
    # Following Jozefowicz et al. (2015), initialize the reset-gate bias to 1
    # (the GRU counterpart of the LSTM forget-gate bias).
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                             W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1))
    # Use tanh as the nonlinearity of the hidden update.
    hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                          nonlinearity=nonlinearities.tanh)
    gru_forward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                          precompute_input=precompute_input,
                                          resetgate=resetgate_forward, updategate=updategate_forward,
                                          hidden_update=hidden_forward, name='forward')
    # Same initialization for the backward GRU: reset-gate bias set to 1, following Jozefowicz et al. (2015).
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1))
    # Use tanh as the nonlinearity of the hidden update.
    hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                           nonlinearity=nonlinearities.tanh)
    gru_backward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                           precompute_input=precompute_input, backwards=True,
                                           resetgate=resetgate_backward, updategate=updategate_backward,
                                           hidden_update=hidden_backward, name='backward')
    # Concatenate the outputs of the forward and backward GRUs along the feature axis.
    concat = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")
    # Dropout on the output.
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)
    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)
    # The BiGRU output (concat) has shape (batch_size, input_length, 2 * num_units),
    # plus the input feature dimension if in_to_out=True.
    return concat
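A minimal usage sketch follows, assuming a 3-D input of shape (batch_size, input_length, feature_dim) and a binary mask marking real tokens versus padding. The variable names and sizes here (FEATURE_DIM, NUM_UNITS, input_var, mask_var) are illustrative and not part of the original code.

import theano.tensor as T
import lasagne

FEATURE_DIM = 100   # hypothetical input feature size
NUM_UNITS = 200     # hypothetical number of GRU units per direction

input_var = T.tensor3(name='input')  # (batch_size, input_length, FEATURE_DIM)
mask_var = T.matrix(name='mask')     # (batch_size, input_length), 1 for tokens, 0 for padding

layer_input = lasagne.layers.InputLayer(shape=(None, None, FEATURE_DIM), input_var=input_var)
layer_mask = lasagne.layers.InputLayer(shape=(None, None), input_var=mask_var)

# Output shape: (batch_size, input_length, 2 * NUM_UNITS).
bi_gru = build_BiGRU(layer_input, NUM_UNITS, mask=layer_mask, grad_clipping=5.0, dropout=True)
output = lasagne.layers.get_output(bi_gru)

At prediction time, pass deterministic=True to lasagne.layers.get_output so the dropout layers are disabled.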