import lasagne
from lasagne import nonlinearities
from lasagne.layers import Gate

# ConvTimeStep1DLayer, PoolTimeStep1DLayer, the recurrent-dropout LSTMLayer and
# ChainCRFLayer are project-specific layers; import them from wherever this
# repository defines them (the path below is a placeholder).
from neuronlp.layers import ConvTimeStep1DLayer, PoolTimeStep1DLayer, LSTMLayer, ChainCRFLayer


def build_recur_dropout(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    """Construct a bi-directional LSTM-CNNs-CRF network with recurrent dropout."""
    # set the character convolution window size
    conv_window = 3
    # incoming1 shape: [batch, n-step, c_dim, char_length]
# construct convolution layer
# shape = [batch, n-step, c_filters, output_length]
cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size so that pooling spans the full output length of the CNN
_, _, _, pool_size = cnn_layer.output_shape
# construct max pool layer
# shape = [batch, n-step, c_filters, 1]
pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
# reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))
    # concatenate the CNN output with the word-embedding input
    # shape = [batch, n-step, c_filters + w_dim]
incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    # apply dropout to the concatenated input; shared_axes=(1,) shares the dropout mask across time steps
incoming = lasagne.layers.DropoutLayer(incoming, p=p, shared_axes=(1,))
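    # gates for the forward LSTM; input and hidden weights use Glorot-uniform initialization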
ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1))
outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1))
    # following Jozefowicz et al. (2015), initialize the forget-gate bias to 1
forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # use tanh as the cell nonlinearity; a purely linear cell is still to be tried
cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
nonlinearity=nonlinearities.tanh)
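    # this custom LSTMLayer takes a dropout rate p and applies recurrent dropout inside the recurrence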
lstm_forward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
nonlinearity=nonlinearities.tanh, peepholes=False,
ingate=ingate_forward, outgate=outgate_forward,
forgetgate=forgetgate_forward, cell=cell_forward, p=p, name='forward')
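    # gates for the backward LSTM, mirroring the forward configuration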
ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1))
outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1))
    # following Jozefowicz et al. (2015), initialize the forget-gate bias to 1
forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # use tanh as the cell nonlinearity; a purely linear cell is still to be tried
cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
nonlinearity=nonlinearities.tanh)
lstm_backward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
nonlinearity=nonlinearities.tanh, peepholes=False, backwards=True,
ingate=ingate_backward, outgate=outgate_backward,
forgetgate=forgetgate_backward, cell=cell_backward, p=p, name='backward')
    # concatenate the outputs of the forward and backward LSTMs
    bi_lstm_cnn = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")
    # shape = [batch, n-step, 2 * num_units]
    bi_lstm_cnn = lasagne.layers.DropoutLayer(bi_lstm_cnn, p=p, shared_axes=(1,))
    # top-level CRF layer over the bi-LSTM features
    return ChainCRFLayer(bi_lstm_cnn, num_labels, mask_input=mask)
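
For reference, here is a minimal sketch of how this builder could be wired up. The input shapes, layer names, and hyper-parameter values below (character dim 30, char length 20, word dim 100, 200 LSTM units, 17 labels) are illustrative assumptions, not values taken from the original code:

# hypothetical usage; InputLayer auto-creates matching Theano input variables
# character input, shape [batch, n-step, c_dim=30, char_length=20]
char_input = lasagne.layers.InputLayer(shape=(None, None, 30, 20), name='char-input')
# word input, shape [batch, n-step, w_dim=100]
word_input = lasagne.layers.InputLayer(shape=(None, None, 100), name='word-input')
# mask marking real tokens vs. padding, shape [batch, n-step]
mask_input = lasagne.layers.InputLayer(shape=(None, None), name='mask')

crf_layer = build_recur_dropout(char_input, word_input, num_units=200, num_labels=17,
                                mask=mask_input, grad_clipping=5.0, num_filters=30, p=0.5)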