def build_RNN(architec, layer_input, layer_mask, num_units, grad_clipping):
    """Build a recurrent layer of the requested architecture over ``layer_input``.

    Every variant is constructed with ``only_return_final=True`` and
    ``p=0.5``.
    NOTE(review): ``p`` is not a parameter of the stock Lasagne recurrent
    layers, so ``GRULayer``/``LSTMLayer``/``SGRULayer`` are presumably
    project-specific layers and ``p`` presumably a dropout rate -- confirm.

    Args:
        architec: Architecture selector. ``'gru0'`` builds a GRU with
            ``reset_input=False``, ``'gru1'`` a GRU with
            ``reset_input=True``, ``'lstm'`` an LSTM and ``'sgru'`` an SGRU.
        layer_input: Incoming layer, passed as the first positional argument
            of the recurrent layer.
        layer_mask: Forwarded to the recurrent layer as ``mask_input``.
        num_units: Passed as the second positional argument of the recurrent
            layer.
        grad_clipping: Forwarded to the recurrent layer as ``grad_clipping``.

    Returns:
        The constructed ``GRULayer``, ``LSTMLayer`` or ``SGRULayer``.

    Raises:
        ValueError: If ``architec`` is not one of the supported names.
    """

    def glorot():
        # A fresh initializer per weight, exactly as the gates were
        # originally configured (no initializer instance is shared).
        return lasagne.init.GlorotUniform()

    def tanh_gate():
        # Candidate/cell gate shared by all three architectures: no cell
        # connection (W_cell=None), zero bias, tanh nonlinearity.
        return Gate(W_in=glorot(), W_hid=glorot(), W_cell=None,
                    b=lasagne.init.Constant(0.),
                    nonlinearity=nonlinearities.tanh)

    def build_GRU(reset_input):
        resetgate = Gate(W_in=glorot(), W_hid=glorot(), W_cell=None)
        updategate = Gate(W_in=glorot(), W_hid=glorot(), W_cell=None)
        hidden_update = tanh_gate()
        return GRULayer(layer_input, num_units, mask_input=layer_mask,
                        grad_clipping=grad_clipping,
                        resetgate=resetgate, updategate=updategate,
                        hidden_update=hidden_update,
                        reset_input=reset_input, only_return_final=True,
                        p=0.5, name='GRU')

    def build_LSTM():
        # NOTE(review): the layer is built with peepholes=False, so the
        # W_cell initializers below are presumably unused -- confirm against
        # the LSTMLayer implementation.
        ingate = Gate(W_in=glorot(), W_hid=glorot(),
                      W_cell=lasagne.init.Uniform(range=0.1))
        outgate = Gate(W_in=glorot(), W_hid=glorot(),
                       W_cell=lasagne.init.Uniform(range=0.1))
        # According to Jozefowicz et al. (2015), initialise the bias of the
        # forget gate to 1.
        forgetgate = Gate(W_in=glorot(), W_hid=glorot(),
                          W_cell=lasagne.init.Uniform(range=0.1),
                          b=lasagne.init.Constant(1.))
        # The cell currently uses tanh; a purely linear cell is still to be
        # tried.
        cell = tanh_gate()
        return LSTMLayer(layer_input, num_units, mask_input=layer_mask,
                         grad_clipping=grad_clipping,
                         ingate=ingate, forgetgate=forgetgate, cell=cell,
                         outgate=outgate,
                         peepholes=False, nonlinearity=nonlinearities.tanh,
                         only_return_final=True, p=0.5, name='LSTM')

    def build_SGRU():
        resetgate_hidden = Gate(W_in=glorot(), W_hid=glorot(),
                                W_cell=glorot())
        resetgate_input = Gate(W_in=glorot(), W_hid=glorot(),
                               W_cell=glorot())
        updategate = Gate(W_in=glorot(), W_hid=glorot(), W_cell=glorot())
        hidden_update = tanh_gate()
        return SGRULayer(layer_input, num_units, mask_input=layer_mask,
                         grad_clipping=grad_clipping,
                         resetgate_input=resetgate_input,
                         resetgate_hidden=resetgate_hidden,
                         updategate=updategate, hidden_update=hidden_update,
                         only_return_final=True, p=0.5, name='SGRU')

    if architec == 'gru0':
        return build_GRU(False)
    if architec == 'gru1':
        return build_GRU(True)
    if architec == 'lstm':
        return build_LSTM()
    if architec == 'sgru':
        return build_SGRU()
    # Wrap in a 1-tuple so a tuple-valued selector is reported as a
    # ValueError instead of tripping a %-formatting TypeError.
    raise ValueError('unknown architecture: %s' % (architec,))
# (Web-page navigation residue from the scraped source, not code: "comment list" / "article contents".)