def create_pretrained_substream(weights, biases, input_shape, input_var, mask_shape, mask_var, name,
                                lstm_size=250, win=None, nonlinearity=rectify,
                                w_init_fn=None, use_peepholes=True):
    """Build one pretrained-encoder -> delta -> LSTM substream.

    The raw input is flattened across time, pushed through a pretrained
    fully-connected encoder (fc1/fc2/fc3/bottleneck, loaded from *weights*
    and *biases*), reshaped back to (batch, seq, features), augmented with
    delta features, and fed to a masked LSTM.

    :param weights: pretrained weight matrices for the encoder layers
    :param biases: pretrained bias vectors for the encoder layers
    :param input_shape: shape tuple for the input layer; the last entry is
        the per-timestep feature dimension (assumes (batch, seq, features)
        layout — TODO confirm against callers)
    :param input_var: symbolic variable bound to the input layer
    :param mask_shape: shape tuple for the sequence-mask layer
    :param mask_var: symbolic variable bound to the mask layer
    :param name: suffix used to name every layer in this substream
    :param lstm_size: number of LSTM units (cast to int before use)
    :param win: symbolic delta-window scalar; if None a fresh
        ``T.iscalar('theta')`` is created for this call. NOTE(review): the
        old signature created one shared scalar at import time; callers that
        relied on that shared default across substreams should now pass the
        same variable explicitly.
    :param nonlinearity: activation used for the fc1/fc2/fc3 encoder layers
        (the bottleneck stays linear)
    :param w_init_fn: weight initializer for the LSTM gates; defaults to a
        fresh ``las.init.Orthogonal()``
    :param use_peepholes: whether the LSTM uses peephole connections
    :returns: the final ``LSTMLayer`` of the substream
    """
    # Defaults are constructed per call: evaluating T.iscalar(...) or
    # las.init.Orthogonal() in the signature would build a single object at
    # import time and silently share it across every call (the Python
    # mutable-default-argument pitfall).
    if win is None:
        win = T.iscalar('theta')
    if w_init_fn is None:
        w_init_fn = las.init.Orthogonal()

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_' + name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    # Batch size and sequence length are kept symbolic so the network can
    # handle variable-sized minibatches.
    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    # Collapse (batch, seq, features) -> (batch * seq, features) so the
    # dense encoder can be applied to every timestep at once.
    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]), name='reshape1_' + name)
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, weights, biases,
                                              [2000, 1000, 500, 50],
                                              [nonlinearity, nonlinearity, nonlinearity, linear],
                                              ['fc1_' + name, 'fc2_' + name, 'fc3_' + name, 'bottleneck_' + name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    # Restore the time axis: (batch * seq, bottleneck) -> (batch, seq, bottleneck).
    l_reshape2 = ReshapeLayer(l_encoder_raw,
                              (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
                              name='reshape2_' + name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_' + name)

    l_lstm = LSTMLayer(
        l_delta, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_' + name)
    return l_lstm