import lasagne as las
from lasagne.layers import (InputLayer, DenseLayer, ReshapeLayer, Gate,
                            ElemwiseSumLayer, ConcatLayer)
from lasagne.nonlinearities import tanh

# create_blstm and AdaptiveElemwiseSumLayer are project-specific helpers
# defined elsewhere in the codebase.


def create_model(substreams, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # Fuse the substreams according to the chosen fusion strategy.
    # Merge layers take a list of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    # Bidirectional LSTM over the fused representation; the forward and
    # backward outputs are combined by summing.
    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters,
                                          gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Reshape to (num_examples * seq_len, lstm_size) so a dense layer can be
    # applied to every timestep.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # The network predicts a class at every timestep, so the dense layer has
    # one unit per output class with a softmax nonlinearity.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    # Reshape back to (num_examples, seq_len, output_classes).
    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes), name='output')

    return l_out, l_fuse
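
AdaptiveElemwiseSumLayer is a project-specific merge layer rather than a stock Lasagne class. As a rough sketch of the idea, assuming it simply learns one scalar coefficient per substream and sums the scaled streams, it could look like the following (the coefficient shape and initialization here are illustrative assumptions, not the project's actual implementation):

import theano.tensor as T
import lasagne as las


class AdaptiveElemwiseSumLayer(las.layers.ElemwiseMergeLayer):
    """Elementwise sum of the incoming layers, each scaled by a learnable
    coefficient so the network can weight the fused streams."""

    def __init__(self, incomings, coeffs=las.init.Constant(1.0), **kwargs):
        super(AdaptiveElemwiseSumLayer, self).__init__(incomings, T.add, **kwargs)
        # One learnable scalar per incoming substream (illustrative choice).
        self.coeffs = self.add_param(coeffs, (len(incomings),), name='coeffs')

    def get_output_for(self, inputs, **kwargs):
        # Scale each substream by its coefficient before the elementwise sum.
        scaled = [self.coeffs[i] * inp for i, inp in enumerate(inputs)]
        return super(AdaptiveElemwiseSumLayer, self).get_output_for(scaled, **kwargs)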
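As a usage illustration, the model could be wired up roughly as follows. The substream layers, input variables, and feature sizes below are illustrative assumptions; in the actual pipeline the substreams would be the per-modality encoder outputs feeding into the fusion layer.

import theano.tensor as T
import lasagne as las
from lasagne.layers import InputLayer

# Symbolic inputs for two hypothetical substreams and the sequence mask.
audio_var = T.tensor3('audio')   # (batch, seq_len, features)
video_var = T.tensor3('video')   # (batch, seq_len, features)
mask_var = T.matrix('mask')      # (batch, seq_len)

# Stand-in substreams; both must share the same feature size for
# 'sum'/'adasum' fusion (not required for 'concat').
l_audio = InputLayer((None, None, 90), audio_var, name='audio')
l_video = InputLayer((None, None, 90), video_var, name='video')

network, l_fuse = create_model([l_audio, l_video], (None, None), mask_var,
                               lstm_size=250, output_classes=26,
                               fusiontype='adasum')

prediction = las.layers.get_output(network)  # (batch, seq_len, output_classes)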