def __init__(self, incoming, num_units,
             W_in_to_hid=init.Uniform(),
             W_hid_to_hid=init.Uniform(),
             b=init.Constant(0.),
             nonlinearity=nonlinearities.rectify,
             hid_init=init.Constant(0.),
             backwards=False,
             learn_init=False,
             gradient_steps=-1,
             grad_clipping=0,
             unroll_scan=False,
             precompute_input=True,
             mask_input=None,
             only_return_final=False,
             p=0.,
             **kwargs):
    if isinstance(incoming, tuple):
        input_shape = incoming
    else:
        input_shape = incoming.output_shape
    # Retrieve the supplied name, if it exists; otherwise use ''
    if 'name' in kwargs:
        basename = kwargs['name'] + '.'
        # Create a separate version of kwargs for the contained layers
        # which does not include 'name'
        layer_kwargs = dict((key, arg) for key, arg in kwargs.items()
                            if key != 'name')
    else:
        basename = ''
        layer_kwargs = kwargs
    # We will be passing the input at each time step to the dense layer,
    # so we need to remove the second dimension (the time dimension)
    in_to_hid = DenseLayer(InputLayer((None,) + input_shape[2:]),
                           num_units, W=W_in_to_hid, b=b,
                           nonlinearity=None,
                           name=basename + 'input_to_hidden',
                           **layer_kwargs)
    # The hidden-to-hidden layer expects its inputs to have num_units
    # features because it recycles the previous hidden state
    hid_to_hid = DenseLayer(InputLayer((None, num_units)),
                            num_units, W=W_hid_to_hid, b=None,
                            nonlinearity=None,
                            name=basename + 'hidden_to_hidden',
                            **layer_kwargs)
    # Make child layer parameters intuitively accessible
    self.W_in_to_hid = in_to_hid.W
    self.W_hid_to_hid = hid_to_hid.W
    self.b = in_to_hid.b
    # Just use the CustomRecurrentLayer with the DenseLayers we created
    super(RecurrentLayer, self).__init__(
        incoming, in_to_hid, hid_to_hid, nonlinearity=nonlinearity,
        hid_init=hid_init, backwards=backwards, learn_init=learn_init,
        gradient_steps=gradient_steps,
        grad_clipping=grad_clipping, unroll_scan=unroll_scan,
        precompute_input=precompute_input, mask_input=mask_input,
        only_return_final=only_return_final, p=p, **kwargs)
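
# ------------------------------------------------------------------
# Usage sketch (not part of the original listing): a minimal example of
# constructing this layer through the public Lasagne API. The input shape
# (None, 20, 100) and num_units=64 are illustrative assumptions; note the
# extra `p` parameter above is not in stock Lasagne and is omitted here.
#
#   from lasagne.layers import InputLayer, RecurrentLayer
#   from lasagne import nonlinearities
#
#   # (batch_size, seq_len, num_features); None allows a variable batch size
#   l_in = InputLayer(shape=(None, 20, 100))
#   # Internally this builds the in_to_hid / hid_to_hid DenseLayers shown
#   # above and delegates the recurrence to CustomRecurrentLayer
#   l_rec = RecurrentLayer(l_in, num_units=64,
#                          nonlinearity=nonlinearities.tanh,
#                          only_return_final=True)
# ------------------------------------------------------------------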