def build_BiLSTM(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, peepholes=False,
                 dropout=True, in_to_out=False):
    # Construct the forward and backward LSTMs. For now, the weight matrices are initialized
    # with the Glorot uniform initializer (default arguments); other initializers may be worth
    # trying for specific tasks.
    # Dropout on the incoming layer.
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # Following Jozefowicz et al. (2015), initialize the forget-gate bias to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # Use tanh as the cell nonlinearity for now; a purely linear cell is worth trying.
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                            nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                            precompute_input=precompute_input,
                                            ingate=ingate_forward, outgate=outgate_forward,
                                            forgetgate=forgetgate_forward, cell=cell_forward, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # Following Jozefowicz et al. (2015), initialize the forget-gate bias to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # Use tanh as the cell nonlinearity for now; a purely linear cell is worth trying.
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                             nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                             precompute_input=precompute_input, backwards=True,
                                             ingate=ingate_backward, outgate=outgate_backward,
                                             forgetgate=forgetgate_backward, cell=cell_backward, name='backward')

    # Concatenate the outputs of the forward and backward LSTMs along the feature axis.
    concat = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")

    # Dropout on the output.
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # The shape of the BiLSTM output (concat) is (batch_size, input_length, 2 * num_units).
    return concat
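

# A minimal usage sketch (not part of the original file; the embedding size, tensor
# layout, and all names below, including the helper name itself, are assumptions)
# showing how build_BiLSTM can be wired onto an embedded, masked token sequence.
def _bilstm_usage_example():
    import theano.tensor as T
    import lasagne

    input_var = T.tensor3('inputs')  # (batch_size, max_len, embed_dim)
    mask_var = T.matrix('mask')      # (batch_size, max_len): 1 for real tokens, 0 for padding

    l_in = lasagne.layers.InputLayer(shape=(None, None, 100), input_var=input_var)
    l_mask = lasagne.layers.InputLayer(shape=(None, None), input_var=mask_var)
    l_bilstm = build_BiLSTM(l_in, num_units=200, mask=l_mask, grad_clipping=5.0)
    # Static shape inference gives (None, None, 2 * 200).
    return lasagne.layers.get_output_shape(l_bilstm)
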
def build_BiGRU(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, dropout=True,
                in_to_out=False):
    # Construct the forward and backward GRUs. For now, the weight matrices are initialized
    # with the Glorot uniform initializer (default arguments); other initializers may be worth
    # trying for specific tasks.
    # Dropout on the incoming layer.
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    # Following Jozefowicz et al. (2015), who recommend initializing the LSTM forget-gate
    # bias to 1, initialize the reset-gate bias to 1 here.
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                             W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1))
    # Use tanh as the nonlinearity of the hidden update for now.
    hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                          nonlinearity=nonlinearities.tanh)
    gru_forward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                          precompute_input=precompute_input,
                                          resetgate=resetgate_forward, updategate=updategate_forward,
                                          hidden_update=hidden_forward, name='forward')

    # Following Jozefowicz et al. (2015), who recommend initializing the LSTM forget-gate
    # bias to 1, initialize the reset-gate bias to 1 here.
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1))
    # Use tanh as the nonlinearity of the hidden update for now.
    hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                           nonlinearity=nonlinearities.tanh)
    gru_backward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                           precompute_input=precompute_input, backwards=True,
                                           resetgate=resetgate_backward, updategate=updategate_backward,
                                           hidden_update=hidden_backward, name='backward')

    # Concatenate the outputs of the forward and backward GRUs along the feature axis.
    concat = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # Dropout on the output.
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # The shape of the BiGRU output (concat) is (batch_size, input_length, 2 * num_units).
    return concat
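

# A small sketch (again with assumed dimensions and names, not from the original file)
# illustrating the in_to_out option, which appends the (dropped-out) input to the BiGRU
# output along the feature axis.
def _bigru_in_to_out_example():
    import lasagne

    l_in = lasagne.layers.InputLayer(shape=(None, None, 50))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_bigru = build_BiGRU(l_in, num_units=100, mask=l_mask, in_to_out=True)
    # 2 * 100 units from the BiGRU plus the 50 input features -> (None, None, 250).
    return lasagne.layers.get_output_shape(l_bigru)
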
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {
        'momentum': .999
    }

    net = InputLayer        (     name='input',  shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise',  sigma=.15)
    net = WN(Conv2DLayer    (net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer    (net, name='pool1',  pool_size=(2, 2))
    net = DropoutLayer      (net, name='drop1',  p=.5)
    net = WN(Conv2DLayer    (net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer    (net, name='pool2',  pool_size=(2, 2))
    net = DropoutLayer      (net, name='drop2',  p=.5)
    net = WN(Conv2DLayer    (net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer       (net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer       (net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer   (net, name='pool3')
    net = WN(DenseLayer     (net, name='dense',  num_units=10, **dense_defs), **wn_defs)

    return net
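

# A hedged sketch of how the network above might be compiled for inference. It assumes
# Theano is available and that WN and the layer aliases used by build_network are imported
# in this module; the helper and variable names here are illustrative, not from the
# original file.
def _build_network_inference_example():
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    x = T.tensor4('x')  # (batch_size, 3, 32, 32) CIFAR-10-shaped input
    net = build_network()
    # deterministic=True disables the Gaussian noise and dropout layers at test time.
    probs = lasagne.layers.get_output(net, x, deterministic=True)
    predict_fn = theano.function([x], probs)

    dummy = np.zeros((2, 3, 32, 32), dtype=theano.config.floatX)
    return predict_fn(dummy).shape  # (2, 10)
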
def batch_norm(layer, **kwargs):
    """
    Apply batch normalization to an existing layer. This is a convenience
    function modifying an existing layer to include batch normalization: It
    will steal the layer's nonlinearity if there is one (effectively
    introducing the normalization right before the nonlinearity), remove
    the layer's bias if there is one (because it would be redundant), and add
    a :class:`BatchNormLayer` and :class:`NonlinearityLayer` on top.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`BatchNormLayer` constructor.

    Returns
    -------
    BatchNormLayer or NonlinearityLayer instance
        A batch normalization layer stacked on the given modified `layer`, or
        a nonlinearity layer stacked on top of both if `layer` was nonlinear.

    Examples
    --------
    Just wrap any layer into a :func:`batch_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, DenseLayer, batch_norm
    >>> from lasagne.nonlinearities import tanh
    >>> l1 = InputLayer((64, 768))
    >>> l2 = batch_norm(DenseLayer(l1, num_units=500, nonlinearity=tanh))

    This introduces batch normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'DenseLayer', 'BatchNormLayer', 'NonlinearityLayer']
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = lasagne.nonlinearities.identity
    if hasattr(layer, 'b') and layer.b is not None:
        del layer.params[layer.b]
        layer.b = None
    layer = BatchNormLayer(layer, **kwargs)
    if nonlinearity is not None:
        layer = L.NonlinearityLayer(layer, nonlinearity)
    return layer