def __init__(self, incoming, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.),
             W=lasagne.init.Normal(0.05), train_g=False, init_stdv=1., nonlinearity=relu, **kwargs):
    super(WeightNormLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = nonlinearity
    self.init_stdv = init_stdv
    k = self.input_shape[1]  # number of output units / feature maps of the incoming layer
    if b is not None:
        self.b = self.add_param(b, (k,), name="b", regularizable=False)
    if g is not None:
        self.g = self.add_param(g, (k,), name="g", regularizable=False, trainable=train_g)
    if len(self.input_shape) == 4:
        # 4D (convolutional) input: statistics are taken over batch and spatial axes
        self.axes_to_sum = (0, 2, 3)
        self.dimshuffle_args = ['x', 0, 'x', 'x']
    else:
        # 2D (dense) input: statistics are taken over the batch axis only
        self.axes_to_sum = 0
        self.dimshuffle_args = ['x', 0]

    # scale weights in layer below
    incoming.W_param = incoming.W
    #incoming.W_param.set_value(W.sample(incoming.W_param.get_value().shape))
    if incoming.W_param.ndim == 4:
        if isinstance(incoming, Deconv2DLayer):
            # deconvolution: output channels sit on axis 1, so sum the norm over (0, 2, 3)
            W_axes_to_sum = (0, 2, 3)
            W_dimshuffle_args = ['x', 0, 'x', 'x']
        else:
            # ordinary convolution: output channels sit on axis 0, so sum the norm over (1, 2, 3)
            W_axes_to_sum = (1, 2, 3)
            W_dimshuffle_args = [0, 'x', 'x', 'x']
    else:
        # dense weight matrix: each column is one output unit's weight vector
        W_axes_to_sum = 0
        W_dimshuffle_args = ['x', 0]
    if g is not None:
        # w = g * v / ||v||: rescale each output unit's weight vector to have norm g
        incoming.W = incoming.W_param * (
            self.g / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param), axis=W_axes_to_sum))
        ).dimshuffle(*W_dimshuffle_args)
    else:
        # no learned gain: simply normalize each weight vector to unit norm
        incoming.W = incoming.W_param / T.sqrt(
            1e-6 + T.sum(T.square(incoming.W_param), axis=W_axes_to_sum, keepdims=True))
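The constructor therefore never touches the incoming layer's forward pass directly; it only swaps the layer's W attribute for the reparameterized expression g * v / ||v||, so the normalization is applied whenever the graph below is built. Below is a minimal usage sketch, not part of the original source: it assumes the full WeightNormLayer class (including its get_output_for, which is not shown in this excerpt) lives in the same module, together with the imports that class needs (lasagne, theano.tensor as T, relu, Deconv2DLayer). The affine layer is created without its own bias and nonlinearity so that WeightNormLayer can add the bias b and apply the activation itself.

import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer(shape=(None, 784), input_var=x)
l_dense = lasagne.layers.DenseLayer(
    l_in, num_units=256,
    b=None,                                         # bias is handled by WeightNormLayer instead
    nonlinearity=lasagne.nonlinearities.identity)   # activation is applied by WeightNormLayer
l_wn = WeightNormLayer(l_dense, nonlinearity=lasagne.nonlinearities.rectify)
# roughly: relu(x.dot(g * V / ||V||) + b), since l_dense.W was replaced above
y = lasagne.layers.get_output(l_wn)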