def __init__(self, incoming, num_units, max_steps, peepholes=False, mask_input=None, **kwargs):
"""
initialization
:param incoming: bidirectional mLSTM for passane
:param num_units:
:param max_steps: max num steps to generate answer words, can be tensor scalar variable
:param peepholes:
:param mask_input: passage's length mask
:param kwargs:
"""
super(AnsPointerLayer, self).__init__(incoming, num_units, peepholes=peepholes,
precompute_input=False, mask_input=mask_input,
only_return_final=False, **kwargs)
self.max_steps = max_steps
# initializes attention weights
input_shape = self.input_shapes[0]
num_inputs = np.prod(input_shape[2:])
self.V_pointer = self.add_param(init.Normal(0.1), (num_inputs, num_units), 'V_pointer')
# stored as a column vector, so no transpose is needed in the attention dot product
self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
self.W_a_pointer = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
self.b_a_pointer = self.add_param(init.Constant(0.), (1, num_units), 'b_a_pointer')
self.c_pointer = self.add_param(init.Constant(0.), (1, 1), 'c_pointer')
def __init__(self, incoming, num_units, max_steps, peepholes=False, mask_input=None, **kwargs):
"""
initialization
:param incoming: bidirectional mLSTM for passane
:param num_units:
:param max_steps: max num steps to generate answer words, can be tensor scalar variable
:param peepholes:
:param mask_input: passage's length mask
:param kwargs:
"""
super(AnsPointerLayer, self).__init__(incoming, num_units, peepholes=peepholes,
precompute_input=False, mask_input=mask_input,
only_return_final=False, **kwargs)
self.max_steps = max_steps
# initializes attention weights
input_shape = self.input_shapes[0]
num_inputs = np.prod(input_shape[2:])
self.V_pointer = self.add_param(init.Normal(0.1), (num_inputs, num_units), 'V_pointer')
# stored as a column vector, so no transpose is needed in the attention dot product
self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
self.W_a_pointer = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
self.b_a_pointer = self.add_param(init.Constant(0.), (num_units, ), 'b_a_pointer')
c_pointer = theano.shared(np.array([0.], dtype='float32'), name='c_pointer', broadcastable=(True, ))
self.c_pointer = self.add_param(c_pointer, (1,), 'c_pointer')
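# A hedged construction sketch for the layer above (the shapes, the names
# l_Hr / l_p_mask, and max_steps=2 are illustrative assumptions): the layer
# sits on top of a bidirectional match-LSTM encoding of the passage and
# reuses the passage length mask.
from lasagne.layers import InputLayer, get_output
l_Hr = InputLayer((None, 50, 150))      # match-LSTM states: (batch, passage_len, num_units)
l_p_mask = InputLayer((None, 50))       # passage length mask
l_ptr = AnsPointerLayer(l_Hr, num_units=150, max_steps=2, mask_input=l_p_mask)
ptr_probs = get_output(l_ptr)           # one distribution over positions per pointer step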
def __init__(self, incoming, n_slots, d_slots, C=init.GlorotUniform(), M=init.Normal(),
b=init.Constant(0.), nonlinearity_final=nonlinearities.identity,
**kwargs):
super(MemoryLayer, self).__init__(incoming, **kwargs)
self.nonlinearity_final = nonlinearity_final
self.n_slots = n_slots
self.d_slots = d_slots
num_inputs = int(np.prod(self.input_shape[1:]))
self.C = self.add_param(C, (num_inputs, n_slots), name="C") # controller
self.M = self.add_param(M, (n_slots, d_slots), name="M") # memory slots
if b is None:
self.b = None
else:
self.b = self.add_param(b, (n_slots,), name="b",
regularizable=False)
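# get_output_for is not shown in the snippet; the sketch below is an
# assumption about what this parameterisation usually computes: a soft
# memory lookup in which the controller matrix C scores the n_slots slots
# and an attention-weighted mixture of the rows of M is returned.
import theano.tensor as T
from lasagne import nonlinearities

def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        input = input.flatten(2)
    scores = T.dot(input, self.C)              # (batch, n_slots)
    if self.b is not None:
        scores = scores + self.b
    weights = nonlinearities.softmax(scores)   # attention over memory slots
    return self.nonlinearity_final(T.dot(weights, self.M))   # (batch, d_slots)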
def discriminator(input_var):
network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
input_var=input_var)
network = ll.DropoutLayer(network, p=0.5)
network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 64, (4,4), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 32, (5,5), stride=2, pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 32, (5,5), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 32, (5,5), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
network = nn.weight_norm(dnn.Conv2DDNNLayer(network, 16, (3,3), pad='valid', W=Normal(0.05), nonlinearity=nn.lrelu))
network = nn.weight_norm(ll.DenseLayer(network, num_units=1, W=Normal(0.05), nonlinearity=None), train_g=True, init_stdv=0.1)
return network
def generator(input_var):
network = lasagne.layers.InputLayer(shape=(None, NLAT,1,1),
input_var=input_var)
network = ll.DenseLayer(network, num_units=4*4*64, W=Normal(0.05), nonlinearity=nn.relu)
#print(input_var.shape[0])
network = ll.ReshapeLayer(network, (batch_size,64,4,4))
network = nn.Deconv2DLayer(network, (batch_size,32,7,7), (4,4), stride=(1,1), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
network = nn.Deconv2DLayer(network, (batch_size,32,11,11), (5,5), stride=(1,1), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
network = nn.Deconv2DLayer(network, (batch_size,32,25,25), (5,5), stride=(2,2), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
network = nn.Deconv2DLayer(network, (batch_size,1,28,28), (4,4), stride=(1,1), pad='valid', W=Normal(0.05), nonlinearity=sigmoid)
#network =lasagne.layers.Conv2DLayer(network, num_filters=1, filter_size=1, stride=1, nonlinearity=sigmoid)
return network
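# A minimal wiring sketch for adversarial training (an assumption about how
# the two builders above are used; NLAT, batch_size and the helper modules
# nn/ll come from the surrounding script).
import theano.tensor as T
import lasagne.layers as ll

x = T.tensor4('x')      # real images, (batch_size, 1, 28, 28)
z = T.tensor4('z')      # latent codes, (batch_size, NLAT, 1, 1)
gen, disc = generator(z), discriminator(x)

fake = ll.get_output(gen)
d_real = ll.get_output(disc)                 # discriminator logits on real data
d_fake = ll.get_output(disc, inputs=fake)    # same network applied to samples
# the final DenseLayer has no nonlinearity, so softplus-based (logit) losses apply:
d_loss = T.mean(T.nnet.softplus(-d_real)) + T.mean(T.nnet.softplus(d_fake))
g_loss = T.mean(T.nnet.softplus(-d_fake))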
def __init__(self, vocab_size, learning_rate=LEARNING_RATE, grad_clip=GRAD_CLIP, init_embedding=Normal()):
self.vocab_size = vocab_size
self.lr = learning_rate
self.gc = grad_clip
self.W = init_embedding
if USE_GRU:
self.rnn_layer = GRULayer
else:
self.rnn_layer = LSTMLayer
if CONSTANTLY_FEED_HIDDEN_STATE:
self.net = self._get_feed_net() # seq2seq v2
else:
self.net = self._get_net() # seq2seq v1
self.train = self._get_train_fun()
self.predict = self._get_predict_fun()
# self.encode = self._get_encoder_fun()
# self.decode = self._get_decoder_fun()
# self.embedding = self._get_embedding_fun()
# self.slicing = self._get_slice_fun()
# self.decoding = self._get_dec_fun()
def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1),
W_cell=init.Normal(0.1), W_to=init.Normal(0.1),
b=init.Constant(0.),
nonlinearity=nonlinearities.sigmoid):
self.W_in = W_in
self.W_hid = W_hid
self.W_to = W_to
# Don't store a cell weight vector when cell is None
if W_cell is not None:
self.W_cell = W_cell
self.b = b
# For the nonlinearity, if None is supplied, use identity
if nonlinearity is None:
self.nonlinearity = nonlinearities.identity
else:
self.nonlinearity = nonlinearity
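# The class above mirrors Lasagne's built-in Gate objects (plus an extra W_to
# term): a recurrent layer reads the initializers off the gate and registers
# the actual parameters itself. A minimal sketch of that consumption pattern
# with the standard lasagne.layers.Gate:
from lasagne import init, nonlinearities
from lasagne.layers import InputLayer, LSTMLayer, Gate

l_in = InputLayer((None, 20, 100))          # (batch, seq_len, features)
l_lstm = LSTMLayer(
    l_in, num_units=64,
    ingate=Gate(W_in=init.Normal(0.1), nonlinearity=nonlinearities.sigmoid),
    forgetgate=Gate(b=init.Constant(1.)),   # positive forget-gate bias
)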
def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
crop=0, untie_biases=False,
W=initmethod(), b=lasagne.init.Constant(0.),
nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False,
**kwargs):
super(DeconvLayer, self).__init__(
incoming, num_filters, filter_size, stride, crop, untie_biases,
W, b, nonlinearity, flip_filters, n=2, **kwargs)
# the base conv layer stores its padding as self.pad; expose it as self.crop instead
self.crop = self.pad
del self.pad
def InceptionUpscaleLayer(incoming,param_dict,block_name):
branch = [0]*len(param_dict)
# Loop across branches
for i,dict in enumerate(param_dict):
for j,style in enumerate(dict['style']): # Loop up branch
branch[i] = TC2D(
incoming = branch[i] if j else incoming,
num_filters = dict['num_filters'][j],
filter_size = dict['filter_size'][j],
crop = dict['pad'][j] if 'pad' in dict else None,
stride = dict['stride'][j],
W = initmethod('relu'),
nonlinearity = dict['nonlinearity'][j],
name = block_name+'_'+str(i)+'_'+str(j)) if style=='convolutional'\
else NL(
incoming = lasagne.layers.dnn.Pool2DDNNLayer(
incoming = lasagne.layers.Upscale2DLayer(
incoming=incoming if j == 0 else branch[i],
scale_factor = dict['stride'][j]),
pool_size = dict['filter_size'][j],
stride = [1,1],
mode = dict['mode'][j],
pad = dict['pad'][j],
name = block_name+'_'+str(i)+'_'+str(j)),
nonlinearity = dict['nonlinearity'][j])
# Apply Batchnorm
branch[i] = BN(branch[i],name = block_name+'_bnorm_'+str(i)+'_'+str(j)) if dict['bnorm'][j] else branch[i]
# Concatenate Sublayers
return CL(incomings=branch,name=block_name)
# Convenience function to efficiently generate param dictionaries for use with the Inception-style layers (e.g. InceptionUpscaleLayer above)
def pd(num_layers=2,num_filters=32,filter_size=(3,3),pad=1,stride = (1,1),nonlinearity=elu,style='convolutional',bnorm=1,**kwargs):
input_args = locals()
input_args.pop('num_layers')
return {key:entry if type(entry) is list else [entry]*num_layers for key,entry in input_args.iteritems()}
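# Hypothetical call illustrating the dict consumed by the Inception-style
# blocks above: entries that are already lists are kept per-layer, everything
# else is broadcast across num_layers.
branch = pd(num_layers=2,
            num_filters=[32, 16],       # per-layer list, kept as given
            filter_size=(3, 3),         # broadcast to [(3, 3), (3, 3)]
            stride=[(2, 2), (1, 1)],
            style='convolutional',
            bnorm=1)
# A block is then built from a list of such branch dicts, e.g.
# InceptionUpscaleLayer(incoming, param_dict=[branch], block_name='up1').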
# Possible Conv2DDNN convenience function. Remember to delete the C2D import at the top if you use this
# def C2D(incoming = None, num_filters = 32, filter_size= [3,3],pad = 'same',stride = [1,1], W = initmethod('relu'),nonlinearity = elu,name = None):
# return lasagne.layers.dnn.Conv2DDNNLayer(incoming,num_filters,filter_size,stride,pad,False,W,None,nonlinearity,False)
# Shape-Preserving Gaussian Sample layer for latent vectors with spatial dimensions.
# This is a holdover from an "old" (i.e. I abandoned it last month) idea.
def __init__(self, incoming, num_units, peepholes=True,
backwards=False, mask_input=None, only_return_final=True,
encoder_input=None, encoder_mask_input=None, **kwargs):
super(MatchLSTM, self).__init__(incoming, num_units, peepholes=peepholes,
backwards=backwards,
precompute_input=False, mask_input=mask_input,
only_return_final=only_return_final, **kwargs)
# encoder mask
self.encoder_input_incoming_index = -1
self.encoder_mask_incoming_index = -1
if encoder_mask_input is not None:
self.input_layers.append(encoder_mask_input)
self.input_shapes.append(encoder_mask_input.output_shape)
self.encoder_mask_incoming_index = len(self.input_layers) - 1
if encoder_input is not None:
self.input_layers.append(encoder_input)
encoder_input_output_shape = encoder_input.output_shape
self.input_shapes.append(encoder_input_output_shape)
self.encoder_input_incoming_index = len(self.input_layers) - 1
# hidden state length should equal the embedding size
assert encoder_input_output_shape[-1] == num_units
# input feature length should equal the embedding size plus the hidden state length
assert encoder_input_output_shape[-1] + num_units == self.input_shapes[0][-1]
# initializes attention weights
self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_y_attend')
self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend')
# stored as a column vector, so no transpose is needed in the attention dot product
self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'w_attend')
self.W_m_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_m_attend')
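# A hedged construction sketch (layer names and shapes below are illustrative
# assumptions): the encoded premise/passage enters through encoder_input, its
# length mask through encoder_mask_input, and the per-step concatenation of
# word embedding and attention read (hence the 300 + 300 feature size checked
# by the asserts above) through incoming.
from lasagne.layers import InputLayer
l_passage = InputLayer((None, 50, 300))        # encoder states, 300 units
l_passage_mask = InputLayer((None, 50))
l_step_input = InputLayer((None, 30, 600))     # 300 (embedding) + 300 (hidden)
l_step_mask = InputLayer((None, 30))
l_match = MatchLSTM(l_step_input, num_units=300,
                    mask_input=l_step_mask,
                    encoder_input=l_passage,
                    encoder_mask_input=l_passage_mask)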
def __init__(self, incoming, n_slots, d_slots, M=init.Normal(), nonlinearity_final=nonlinearities.identity,
**kwargs):
super(SeparateMemoryLayer, self).__init__(incoming, **kwargs)
self.nonlinearity_final = nonlinearity_final
self.n_slots = n_slots
self.d_slots = d_slots
self.M = self.add_param(M, (n_slots, d_slots), name="M") # memory slots
def __init__(self, incomings, voc_size, hid_state_size, W=Normal(), **kwargs):
# Initialize parameters and create theano variables
super(SemMemModule, self).__init__(incomings, **kwargs)
self.hid_state_size = hid_state_size
self.W = self.add_param(W, (voc_size, hid_state_size), name='Word_Embedding', regularizable=False)
self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))
def __init__(self, W_in=Normal(0.1), W_hid=Normal(0.1),
b=Constant(0.), nonlinearity=nonlin.sigmoid):
self.W_in = W_in
self.W_hid = W_hid
self.b = b
if nonlinearity is None:
self.nonlinearity = nonlin.identity
else:
self.nonlinearity = nonlinearity
def __init__(self, incomings, hid_state_size, voc_size,
resetgate = GRU_Gate(), updategate = GRU_Gate(),
hid_update = GRU_Gate(nonlinearity=nonlin.tanh),
W=Normal(), max_answer_word=1, **kwargs):
super(AnswerModule, self).__init__(incomings, **kwargs)
self.hid_state_size = hid_state_size
#FOR GRU
input_shape = self.input_shapes[0]
num_inputs = np.prod(input_shape[1]) + voc_size # concatenation of previous prediction
def add_gate(gate, gate_name):
return (self.add_param(gate.W_in, (num_inputs, hid_state_size),
name="W_in_to_{}".format(gate_name)),
self.add_param(gate.W_hid, (hid_state_size, hid_state_size),
name="W_hid_to_{}".format(gate_name)),
self.add_param(gate.b, (hid_state_size,),
name="b_{}".format(gate_name), regularizable=False),
gate.nonlinearity)
# Add in all parameters from gates
(self.W_in_to_updategate,
self.W_hid_to_updategate,
self.b_updategate,
self.nonlinearity_updategate)= add_gate(updategate, 'updategate')
(self.W_in_to_resetgate,
self.W_hid_to_resetgate,
self.b_resetgate,
self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate')
(self.W_in_to_hid_update,
self.W_hid_to_hid_update,
self.b_hid_update,
self.nonlinearity_hid) = add_gate(hid_update, 'hid_update')
self.W = self.add_param(W, (hid_state_size, voc_size), name="W")
self.max_answer_word = max_answer_word
self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))
def __init__(self, W_g=init.Normal(0.1), W_s=init.Normal(0.1),
W_h=init.Normal(0.1), W_v=init.Normal(0.1),
nonlinearity=nonlinearities.softmax):
self.W_s = W_s
self.W_h = W_h
self.W_g = W_g
self.W_v = W_v
if nonlinearity is None:
self.nonlinearity = nonlinearities.identity
else:
self.nonlinearity = nonlinearity
def __init__(self, incoming, num_centers,
locs=init.Normal(std=1), log_sigma=init.Constant(0.),
**kwargs):
super(RBFLayer, self).__init__(incoming, **kwargs)
self.num_centers = num_centers
assert len(self.input_shape) == 2
in_dim = self.input_shape[1]
self.locs = self.add_param(locs, (num_centers, in_dim), name='locs',
regularizable=False)
self.log_sigma = self.add_param(log_sigma, (), name='log_sigma')
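# The forward pass is not included in the snippet; the sketch below is an
# assumption about what locs and log_sigma parameterise: an isotropic
# Gaussian RBF response to each of the num_centers centres with one shared
# bandwidth.
import theano.tensor as T

def get_output_for(self, input, **kwargs):
    # (batch, 1, dim) - (1, centers, dim) -> squared distances (batch, centers)
    diff = input.dimshuffle(0, 'x', 1) - self.locs.dimshuffle('x', 0, 1)
    sq_dists = T.sum(T.square(diff), axis=2)
    gamma = 0.5 * T.exp(-2.0 * self.log_sigma)     # 1 / (2 * sigma^2)
    return T.exp(-gamma * sq_dists)

def get_output_shape_for(self, input_shape):
    return (input_shape[0], self.num_centers)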
def __init__(self, incoming, num_freqs,
freqs=init.Normal(std=1), log_sigma=init.Constant(0.),
**kwargs):
super(SmoothedCFLayer, self).__init__(incoming, **kwargs)
self.num_freqs = num_freqs
assert len(self.input_shape) == 2
in_dim = self.input_shape[1]
self.freqs = self.add_param(freqs, (num_freqs, in_dim), name='freqs')
self.log_sigma = self.add_param(log_sigma, (), name='log_sigma')
def __init__(self, incoming, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.),
W=lasagne.init.Normal(0.05), train_g=False, init_stdv=1., nonlinearity=relu, **kwargs):
super(WeightNormLayer, self).__init__(incoming, **kwargs)
self.nonlinearity = nonlinearity
self.init_stdv = init_stdv
k = self.input_shape[1]
if b is not None:
self.b = self.add_param(b, (k,), name="b", regularizable=False)
if g is not None:
self.g = self.add_param(g, (k,), name="g", regularizable=False, trainable=train_g)
if len(self.input_shape)==4:
self.axes_to_sum = (0,2,3)
self.dimshuffle_args = ['x',0,'x','x']
else:
self.axes_to_sum = 0
self.dimshuffle_args = ['x',0]
# scale weights in layer below
incoming.W_param = incoming.W
#incoming.W_param.set_value(W.sample(incoming.W_param.get_value().shape))
if incoming.W_param.ndim==4:
if isinstance(incoming, Deconv2DLayer):
W_axes_to_sum = (0,2,3)
W_dimshuffle_args = ['x',0,'x','x']
else:
W_axes_to_sum = (1,2,3)
W_dimshuffle_args = [0,'x','x','x']
else:
W_axes_to_sum = 0
W_dimshuffle_args = ['x',0]
if g is not None:
incoming.W = incoming.W_param * (self.g/T.sqrt(1e-6 + T.sum(T.square(incoming.W_param),axis=W_axes_to_sum))).dimshuffle(*W_dimshuffle_args)
else:
incoming.W = incoming.W_param / T.sqrt(1e-6 + T.sum(T.square(incoming.W_param),axis=W_axes_to_sum,keepdims=True))
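# Hypothetical direct use of the layer above (the GAN snippets earlier reach
# it through the nn.weight_norm wrapper instead): build the wrapped layer
# with nonlinearity=None so that WeightNormLayer rescales its W per output
# channel and applies the activation itself.
import lasagne
from lasagne.layers import InputLayer
from lasagne.layers.dnn import Conv2DDNNLayer
from lasagne.nonlinearities import rectify

l_in = InputLayer((None, 3, 32, 32))
l_conv = Conv2DDNNLayer(l_in, 64, (3, 3), pad='same',
                        W=lasagne.init.Normal(0.05), nonlinearity=None)
l_conv = WeightNormLayer(l_conv, nonlinearity=rectify)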
def __init__(self, incoming, target_shape, filter_size, stride=(2, 2), pad='half',
W=lasagne.init.Normal(0.05), b=lasagne.init.Constant(0.), nonlinearity=relu, **kwargs):
super(Deconv2DLayer, self).__init__(incoming, **kwargs)
self.target_shape = target_shape
self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
self.filter_size = lasagne.layers.dnn.as_tuple(filter_size, 2)
self.stride = lasagne.layers.dnn.as_tuple(stride, 2)
self.pad = pad
self.W_shape = (incoming.output_shape[1], target_shape[1], filter_size[0], filter_size[1])
self.W = self.add_param(W, self.W_shape, name="W")
if b is not None:
self.b = self.add_param(b, (target_shape[1],), name="b")
else:
self.b = None
def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
super(MinibatchLayer, self).__init__(incoming, **kwargs)
self.num_kernels = num_kernels
num_inputs = int(np.prod(self.input_shape[1:]))
self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
self.W = self.theta * (T.exp(self.log_weight_scale)/T.sqrt(T.sum(T.square(self.theta),axis=0))).dimshuffle('x',0,1)
self.b = self.add_param(b, (num_kernels,), name="b")
def __init__(self, incoming, num_units, theta=lasagne.init.Normal(0.1), b=lasagne.init.Constant(0.),
weight_scale=lasagne.init.Constant(1.), train_scale=False, nonlinearity=relu, **kwargs):
super(DenseLayer, self).__init__(incoming, **kwargs)
self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
self.num_units = num_units
num_inputs = int(np.prod(self.input_shape[1:]))
self.theta = self.add_param(theta, (num_inputs, num_units), name="theta")
self.weight_scale = self.add_param(weight_scale, (num_units,), name="weight_scale", trainable=train_scale)
self.W = self.theta * (self.weight_scale/T.sqrt(T.sum(T.square(self.theta),axis=0))).dimshuffle('x',0)
self.b = self.add_param(b, (num_units,), name="b")
def conv_layer(input_, filter_size, num_filters, stride, pad, nonlinearity=relu, W=Normal(0.02), **kwargs):
return layers.conv.Conv2DDNNLayer(input_,
num_filters=num_filters,
stride=parse_tuple(stride),
filter_size=parse_tuple(filter_size),
pad=pad,
W=W, nonlinearity=nonlinearity, **kwargs)
def style_conv_block(conv_in, num_styles, num_filters, filter_size, stride, nonlinearity=rectify, normalization=instance_norm):
sc_network = ReflectLayer(conv_in, filter_size//2)
sc_network = normalization(ConvLayer(sc_network, num_filters, filter_size, stride, nonlinearity=nonlinearity, W=Normal()), num_styles=num_styles)
return sc_network
def conv_layer(input_, filter_size, num_filters, stride, pad, nonlinearity=relu, W=Normal(0.02), **kwargs):
return dnn.Conv2DDNNLayer(input_,
num_filters=num_filters,
stride=parse_tuple(stride),
filter_size=parse_tuple(filter_size),
pad=pad,
W=W, nonlinearity=nonlinearity, **kwargs)