def __build_24_net__(self):
    """Assemble the 24x24 face/non-face binary classifier (2-way softmax)."""
    net = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    net = layers.dropout(net, p=0.1)
    net = layers.Conv2DLayer(net, num_filters=64, filter_size=(5, 5),
                             stride=1, nonlinearity=relu)
    net = layers.batch_norm(net)
    net = layers.MaxPool2DLayer(net, pool_size=(3, 3), stride=2)
    net = layers.DropoutLayer(net, p=0.5)
    net = layers.batch_norm(net)
    net = layers.DenseLayer(net, num_units=64, nonlinearity=relu)
    net = layers.DropoutLayer(net, p=0.5)
    return layers.DenseLayer(net, num_units=2, nonlinearity=softmax)
python类rectify()的实例源码
cnn_cascade_lasagne.py 文件源码
项目:Cascade-CNN-Face-Detection
作者: gogolgrind
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
cnn_cascade_lasagne.py 文件源码
项目:Cascade-CNN-Face-Detection
作者: gogolgrind
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def __build_12_calib_net__(self):
    """Assemble the 12x12 calibration net (45 calibration classes)."""
    net = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    net = layers.Conv2DLayer(net, num_filters=16, filter_size=(3, 3),
                             stride=1, nonlinearity=relu)
    net = layers.MaxPool2DLayer(net, pool_size=(3, 3), stride=2)
    net = layers.DenseLayer(net, num_units=128, nonlinearity=relu)
    return layers.DenseLayer(net, num_units=45, nonlinearity=softmax)
cnn_cascade_lasagne.py 文件源码
项目:Cascade-CNN-Face-Detection
作者: gogolgrind
项目源码
文件源码
阅读 19
收藏 0
点赞 0
评论 0
def __build_24_calib_net__(self):
    """Assemble the 24x24 calibration net (45 calibration classes)."""
    net = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    net = layers.Conv2DLayer(net, num_filters=32, filter_size=(5, 5),
                             stride=1, nonlinearity=relu)
    net = layers.MaxPool2DLayer(net, pool_size=(3, 3), stride=2)
    net = layers.DenseLayer(net, num_units=64, nonlinearity=relu)
    return layers.DenseLayer(net, num_units=45, nonlinearity=softmax)
def style_conv_block(conv_in, num_styles, num_filters, filter_size, stride, nonlinearity=rectify, normalization=instance_norm):
    """Reflection-pad + conv + style-conditioned normalization block.

    Pads by filter_size//2 so the convolution preserves spatial size, then
    applies the given normalization conditioned on `num_styles` styles.
    """
    padded = ReflectLayer(conv_in, filter_size // 2)
    conv = ConvLayer(padded, num_filters, filter_size, stride,
                     nonlinearity=nonlinearity, W=Normal())
    return normalization(conv, num_styles=num_styles)
def network_discriminator(self, features):
    """Build the discriminator head over `features`: three identical
    conv(32,3x3,valid)+maxpool(2x2) stages, then a 64-unit dense layer and a
    2-way softmax output.

    Returns a dict mapping layer names to the constructed layers.
    """
    net = {}
    prev = features
    # Stages are numbered 2..4 to match the naming scheme of the feature
    # extractor this head attaches to.
    for idx in (2, 3, 4):
        conv_name = 'discriminator/conv%d' % idx
        pool_name = 'discriminator/pool%d' % idx
        net[conv_name] = Conv2DLayer(prev, num_filters=32, filter_size=3, stride=1,
                                     pad='valid', nonlinearity=rectify, name=conv_name)
        net[pool_name] = MaxPool2DLayer(net[conv_name], pool_size=2, stride=2, pad=0,
                                        name=pool_name)
        prev = net[pool_name]
    net['discriminator/dense1'] = DenseLayer(prev, num_units=64, nonlinearity=rectify,
                                             name='discriminator/dense1')
    net['discriminator/output'] = DenseLayer(net['discriminator/dense1'], num_units=2,
                                             nonlinearity=softmax, name='discriminator/output')
    return net
def _initialize_network(self, img_input_shape, misc_len, output_size, img_input, misc_input=None, **kwargs):
    """Build a DQN-style conv net over the screen input, optionally fused with
    a misc (non-visual) state vector, ending in a linear output layer
    (one unit per action).

    Returns (network, input_layers, inputs): output layer, list of
    InputLayers, and the matching list of input variables.

    NOTE(review): indentation reconstructed from a flattened paste — confirm
    the if-branch extents against the upstream repository.
    """
    input_layers = []
    inputs = [img_input]
    # weights_init = lasagne.init.GlorotUniform("relu")
    weights_init = lasagne.init.HeNormal("relu")
    network = ls.InputLayer(shape=img_input_shape, input_var=img_input)
    input_layers.append(network)
    # Conv stack: 8x8/4, 4x4/2, 3x3/1 (classic DQN shape).
    network = ls.Conv2DLayer(network, num_filters=32, filter_size=8, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=4)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=4, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=2)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=3, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=1)
    if self.misc_state_included:
        # Flatten conv features and concatenate the misc vector, optionally
        # passed through one extra dense layer first.
        inputs.append(misc_input)
        network = ls.FlattenLayer(network)
        misc_input_layer = ls.InputLayer(shape=(None, misc_len), input_var=misc_input)
        input_layers.append(misc_input_layer)
        if "additional_misc_layer" in kwargs:
            misc_input_layer = ls.DenseLayer(misc_input_layer, int(kwargs["additional_misc_layer"]),
                                             nonlinearity=rectify,
                                             W=weights_init, b=lasagne.init.Constant(0.1))
        network = ls.ConcatLayer([network, misc_input_layer])
    # Fully connected head; final layer is linear (nonlinearity=None).
    network = ls.DenseLayer(network, 512, nonlinearity=rectify,
                            W=weights_init, b=lasagne.init.Constant(0.1))
    network = ls.DenseLayer(network, output_size, nonlinearity=None, b=lasagne.init.Constant(.1))
    return network, input_layers, inputs
def _initialize_network(self, img_input_shape, misc_len, output_size, img_input, misc_input=None, **kwargs):
    """Dueling-DQN variant: shared conv trunk (plus optional misc fusion),
    then separate advantage and state-value branches merged by
    DuellingMergeLayer.

    Returns (network, input_layers, inputs).

    NOTE(review): indentation reconstructed from a flattened paste — confirm
    the if-branch extents against the upstream repository.
    """
    input_layers = []
    inputs = [img_input]
    # weights_init = lasagne.init.GlorotUniform("relu")
    weights_init = lasagne.init.HeNormal("relu")
    network = ls.InputLayer(shape=img_input_shape, input_var=img_input)
    input_layers.append(network)
    network = ls.Conv2DLayer(network, num_filters=32, filter_size=8, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(.1), stride=4)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=4, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(.1), stride=2)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=3, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(.1), stride=1)
    if self.misc_state_included:
        inputs.append(misc_input)
        network = ls.FlattenLayer(network)
        misc_input_layer = ls.InputLayer(shape=(None, misc_len), input_var=misc_input)
        input_layers.append(misc_input_layer)
        if "additional_misc_layer" in kwargs:
            misc_input_layer = ls.DenseLayer(misc_input_layer, int(kwargs["additional_misc_layer"]),
                                             nonlinearity=rectify,
                                             W=weights_init, b=lasagne.init.Constant(0.1))
        network = ls.ConcatLayer([network, misc_input_layer])
    # Duelling here
    # Advantage branch: one output per action; value branch: single scalar.
    advanteges_branch = ls.DenseLayer(network, 256, nonlinearity=rectify,
                                      W=weights_init, b=lasagne.init.Constant(.1))
    advanteges_branch = ls.DenseLayer(advanteges_branch, output_size, nonlinearity=None,
                                      b=lasagne.init.Constant(.1))
    state_value_branch = ls.DenseLayer(network, 256, nonlinearity=rectify,
                                       W=weights_init, b=lasagne.init.Constant(.1))
    state_value_branch = ls.DenseLayer(state_value_branch, 1, nonlinearity=None,
                                       b=lasagne.init.Constant(.1))
    network = DuellingMergeLayer([advanteges_branch, state_value_branch])
    return network, input_layers, inputs
def _initialize_network(self, img_input_shape, misc_len, output_size, img_input, misc_input=None, **kwargs):
    """DQN variant where the first four misc entries are treated as discrete
    "health" values, one-hot encoded (100 units each) before fusion with the
    conv features; the remaining misc entries are fed through unchanged.

    Returns (network, input_layers, inputs).

    NOTE(review): indentation reconstructed from a flattened paste — confirm
    the if/for extents against the upstream repository.
    """
    input_layers = []
    inputs = [img_input]
    # weights_init = lasagne.init.GlorotUniform("relu")
    weights_init = lasagne.init.HeNormal("relu")
    network = ls.InputLayer(shape=img_input_shape, input_var=img_input)
    input_layers.append(network)
    network = ls.Conv2DLayer(network, num_filters=32, filter_size=8, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=4)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=4, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=2)
    network = ls.Conv2DLayer(network, num_filters=64, filter_size=3, nonlinearity=rectify, W=weights_init,
                             b=lasagne.init.Constant(0.1), stride=1)
    network = ls.FlattenLayer(network)
    if self.misc_state_included:
        health_inputs = 4
        units_per_health_input = 100
        layers_for_merge = []
        for i in range(health_inputs):
            # One-hot encode each health value; `- 1` suggests 1-based values
            # — TODO confirm against the data pipeline.
            oh_input = lasagne.utils.one_hot(misc_input[:, i] - 1, units_per_health_input)
            health_input_layer = ls.InputLayer(shape=(None, units_per_health_input), input_var=oh_input)
            inputs.append(oh_input)
            input_layers.append(health_input_layer)
            layers_for_merge.append(health_input_layer)
        # Remaining misc entries bypass the one-hot encoding.
        misc_input_layer = ls.InputLayer(shape=(None, misc_len - health_inputs),
                                         input_var=misc_input[:, health_inputs:])
        input_layers.append(misc_input_layer)
        layers_for_merge.append(misc_input_layer)
        inputs.append(misc_input[:, health_inputs:])
        layers_for_merge.append(network)
        network = ls.ConcatLayer(layers_for_merge)
    network = ls.DenseLayer(network, 512, nonlinearity=rectify,
                            W=weights_init, b=lasagne.init.Constant(0.1))
    network = ls.DenseLayer(network, output_size, nonlinearity=None, b=lasagne.init.Constant(.1))
    return network, input_layers, inputs
def build_model(self, img_batch, pose_code):
    """Pose-conditioned encoder-decoder: encode the image batch, expand the
    pose code to a spatial map and fuse it into the bottleneck, then decode
    back to image space.

    :param img_batch: symbolic image batch; dimshuffled (0,3,1,2), so assumed
        NHWC — TODO confirm against the caller.
    :param pose_code: symbolic (batch, pose_code_size) pose vector.
    :return: (decoder output layer, reshaped pose feature layer).
    """
    img_size = self.options['img_size']
    pose_code_size = self.options['pose_code_size']
    filter_size = self.options['filter_size']
    batch_size = img_batch.shape[0]
    # image encoding
    l_in = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch)
    # NHWC -> NCHW for Conv2DLayer.
    l_in_dimshuffle = DimshuffleLayer(l_in, (0,3,1,2))
    l_conv1_1 = Conv2DLayer(l_in_dimshuffle, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2,2))
    # pose encoding: expand the pose vector to a map matching l_pool1's
    # spatial size so it can be concatenated channel-wise.
    l_in_2 = InputLayer(shape=(None, pose_code_size), input_var=pose_code)
    l_pose_1 = DenseLayer(l_in_2, num_units=512, W=HeNormal(),nonlinearity=rectify)
    l_pose_2 = DenseLayer(l_pose_1, num_units=pose_code_size*l_pool1.output_shape[2]*l_pool1.output_shape[3], W=HeNormal(),nonlinearity=rectify)
    l_pose_reshape = ReshapeLayer(l_pose_2, shape=(batch_size, pose_code_size, l_pool1.output_shape[2], l_pool1.output_shape[3]))
    # deeper fusion
    l_concat = ConcatLayer([l_pool1, l_pose_reshape], axis=1)
    l_pose_conv_1 = Conv2DLayer(l_concat, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pose_conv_2 = Conv2DLayer(l_pose_conv_1, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool2 = MaxPool2DLayer(l_pose_conv_2, pool_size=(2,2))
    l_conv_3 = Conv2DLayer(l_pool2, num_filters=128, filter_size=(1,1), W=HeNormal())
    l_unpool1 = Unpool2DLayer(l_conv_3, ds = (2,2))
    # image decoding; the last two convs are linear (nonlinearity=None).
    l_deconv_conv1_1 = Conv2DLayer(l_unpool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv1_2 = Conv2DLayer(l_deconv_conv1_1, num_filters=64, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_unpool2 = Unpool2DLayer(l_deconv_conv1_2, ds = (2,2))
    l_deconv_conv2_1 = Conv2DLayer(l_unpool2, num_filters=64, filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv2_2 = Conv2DLayer(l_deconv_conv2_1, num_filters=img_size[2], filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    return l_deconv_conv2_2, l_pose_reshape
def build_model(self, img_batch, img_batch_gen):
    """Encoder-decoder over a channel-wise concatenation of two image batches
    (original and generated), decoding back to a single image.

    :param img_batch: symbolic image batch; dimshuffled (0,3,1,2), so assumed
        NHWC — TODO confirm against the caller.
    :param img_batch_gen: second (generated) image batch, same layout.
    :return: decoder output layer.
    """
    img_size = self.options['img_size']
    pose_code_size = self.options['pose_code_size']
    filter_size = self.options['filter_size']
    batch_size = img_batch.shape[0]
    # image encoding
    l_in_1 = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch)
    l_in_1_dimshuffle = DimshuffleLayer(l_in_1, (0,3,1,2))
    l_in_2 = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch_gen)
    l_in_2_dimshuffle = DimshuffleLayer(l_in_2, (0,3,1,2))
    # Concatenate the two images along the channel axis.
    l_in_concat = ConcatLayer([l_in_1_dimshuffle, l_in_2_dimshuffle], axis=1)
    l_conv1_1 = Conv2DLayer(l_in_concat, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2,2))
    l_conv2_1 = Conv2DLayer(l_pool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv2_2 = Conv2DLayer(l_conv2_1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool2 = MaxPool2DLayer(l_conv2_2, pool_size=(2,2))
    l_conv_3 = Conv2DLayer(l_pool2, num_filters=128, filter_size=(1,1), W=HeNormal())
    l_unpool1 = Unpool2DLayer(l_conv_3, ds = (2,2))
    # image decoding; the last two convs are linear (nonlinearity=None).
    l_deconv_conv1_1 = Conv2DLayer(l_unpool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv1_2 = Conv2DLayer(l_deconv_conv1_1, num_filters=64, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_unpool2 = Unpool2DLayer(l_deconv_conv1_2, ds = (2,2))
    l_deconv_conv2_1 = Conv2DLayer(l_unpool2, num_filters=64, filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv2_2 = Conv2DLayer(l_deconv_conv2_1, num_filters=img_size[2], filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    return l_deconv_conv2_2
def build_model(self, img_batch, pose_code):
    """Pose-conditioned encoder-decoder (duplicate of the earlier variant in
    this collection): encode the image, fuse an expanded pose code into the
    bottleneck, decode back to image space.

    :param img_batch: symbolic image batch; dimshuffled (0,3,1,2), so assumed
        NHWC — TODO confirm against the caller.
    :param pose_code: symbolic (batch, pose_code_size) pose vector.
    :return: (decoder output layer, reshaped pose feature layer).
    """
    img_size = self.options['img_size']
    pose_code_size = self.options['pose_code_size']
    filter_size = self.options['filter_size']
    batch_size = img_batch.shape[0]
    # image encoding
    l_in = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch)
    # NHWC -> NCHW for Conv2DLayer.
    l_in_dimshuffle = DimshuffleLayer(l_in, (0,3,1,2))
    l_conv1_1 = Conv2DLayer(l_in_dimshuffle, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2,2))
    # pose encoding: expand the pose vector to a map matching l_pool1's
    # spatial size so it can be concatenated channel-wise.
    l_in_2 = InputLayer(shape=(None, pose_code_size), input_var=pose_code)
    l_pose_1 = DenseLayer(l_in_2, num_units=512, W=HeNormal(),nonlinearity=rectify)
    l_pose_2 = DenseLayer(l_pose_1, num_units=pose_code_size*l_pool1.output_shape[2]*l_pool1.output_shape[3], W=HeNormal(),nonlinearity=rectify)
    l_pose_reshape = ReshapeLayer(l_pose_2, shape=(batch_size, pose_code_size, l_pool1.output_shape[2], l_pool1.output_shape[3]))
    # deeper fusion
    l_concat = ConcatLayer([l_pool1, l_pose_reshape], axis=1)
    l_pose_conv_1 = Conv2DLayer(l_concat, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pose_conv_2 = Conv2DLayer(l_pose_conv_1, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool2 = MaxPool2DLayer(l_pose_conv_2, pool_size=(2,2))
    l_conv_3 = Conv2DLayer(l_pool2, num_filters=128, filter_size=(1,1), W=HeNormal())
    l_unpool1 = Unpool2DLayer(l_conv_3, ds = (2,2))
    # image decoding; the last two convs are linear (nonlinearity=None).
    l_deconv_conv1_1 = Conv2DLayer(l_unpool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv1_2 = Conv2DLayer(l_deconv_conv1_1, num_filters=64, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_unpool2 = Unpool2DLayer(l_deconv_conv1_2, ds = (2,2))
    l_deconv_conv2_1 = Conv2DLayer(l_unpool2, num_filters=64, filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv2_2 = Conv2DLayer(l_deconv_conv2_1, num_filters=img_size[2], filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    return l_deconv_conv2_2, l_pose_reshape
def build_model(self, img_batch, img_batch_gen):
    """Encoder-decoder over two channel-concatenated image batches (duplicate
    of the earlier two-image variant in this collection).

    :param img_batch: symbolic image batch; dimshuffled (0,3,1,2), so assumed
        NHWC — TODO confirm against the caller.
    :param img_batch_gen: second (generated) image batch, same layout.
    :return: decoder output layer.
    """
    img_size = self.options['img_size']
    pose_code_size = self.options['pose_code_size']
    filter_size = self.options['filter_size']
    batch_size = img_batch.shape[0]
    # image encoding
    l_in_1 = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch)
    l_in_1_dimshuffle = DimshuffleLayer(l_in_1, (0,3,1,2))
    l_in_2 = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch_gen)
    l_in_2_dimshuffle = DimshuffleLayer(l_in_2, (0,3,1,2))
    # Concatenate the two images along the channel axis.
    l_in_concat = ConcatLayer([l_in_1_dimshuffle, l_in_2_dimshuffle], axis=1)
    l_conv1_1 = Conv2DLayer(l_in_concat, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2,2))
    l_conv2_1 = Conv2DLayer(l_pool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv2_2 = Conv2DLayer(l_conv2_1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool2 = MaxPool2DLayer(l_conv2_2, pool_size=(2,2))
    l_conv_3 = Conv2DLayer(l_pool2, num_filters=128, filter_size=(1,1), W=HeNormal())
    l_unpool1 = Unpool2DLayer(l_conv_3, ds = (2,2))
    # image decoding; the last two convs are linear (nonlinearity=None).
    l_deconv_conv1_1 = Conv2DLayer(l_unpool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv1_2 = Conv2DLayer(l_deconv_conv1_1, num_filters=64, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_unpool2 = Unpool2DLayer(l_deconv_conv1_2, ds = (2,2))
    l_deconv_conv2_1 = Conv2DLayer(l_unpool2, num_filters=64, filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv2_2 = Conv2DLayer(l_deconv_conv2_1, num_filters=img_size[2], filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    return l_deconv_conv2_2
def build_model(self, img_batch, pose_code):
    """Pose-conditioned encoder-decoder (another duplicate of the pose
    variant in this collection): encode the image, fuse an expanded pose code
    into the bottleneck, decode back to image space.

    :param img_batch: symbolic image batch; dimshuffled (0,3,1,2), so assumed
        NHWC — TODO confirm against the caller.
    :param pose_code: symbolic (batch, pose_code_size) pose vector.
    :return: (decoder output layer, reshaped pose feature layer).
    """
    img_size = self.options['img_size']
    pose_code_size = self.options['pose_code_size']
    filter_size = self.options['filter_size']
    batch_size = img_batch.shape[0]
    # image encoding
    l_in = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch)
    # NHWC -> NCHW for Conv2DLayer.
    l_in_dimshuffle = DimshuffleLayer(l_in, (0,3,1,2))
    l_conv1_1 = Conv2DLayer(l_in_dimshuffle, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2,2))
    # pose encoding: expand the pose vector to a map matching l_pool1's
    # spatial size so it can be concatenated channel-wise.
    l_in_2 = InputLayer(shape=(None, pose_code_size), input_var=pose_code)
    l_pose_1 = DenseLayer(l_in_2, num_units=512, W=HeNormal(),nonlinearity=rectify)
    l_pose_2 = DenseLayer(l_pose_1, num_units=pose_code_size*l_pool1.output_shape[2]*l_pool1.output_shape[3], W=HeNormal(),nonlinearity=rectify)
    l_pose_reshape = ReshapeLayer(l_pose_2, shape=(batch_size, pose_code_size, l_pool1.output_shape[2], l_pool1.output_shape[3]))
    # deeper fusion
    l_concat = ConcatLayer([l_pool1, l_pose_reshape], axis=1)
    l_pose_conv_1 = Conv2DLayer(l_concat, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pose_conv_2 = Conv2DLayer(l_pose_conv_1, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool2 = MaxPool2DLayer(l_pose_conv_2, pool_size=(2,2))
    l_conv_3 = Conv2DLayer(l_pool2, num_filters=128, filter_size=(1,1), W=HeNormal())
    l_unpool1 = Unpool2DLayer(l_conv_3, ds = (2,2))
    # image decoding; the last two convs are linear (nonlinearity=None).
    l_deconv_conv1_1 = Conv2DLayer(l_unpool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv1_2 = Conv2DLayer(l_deconv_conv1_1, num_filters=64, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_unpool2 = Unpool2DLayer(l_deconv_conv1_2, ds = (2,2))
    l_deconv_conv2_1 = Conv2DLayer(l_unpool2, num_filters=64, filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv2_2 = Conv2DLayer(l_deconv_conv2_1, num_filters=img_size[2], filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    return l_deconv_conv2_2, l_pose_reshape
def build_emission_network(r2):
    """Map recurrent state `r2` to a 2-D emission (location) via one dense
    layer with fixed initial weights/bias.

    `r2` may be an existing lasagne Layer or a raw variable, in which case an
    InputLayer of shape (None, glimpse_output_size, recurrent_output_size) is
    wrapped around it.
    """
    if isinstance(r2, lasagne.layers.Layer):
        l_in = r2
    else:
        l_in = lasagne.layers.InputLayer(
            (None, glimpse_output_size, recurrent_output_size), r2)
    return lasagne.layers.DenseLayer(l_in, 2, nonlinearity=nl.rectify,
                                     W=emission_weights, b=emission_bias)
#input is r1 of length glimpse_output_size
#output is labels of length classification_units
def build_context_network(downsample):
    """Context network: three identical conv+maxpool stages over the
    downsampled image, then a dense layer reshaped to
    (batch, glimpse_output_size, recurrent_output_size).

    `downsample` may be an existing lasagne Layer or a raw variable, in which
    case an InputLayer of shape (None, 1, downsample_rows, downsample_cols)
    is wrapped around it.
    """
    if isinstance(downsample, lasagne.layers.Layer):
        net = downsample
    else:
        net = lasagne.layers.InputLayer(
            (None, 1, downsample_rows, downsample_cols), downsample)
    # Three identical conv('same') + pool stages.
    for _ in range(3):
        net = lasagne.layers.Conv2DLayer(net,
                                         context_number_of_convolving_filters,
                                         context_convolving_filter_size,
                                         stride=1,
                                         pad='same',
                                         nonlinearity=nl.rectify)
        net = lasagne.layers.MaxPool2DLayer(net, context_pool_rate)
    fc = lasagne.layers.DenseLayer(net,
                                   glimpse_output_size * recurrent_output_size,
                                   nonlinearity=nl.rectify)
    return lasagne.layers.ReshapeLayer(
        fc, (-1, glimpse_output_size, recurrent_output_size))
def build_cnn(self, input_var=None):
    """CIFAR-style CNN over (None, 3, 32, 32): two conv/pool/LRN stages, two
    dense layers, and a softmax output of self.output_size units.

    The NOTE comments suggest this mirrors a TensorFlow reference
    implementation — confirm against that source if exactness matters.
    """
    # Building the network
    layer_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)
    # Conv1
    # [NOTE]: normal vs. truncated normal?
    # [NOTE]: conv in lasagne is not same as it in TensorFlow.
    layer = ConvLayer(layer_in, num_filters=64, filter_size=(3, 3), stride=(1, 1), nonlinearity=rectify,
                      pad='same', W=lasagne.init.HeNormal(), flip_filters=False)
    # Pool1
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))
    # Norm1
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)
    # Conv2
    layer = ConvLayer(layer, num_filters=64, filter_size=(5, 5), stride=(1, 1), nonlinearity=rectify,
                      pad='same', W=lasagne.init.HeNormal(), flip_filters=False)
    # Norm2
    # [NOTE]: n must be odd, but n in Chang's code is 4?
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)
    # Pool2
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))
    # Reshape to (batch, features) before the dense head.
    layer = lasagne.layers.ReshapeLayer(layer, shape=([0], -1))
    # Dense3
    layer = DenseLayer(layer, num_units=384, W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1))
    # Dense4
    layer = DenseLayer(layer, num_units=192, W=lasagne.init.Normal(std=0.04), b=lasagne.init.Constant(0.1))
    # Softmax
    layer = DenseLayer(layer, num_units=self.output_size,
                       W=lasagne.init.Normal(std=1. / 192.0), nonlinearity=softmax)
    return layer
def __init__(self, incoming,
             gamma=init.Uniform([0.95, 1.05]),
             beta=init.Constant(0.),
             nonlinearity=nonlinearities.rectify,
             epsilon=0.001,
             **kwargs):
    """Set up batch-normalization parameters and inference-time statistics.

    :param incoming: input layer; all non-batch dimensions are normalized as
        one flattened feature axis.
    :param gamma: initializer for the learned scale (default near 1).
    :param beta: initializer for the learned shift (default 0).
    :param nonlinearity: applied after normalization; None means identity.
    :param epsilon: small constant for numerical stability.
    """
    super(BatchNormalizationLayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
    # Flatten all non-batch dims into a single feature axis.
    self.num_units = int(numpy.prod(self.input_shape[1:]))
    self.gamma = self.add_param(gamma, (self.num_units,), name="BatchNormalizationLayer:gamma", regularizable=True,
                                gamma=True, trainable=True)
    self.beta = self.add_param(beta, (self.num_units,), name="BatchNormalizationLayer:beta", regularizable=False)
    self.epsilon = epsilon
    # Running statistics for inference; broadcastable over the batch axis.
    self.mean_inference = theano.shared(
        numpy.zeros((1, self.num_units), dtype=theano.config.floatX),
        borrow=True,
        broadcastable=(True, False))
    self.mean_inference.name = "shared:mean"
    self.variance_inference = theano.shared(
        numpy.zeros((1, self.num_units), dtype=theano.config.floatX),
        borrow=True,
        broadcastable=(True, False))
    self.variance_inference.name = "shared:variance"
def __init__(self, incoming, num_units, cell_num, W=lasagne.init.GlorotUniform(),
             b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.rectify,
             name=None, **kwargs):
    """Dense layer split into `cell_num` locally-connected cells.

    Each cell is a TiedDenseLayer over a slice of the input (via CutLayer)
    producing num_units // cell_num outputs.

    :param incoming: layer feeding this one.
    :param num_units: total output units across all cells; should be
        divisible by cell_num.
    :param cell_num: number of cells; should also divide the flattened
        input size evenly.
    :param W: weight Initializer shared by all cells, or a list with one
        entry per cell.
    :param b: bias Initializer, same convention as W.
    :param nonlinearity: applied per cell; None means identity.
    """
    super(LocallyDenseLayer, self).__init__(incoming, name)
    self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                         else nonlinearity)
    self.num_units = num_units
    num_inputs = int(np.prod(self.input_shape[1:]))
    # Floor division: under Python 3 true division would produce floats here
    # and break the integer unit counts handed to TiedDenseLayer below.
    self.cell_input_size = num_inputs // cell_num
    self.cell_size = self.num_units // cell_num
    # A single Initializer is replicated so every cell gets its own entry.
    if isinstance(W, lasagne.init.Initializer):
        W = [W for i in range(0, cell_num)]
    if isinstance(b, lasagne.init.Initializer):
        b = [b for i in range(0, cell_num)]
    self._dense_layers = []
    self.W = []
    self.b = []
    # Creating m number of tied dense layers
    for i in range(cell_num):
        self._dense_layers.append(TiedDenseLayer(CutLayer(incoming, cell_num),
                                                 self.cell_size, W[i], b[i], nonlinearity, **kwargs))
        self.W.append(self._dense_layers[-1].W)
        self.b.append(self._dense_layers[-1].b)
def MDCL(incoming,num_filters,scales,name,dnn=True):
    """Multi-Dilation Convolution block.

    A single shared 3x3 weight tensor `W` is applied as an undilated base
    conv plus one layer per entry of `scales` (scale 0 uses a 1x1 conv built
    from the filter means; other scales use DilatedConv2DLayer). Each layer
    is scaled by its own learned per-filter coefficient, and all outputs are
    summed elementwise (ESL).
    """
    if dnn:
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D
    # W initialization method--this should also work as Orthogonal('relu'), but I have yet to validate that as thoroughly.
    winit = initmethod(0.02)
    # Initialization method for the coefficients
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Number of incoming channels
    ni =lasagne.layers.get_output_shape(incoming)[1]
    # Weight parameter--the primary parameter for this block
    W = theano.shared(lasagne.utils.floatX(winit.sample((num_filters,lasagne.layers.get_output_shape(incoming)[1],3,3))),name=name+'W')
    # Primary Convolution Layer--No Dilation
    n = C2D(incoming = incoming,
            num_filters = num_filters,
            filter_size = [3,3],
            stride = [1,1],
            pad = (1,1),
            W = W*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_base').dimshuffle(0,'x','x','x'), # Note the broadcasting dimshuffle for the num_filter scalars.
            b = None,
            nonlinearity = None,
            name = name+'base'
            )
    # List of remaining layers. This should probably just all be concatenated into a single list rather than being a separate deal.
    nd = []
    for i,scale in enumerate(scales):
        # I don't think 0 dilation is technically defined (or if it is it's just the regular filter) but I use it here as a convenient keyword to grab the 1x1 mean conv.
        if scale==0:
            nd.append(C2D(incoming = incoming,
                          num_filters = num_filters,
                          filter_size = [1,1],
                          stride = [1,1],
                          pad = (0,0),
                          W = T.mean(W,axis=[2,3]).dimshuffle(0,1,'x','x')*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_1x1').dimshuffle(0,'x','x','x'),
                          b = None,
                          nonlinearity = None,
                          name = name+str(scale)))
        # Note the dimshuffles in this layer--these are critical as the current DilatedConv2D implementation uses a backward pass.
        else:
            nd.append(lasagne.layers.DilatedConv2DLayer(incoming = lasagne.layers.PadLayer(incoming = incoming, width=(scale,scale)),
                                                        num_filters = num_filters,
                                                        filter_size = [3,3],
                                                        dilation=(scale,scale),
                                                        W = W.dimshuffle(1,0,2,3)*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_'+str(scale)).dimshuffle('x',0,'x','x'),
                                                        b = None,
                                                        nonlinearity = None,
                                                        name = name+str(scale)))
    # Elementwise sum of the base conv and all scale-specific layers.
    return ESL(nd+[n])
# MDC-based Upsample Layer.
# This is a prototype I don't make use of extensively. It's operational but it doesn't seem to improve results yet.
def InceptionLayer(incoming,param_dict,block_name):
    """Inception-style block driven by a parameter dict.

    `param_dict` is a list of branch specs; each spec's 'style' list selects,
    per sub-layer, a convolution (C2D), a pooling layer (Pool2DDNNLayer
    wrapped in a nonlinearity via NL), a dilated convolution, or a dense
    layer (DL). Sub-layers chain within a branch (j==0 reads `incoming`,
    later ones read the branch so far); optional batch-norm per sub-layer via
    the 'bnorm' flags. All branch outputs are concatenated (CL).

    NOTE(review): the sub-layer selector is one large chained conditional
    expression; kept verbatim, with indentation reconstructed from a
    flattened paste — confirm against the upstream repository.
    """
    branch = [0]*len(param_dict)
    # Loop across branches
    for i,dict in enumerate(param_dict):
        for j,style in enumerate(dict['style']): # Loop up branch
            branch[i] = C2D(
                incoming = branch[i] if j else incoming,
                num_filters = dict['num_filters'][j],
                filter_size = dict['filter_size'][j],
                pad = dict['pad'][j] if 'pad' in dict else None,
                stride = dict['stride'][j],
                W = initmethod('relu'),
                nonlinearity = dict['nonlinearity'][j],
                name = block_name+'_'+str(i)+'_'+str(j)) if style=='convolutional'\
                else NL(lasagne.layers.dnn.Pool2DDNNLayer(
                    incoming=incoming if j == 0 else branch[i],
                    pool_size = dict['filter_size'][j],
                    mode = dict['mode'][j],
                    stride = dict['stride'][j],
                    pad = dict['pad'][j],
                    name = block_name+'_'+str(i)+'_'+str(j)),
                    nonlinearity = dict['nonlinearity'][j]) if style=='pool'\
                else lasagne.layers.DilatedConv2DLayer(
                    incoming = lasagne.layers.PadLayer(incoming = incoming if j==0 else branch[i],width = dict['pad'][j]) if 'pad' in dict else incoming if j==0 else branch[i],
                    num_filters = dict['num_filters'][j],
                    filter_size = dict['filter_size'][j],
                    dilation = dict['dilation'][j],
                    # pad = dict['pad'][j] if 'pad' in dict else None,
                    W = initmethod('relu'),
                    nonlinearity = dict['nonlinearity'][j],
                    name = block_name+'_'+str(i)+'_'+str(j)) if style== 'dilation'\
                else DL(
                    incoming = incoming if j==0 else branch[i],
                    num_units = dict['num_filters'][j],
                    W = initmethod('relu'),
                    b = None,
                    nonlinearity = dict['nonlinearity'][j],
                    name = block_name+'_'+str(i)+'_'+str(j))
            # Apply Batchnorm
            branch[i] = BN(branch[i],name = block_name+'_bnorm_'+str(i)+'_'+str(j)) if dict['bnorm'][j] else branch[i]
    # Concatenate Sublayers
    return CL(incomings=branch,name=block_name)
# Convenience function to define an inception-style block with upscaling