import lasagne
from lasagne.layers import InputLayer, DenseLayer, DropoutLayer
from lasagne.nonlinearities import rectify, softmax

def build_mlp(input_var=None):
    # 1x28x28 input images, two dropout-regularized hidden layers,
    # and a 10-way softmax output.
    l_in = InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_hid1 = DenseLayer(
        l_in, num_units=500,
        nonlinearity=rectify,
        W=lasagne.init.GlorotUniform())
    l_hid1_drop = DropoutLayer(l_hid1, p=0.4)
    l_hid2 = DenseLayer(
        l_hid1_drop, num_units=300,
        nonlinearity=rectify)
    l_hid2_drop = DropoutLayer(l_hid2, p=0.4)
    l_out = DenseLayer(
        l_hid2_drop, num_units=10,
        nonlinearity=softmax)
    return l_out

# generator giving the batches
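The comment above refers to a batch generator that the page cut off. A minimal sketch in the style of the Lasagne MNIST tutorial, assuming inputs and targets are NumPy arrays with matching first dimensions (the name iterate_minibatches is illustrative):

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Yield (inputs, targets) slices of size batchsize; the remainder is dropped.
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        yield inputs[excerpt], targets[excerpt]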
Python softmax() usage examples (source code)

Source: cnn_cascade_lasagne.py, from project Cascade-CNN-Face-Detection (author: gogolgrind)
def __build_48_net__(self):
    network = layers.InputLayer((None, 3, 48, 48), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.batch_norm(network)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=256, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network
def network_classifier(self, input_var):
    network = {}
    network['classifier/input'] = InputLayer(shape=(None, 3, 64, 64), input_var=input_var, name='classifier/input')
    network['classifier/conv1'] = Conv2DLayer(network['classifier/input'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv1')
    network['classifier/pool1'] = MaxPool2DLayer(network['classifier/conv1'], pool_size=2, stride=2, pad=0, name='classifier/pool1')
    network['classifier/conv2'] = Conv2DLayer(network['classifier/pool1'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv2')
    network['classifier/pool2'] = MaxPool2DLayer(network['classifier/conv2'], pool_size=2, stride=2, pad=0, name='classifier/pool2')
    network['classifier/conv3'] = Conv2DLayer(network['classifier/pool2'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv3')
    network['classifier/pool3'] = MaxPool2DLayer(network['classifier/conv3'], pool_size=2, stride=2, pad=0, name='classifier/pool3')
    network['classifier/conv4'] = Conv2DLayer(network['classifier/pool3'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='classifier/conv4')
    network['classifier/pool4'] = MaxPool2DLayer(network['classifier/conv4'], pool_size=2, stride=2, pad=0, name='classifier/pool4')
    network['classifier/dense1'] = DenseLayer(network['classifier/pool4'], num_units=64, nonlinearity=rectify, name='classifier/dense1')
    network['classifier/output'] = DenseLayer(network['classifier/dense1'], num_units=10, nonlinearity=softmax, name='classifier/output')
    return network
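This function returns a dict keyed by layer name rather than a single output layer, so downstream code compiles against the 'classifier/output' entry. A minimal sketch of that wiring, assuming an instance named model exposing the method above (names here are illustrative):

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
net = model.network_classifier(input_var)
# deterministic=True disables stochastic layers at prediction time
probs = lasagne.layers.get_output(net['classifier/output'], deterministic=True)
predict_fn = theano.function([input_var], T.argmax(probs, axis=1))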
def build_cnn(self):
    # Build the network
    layer_in = InputLayer(shape=(None, 784), input_var=self.input_var)
    # Hidden layer
    layer = DenseLayer(
        layer_in,
        num_units=self.hidden_size,
        W=lasagne.init.Uniform(
            range=(-np.sqrt(6. / (784 + self.hidden_size)),
                   np.sqrt(6. / (784 + self.hidden_size)))),
        nonlinearity=tanh,
    )
    # Logistic-regression output layer
    layer = DenseLayer(
        layer,
        num_units=self.output_size,
        W=lasagne.init.Constant(0.),
        nonlinearity=softmax,
    )
    return layer
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Discrete)
    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )
    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer))
    self._dist = Categorical(env_spec.action_space.n)
    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def calc_loss(prediction, targets):
    # categorical crossentropy is the best choice for a multi-class softmax output
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    return loss
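calc_loss takes the symbolic network output, so it slots straight into a Theano training function. A sketch under assumed names (network is the output layer, input_var is its InputLayer variable, targets is a T.ivector of class labels):

import theano
import theano.tensor as T
import lasagne
from lasagne import objectives

prediction = lasagne.layers.get_output(network)  # stochastic pass: dropout stays on
loss = calc_loss(prediction, targets)
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, targets], loss, updates=updates)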
def create_network():
    l = 1000
    pool_size = 5
    test_size1 = 13
    test_size2 = 7
    test_size3 = 5
    kernel1 = 128
    kernel2 = 128
    kernel3 = 128
    layer1 = InputLayer(shape=(None, 1, 4, l + 1024))
    layer2_1 = SliceLayer(layer1, indices=slice(0, l), axis=-1)
    layer2_2 = SliceLayer(layer1, indices=slice(l, None), axis=-1)
    layer2_3 = SliceLayer(layer2_2, indices=slice(0, 4), axis=-2)
    layer2_f = FlattenLayer(layer2_3)
    layer3 = Conv2DLayer(layer2_1, num_filters=kernel1, filter_size=(4, test_size1))
    layer4 = Conv2DLayer(layer3, num_filters=kernel1, filter_size=(1, test_size1))
    layer5 = Conv2DLayer(layer4, num_filters=kernel1, filter_size=(1, test_size1))
    layer6 = MaxPool2DLayer(layer5, pool_size=(1, pool_size))
    layer7 = Conv2DLayer(layer6, num_filters=kernel2, filter_size=(1, test_size2))
    layer8 = Conv2DLayer(layer7, num_filters=kernel2, filter_size=(1, test_size2))
    layer9 = Conv2DLayer(layer8, num_filters=kernel2, filter_size=(1, test_size2))
    layer10 = MaxPool2DLayer(layer9, pool_size=(1, pool_size))
    layer11 = Conv2DLayer(layer10, num_filters=kernel3, filter_size=(1, test_size3))
    layer12 = Conv2DLayer(layer11, num_filters=kernel3, filter_size=(1, test_size3))
    layer13 = Conv2DLayer(layer12, num_filters=kernel3, filter_size=(1, test_size3))
    layer14 = MaxPool2DLayer(layer13, pool_size=(1, pool_size))
    layer14_d = DenseLayer(layer14, num_units=256)
    layer3_2 = DenseLayer(layer2_f, num_units=128)
    layer15 = ConcatLayer([layer14_d, layer3_2])
    layer16 = DropoutLayer(layer15, p=0.5)
    layer17 = DenseLayer(layer16, num_units=256)
    network = DenseLayer(layer17, num_units=2, nonlinearity=softmax)
    return network

# random search to initialize the weights
def get_output_for(self, input, **kwargs):
    activation = T.dot(input, self.C)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    return nonlinearities.softmax(activation)
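This get_output_for belongs to a custom layer whose parameters C and b were registered elsewhere in the original class. A minimal self-contained sketch of such a layer; the class name, shapes, and initializers are assumptions, not the original code:

import lasagne
import theano.tensor as T
from lasagne import nonlinearities

class LinearSoftmaxLayer(lasagne.layers.Layer):
    # Maps (batch, input_dim) to (batch, num_classes) via softmax(input . C + b).
    def __init__(self, incoming, num_classes, **kwargs):
        super(LinearSoftmaxLayer, self).__init__(incoming, **kwargs)
        input_dim = self.input_shape[1]
        self.num_classes = num_classes
        self.C = self.add_param(lasagne.init.Normal(0.1), (input_dim, num_classes), name='C')
        self.b = self.add_param(lasagne.init.Constant(0.), (num_classes,), name='b', regularizable=False)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], self.num_classes)

    def get_output_for(self, input, **kwargs):
        return nonlinearities.softmax(T.dot(input, self.C) + self.b.dimshuffle('x', 0))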
def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 1, 28, 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32, 32)
    else:
        raise AssertionError('unsupported dataset: %s' % args.dataset)
    layers = [ll.InputLayer(input_size)]
    self.penalty = theano.shared(np.array(0.))
    # conv1
    layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # conv2
    layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # fc1
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
    self.add_params_to_self(args, layers[-1])
    # softmax output
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=10, nonlinearity=nonlinearities.softmax))
    self.add_params_to_self(args, layers[-1])
    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)
    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print("loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty))
def __init__(self, W_g=init.Normal(0.1), W_s=init.Normal(0.1),
             W_h=init.Normal(0.1), W_v=init.Normal(0.1),
             nonlinearity=nonlinearities.softmax):
    self.W_s = W_s
    self.W_h = W_h
    self.W_g = W_g
    self.W_v = W_v
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
Source: cnn_cascade_lasagne.py, from project Cascade-CNN-Face-Detection (author: gogolgrind)
def __build_12_net__(self):
    network = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    network = layers.dropout(network, p=0.1)
    network = layers.Conv2DLayer(network, num_filters=16, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DropoutLayer(network, p=0.3)
    network = layers.DenseLayer(network, num_units=16, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.DropoutLayer(network, p=0.3)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network
def __build_24_net__(self):
    network = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    network = layers.dropout(network, p=0.1)
    network = layers.Conv2DLayer(network, num_filters=64, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.batch_norm(network)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DropoutLayer(network, p=0.5)
    network = layers.batch_norm(network)
    network = layers.DenseLayer(network, num_units=64, nonlinearity=relu)
    network = layers.DropoutLayer(network, p=0.5)
    network = layers.DenseLayer(network, num_units=2, nonlinearity=softmax)
    return network
def __build_12_calib_net__(self):
    network = layers.InputLayer((None, 3, 12, 12), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=16, filter_size=(3, 3), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=128, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=45, nonlinearity=softmax)
    return network
def __build_24_calib_net__(self):
    network = layers.InputLayer((None, 3, 24, 24), input_var=self.__input_var__)
    network = layers.Conv2DLayer(network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=relu)
    network = layers.MaxPool2DLayer(network, pool_size=(3, 3), stride=2)
    network = layers.DenseLayer(network, num_units=64, nonlinearity=relu)
    network = layers.DenseLayer(network, num_units=45, nonlinearity=softmax)
    return network
def network_discriminator(self, features):
    network = {}
    network['discriminator/conv2'] = Conv2DLayer(features, num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv2')
    network['discriminator/pool2'] = MaxPool2DLayer(network['discriminator/conv2'], pool_size=2, stride=2, pad=0, name='discriminator/pool2')
    network['discriminator/conv3'] = Conv2DLayer(network['discriminator/pool2'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv3')
    network['discriminator/pool3'] = MaxPool2DLayer(network['discriminator/conv3'], pool_size=2, stride=2, pad=0, name='discriminator/pool3')
    network['discriminator/conv4'] = Conv2DLayer(network['discriminator/pool3'], num_filters=32, filter_size=3, stride=1, pad='valid', nonlinearity=rectify, name='discriminator/conv4')
    network['discriminator/pool4'] = MaxPool2DLayer(network['discriminator/conv4'], pool_size=2, stride=2, pad=0, name='discriminator/pool4')
    network['discriminator/dense1'] = DenseLayer(network['discriminator/pool4'], num_units=64, nonlinearity=rectify, name='discriminator/dense1')
    network['discriminator/output'] = DenseLayer(network['discriminator/dense1'], num_units=2, nonlinearity=softmax, name='discriminator/output')
    return network
def build_model(input_var):
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224), input_var=input_var)
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)
    return net
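With the dict returned by build_model, getting class probabilities is a single get_output call on the 'prob' entry. A sketch, assuming pretrained VGG-16 weights have already been loaded into the layers:

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
net = build_model(input_var)
probs = lasagne.layers.get_output(net['prob'], deterministic=True)  # dropout off
predict_fn = theano.function([input_var], probs)
# predict_fn(images) returns a (batch, 1000) array of softmax probabilities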
def build_classification_network(r1):
    if not isinstance(r1, lasagne.layers.Layer):
        l_in = lasagne.layers.InputLayer((None, glimpse_output_size, recurrent_output_size), r1)
    else:
        l_in = r1
    output = lasagne.layers.DenseLayer(l_in, classification_units,
                                       nonlinearity=nl.softmax,
                                       W=class_weights, b=class_bias)
    return output

# input is a downsampled batch of images
# output is the initial r2, of length glimpse_output_size
def build_cnn(self, input_var=None):
    # Build the network
    layer_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)
    # Conv1
    # [NOTE]: normal vs. truncated normal?
    # [NOTE]: conv in Lasagne is not the same as in TensorFlow.
    layer = ConvLayer(layer_in, num_filters=64, filter_size=(3, 3), stride=(1, 1), nonlinearity=rectify,
                      pad='same', W=lasagne.init.HeNormal(), flip_filters=False)
    # Pool1
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))
    # Norm1
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)
    # Conv2
    layer = ConvLayer(layer, num_filters=64, filter_size=(5, 5), stride=(1, 1), nonlinearity=rectify,
                      pad='same', W=lasagne.init.HeNormal(), flip_filters=False)
    # Norm2
    # [NOTE]: n must be odd, but n in Chang's code is 4?
    layer = LocalResponseNormalization2DLayer(layer, alpha=0.001 / 9.0, k=1.0, beta=0.75)
    # Pool2
    layer = MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2))
    # Reshape
    layer = lasagne.layers.ReshapeLayer(layer, shape=([0], -1))
    # Dense3
    layer = DenseLayer(layer, num_units=384, W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1))
    # Dense4
    layer = DenseLayer(layer, num_units=192, W=lasagne.init.Normal(std=0.04), b=lasagne.init.Constant(0.1))
    # Softmax output
    layer = DenseLayer(layer, num_units=self.output_size,
                       W=lasagne.init.Normal(std=1. / 192.0), nonlinearity=softmax)
    return layer
def __init__(
        self,
        env_spec,
        latent_dim=0,  # all this is fake
        latent_name='categorical',
        bilinear_integration=False,
        resample=False,  # until here
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    # bookkeeping for the fake latent options above
    self.latent_dim = latent_dim  # could we avoid needing this attribute for get_action?
    self.latent_name = latent_name
    self.bilinear_integration = bilinear_integration
    self.resample = resample
    self._set_std_to_0 = False
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Discrete)
    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )
    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer))
    self._dist = Categorical(env_spec.action_space.n)
    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def __init__(self, input_shape, output_dim, hidden_sizes,
             conv_filters, conv_filter_sizes, conv_strides, conv_pads,
             hidden_W_init=LI.GlorotUniform(), hidden_b_init=LI.Constant(0.),
             output_W_init=LI.GlorotUniform(), output_b_init=LI.Constant(0.),
             # conv_W_init=LI.GlorotUniform(), conv_b_init=LI.Constant(0.),
             hidden_nonlinearity=LN.rectify,
             output_nonlinearity=LN.softmax,
             name=None, input_var=None):
    if name is None:
        prefix = ""
    else:
        prefix = name + "_"
    if len(input_shape) == 3:
        l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
        l_hid = L.reshape(l_in, ([0],) + input_shape)
    elif len(input_shape) == 2:
        l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
        input_shape = (1,) + input_shape
        l_hid = L.reshape(l_in, ([0],) + input_shape)
    else:
        l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var)
        l_hid = l_in
    for idx, conv_filter, filter_size, stride, pad in zip(
            range(len(conv_filters)),
            conv_filters,
            conv_filter_sizes,
            conv_strides,
            conv_pads,
    ):
        l_hid = L.Conv2DLayer(
            l_hid,
            num_filters=conv_filter,
            filter_size=filter_size,
            stride=(stride, stride),
            pad=pad,
            nonlinearity=hidden_nonlinearity,
            name="%sconv_hidden_%d" % (prefix, idx),
            convolution=wrapped_conv,
        )
    for idx, hidden_size in enumerate(hidden_sizes):
        l_hid = L.DenseLayer(
            l_hid,
            num_units=hidden_size,
            nonlinearity=hidden_nonlinearity,
            name="%shidden_%d" % (prefix, idx),
            W=hidden_W_init,
            b=hidden_b_init,
        )
    l_out = L.DenseLayer(
        l_hid,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
        name="%soutput" % (prefix,),
        W=output_W_init,
        b=output_b_init,
    )
    self._l_in = l_in
    self._l_out = l_out
    self._input_var = l_in.input_var
def __init__(
        self,
        name,
        env_spec,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=NL.rectify,
        output_nonlinearity=NL.softmax,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Discrete)
    self._env_spec = env_spec
    if prob_network is None:
        prob_network = ConvNetwork(
            input_shape=env_spec.observation_space.shape,
            output_dim=env_spec.action_space.n,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
            name="prob_network",
        )
    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer)
    )
    self._dist = Categorical(env_spec.action_space.n)
    super(CategoricalConvPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def build_std_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                          reset_input):
    # Construct a bi-directional GRU-CNN network with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should span all time steps of the cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))
    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask, resetgate=resetgate_forward,
                           updategate=updategate_forward, hidden_update=hidden_update_forward,
                           grad_clipping=grad_clipping, reset_input=reset_input, name='forward')
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True, resetgate=resetgate_backward,
                            updategate=updategate_backward, hidden_update=hidden_update_backward,
                            grad_clipping=grad_clipping, reset_input=reset_input, name='backward')
    # concatenate the outputs of the forward and backward GRUs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p)
    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))
    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')
    return layer_output
def build_std_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct a bi-directional SGRU-CNN network with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should span all time steps of the cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))
    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)
    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, name='forward')
    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, name='backward')
    # concatenate the outputs of the forward and backward SGRUs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")
    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p)
    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))
    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')
    return layer_output
def build_recur_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                            reset_input):
    # Construct a bi-directional GRU-CNN network with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should span all time steps of the cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))
    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    # dropout for incoming; shared_axes=(1,) drops the same units at every time step
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask, resetgate=resetgate_forward,
                           updategate=updategate_forward, hidden_update=hidden_update_forward,
                           grad_clipping=grad_clipping, reset_input=reset_input, p=p, name='forward')
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True, resetgate=resetgate_backward,
                            updategate=updategate_backward, hidden_update=hidden_update_backward,
                            grad_clipping=grad_clipping, reset_input=reset_input, p=p, name='backward')
    # concatenate the outputs of the forward and backward GRUs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")
    # shape = [batch, n-step, num_units]
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p, shared_axes=(1,))
    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))
    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')
    return layer_output
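All three builders reshape the recurrent output to [batch * max_length, num_labels] before the softmax layer, so the loss has to be masked the same way to ignore padding. A sketch under that assumption, with targets and mask_var as symbolic variables of shape (batch, max_length):

import theano.tensor as T
import lasagne

prediction = lasagne.layers.get_output(layer_output)     # (batch * max_length, num_labels)
flat_targets = targets.flatten()                         # (batch * max_length,) integer labels
flat_mask = mask_var.flatten()                           # 1.0 for real tokens, 0.0 for padding
token_loss = lasagne.objectives.categorical_crossentropy(prediction, flat_targets)
loss = T.sum(token_loss * flat_mask) / T.sum(flat_mask)  # average over unpadded tokens only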