def build_recur_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
# Construct a bi-directional SGRU-CNN network with recurrent dropout.
# first get some necessary dimensions or parameters
conv_window = 3
# shape = [batch, n-step, c_dim, char_length]
# construct convolution layer
# shape = [batch, n-step, c_filters, output_length]
cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
# infer the pool size (the pool must span the full output length of the cnn)
_, _, _, pool_size = cnn_layer.output_shape
# construct max pool layer
# shape = [batch, n-step, c_filters, 1]
pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
# reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))
# finally, concatenate the two incoming layers together.
# shape = [batch, n-step, c_filters + w_dim]
incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
# dropout for incoming
incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))
resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=None, nonlinearity=nonlinearities.tanh)
sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
updategate=updategate_forward, hidden_update=hidden_update_forward,
grad_clipping=grad_clipping, p=p, name='forward')
resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=None, nonlinearity=nonlinearities.tanh)
sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
updategate=updategate_backward, hidden_update=hidden_update_backward,
grad_clipping=grad_clipping, p=p, name='backward')
# concatenate the outputs of the forward and backward SGRUs to combine them.
bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")
# shape = [batch, n-step, 2 * num_units]
bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p, shared_axes=(1,))
# reshape bi-sgru-cnn to [batch * max_length, 2 * num_units]
bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))
# construct output layer (dense layer with softmax)
layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
name='softmax')
return layer_output
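A minimal usage sketch for the builder above, assuming the project-local ConvTimeStep1DLayer, PoolTimeStep1DLayer and SGRULayer are importable; the tensor shapes and sizes below are hypothetical placeholders, not values from the original training script.

import theano
import theano.tensor as T
import lasagne

char_var = T.tensor4('char')   # [batch, n-step, c_dim, char_length]
word_var = T.tensor3('word')   # [batch, n-step, w_dim]
mask_var = T.matrix('mask')    # [batch, n-step]

layer_char = lasagne.layers.InputLayer(shape=(None, None, 30, 20), input_var=char_var)
layer_word = lasagne.layers.InputLayer(shape=(None, None, 100), input_var=word_var)
layer_mask = lasagne.layers.InputLayer(shape=(None, None), input_var=mask_var)

network = build_recur_dropout_sgru(layer_char, layer_word, num_units=200,
                                   num_labels=17, mask=layer_mask,
                                   grad_clipping=5.0, num_filters=30, p=0.5)

# deterministic=True turns the dropout layers off for prediction
probs = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([char_var, word_var, mask_var], probs)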
Python softmax() usage examples (source code)
def buildModel(mtype=1):
print "BUILDING MODEL TYPE", mtype, "..."
#default settings (Model 1)
filters = 64
first_stride = 2
last_filter_multiplier = 16
#specific model type settings (see working notes for details)
if mtype == 2:
first_stride = 1
elif mtype == 3:
filters = 32
last_filter_multiplier = 8
#input layer
net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))
#conv layers
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
if mtype == 2:
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)
#dense layers
net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.DropoutLayer(net, DROPOUT)
net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.DropoutLayer(net, DROPOUT)
#Classification Layer
if MULTI_LABEL:
net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
else:
net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))
print "...DONE!"
#model stats
print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
print "MODEL HAS", l.count_params(net), "PARAMS"
return net
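A hedged training-setup sketch for the model above; it assumes the module-level constants (IM_DIM, IM_SIZE, NUM_CLASSES, and the rest) are defined, and uses plain categorical cross-entropy as a stand-in for whatever objective the original training script used.

import theano
import theano.tensor as T
import lasagne

net = buildModel(mtype=1)
targets = T.matrix('targets')  # one-hot (multi-hot if MULTI_LABEL)

prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.categorical_crossentropy(prediction, targets).mean()

params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

# buildModel creates its own input variable, so fetch it from the input layer
input_var = lasagne.layers.get_all_layers(net)[0].input_var
train_fn = theano.function([input_var, targets], loss, updates=updates)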
def buildModel(mtype=1):
print "BUILDING MODEL TYPE", mtype, "..."
#default settings (Model 1)
filters = 64
first_stride = 2
last_filter_multiplier = 16
#specific model type settings (see working notes for details)
if mtype == 2:
first_stride = 1
elif mtype == 3:
filters = 32
last_filter_multiplier = 8
#input layer
net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))
#conv layers
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
if mtype == 2:
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.MaxPool2DLayer(net, pool_size=2)
print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)
#dense layers
net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
#Classification Layer
if MULTI_LABEL:
net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
else:
net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))
print "...DONE!"
#model stats
print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
print "MODEL HAS", l.count_params(net), "PARAMS"
return net
def __init__(self, input_shape, output_dim, hidden_sizes,
conv_filters, conv_filter_sizes, conv_strides, conv_pads,
hidden_W_init=LI.GlorotUniform(), hidden_b_init=LI.Constant(0.),
output_W_init=LI.GlorotUniform(), output_b_init=LI.Constant(0.),
# conv_W_init=LI.GlorotUniform(), conv_b_init=LI.Constant(0.),
hidden_nonlinearity=LN.rectify,
output_nonlinearity=LN.softmax,
name=None, input_var=None):
if name is None:
prefix = ""
else:
prefix = name + "_"
if len(input_shape) == 3:
l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
l_hid = L.reshape(l_in, ([0],) + input_shape)
elif len(input_shape) == 2:
l_in = L.InputLayer(shape=(None, np.prod(input_shape)), input_var=input_var)
input_shape = (1,) + input_shape
l_hid = L.reshape(l_in, ([0],) + input_shape)
else:
l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var)
l_hid = l_in
for idx, conv_filter, filter_size, stride, pad in zip(
range(len(conv_filters)),
conv_filters,
conv_filter_sizes,
conv_strides,
conv_pads,
):
l_hid = L.Conv2DLayer(
l_hid,
num_filters=conv_filter,
filter_size=filter_size,
stride=(stride, stride),
pad=pad,
nonlinearity=hidden_nonlinearity,
name="%sconv_hidden_%d" % (prefix, idx),
convolution=wrapped_conv,
)
for idx, hidden_size in enumerate(hidden_sizes):
l_hid = L.DenseLayer(
l_hid,
num_units=hidden_size,
nonlinearity=hidden_nonlinearity,
name="%shidden_%d" % (prefix, idx),
W=hidden_W_init,
b=hidden_b_init,
)
l_out = L.DenseLayer(
l_hid,
num_units=output_dim,
nonlinearity=output_nonlinearity,
name="%soutput" % (prefix,),
W=output_W_init,
b=output_b_init,
)
self._l_in = l_in
self._l_out = l_out
self._input_var = l_in.input_var
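A hypothetical instantiation of the network wrapper above; the filter counts and sizes are illustrative only, and wrapped_conv plus the L/LI/LN aliases are assumed to come from the surrounding module.

network = ConvNetwork(
    input_shape=(4, 84, 84),       # e.g. four stacked 84x84 frames
    output_dim=6,
    hidden_sizes=(256,),
    conv_filters=(16, 32),
    conv_filter_sizes=(8, 4),
    conv_strides=(4, 2),
    conv_pads=('valid', 'valid'),
)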
def __init__(
self,
name,
env_spec,
conv_filters, conv_filter_sizes, conv_strides, conv_pads,
hidden_sizes=[],
hidden_nonlinearity=NL.rectify,
output_nonlinearity=NL.softmax,
prob_network=None,
):
"""
:param env_spec: A spec for the mdp.
:param hidden_sizes: list of sizes for the fully connected hidden layers
:param hidden_nonlinearity: nonlinearity used for each hidden layer
:param prob_network: manually specified network for this policy, other network params
are ignored
:return:
"""
Serializable.quick_init(self, locals())
assert isinstance(env_spec.action_space, Discrete)
self._env_spec = env_spec
if prob_network is None:
prob_network = ConvNetwork(
input_shape=env_spec.observation_space.shape,
output_dim=env_spec.action_space.n,
conv_filters=conv_filters,
conv_filter_sizes=conv_filter_sizes,
conv_strides=conv_strides,
conv_pads=conv_pads,
hidden_sizes=hidden_sizes,
hidden_nonlinearity=hidden_nonlinearity,
output_nonlinearity=NL.softmax,
name="prob_network",
)
self._l_prob = prob_network.output_layer
self._l_obs = prob_network.input_layer
self._f_prob = ext.compile_function(
[prob_network.input_layer.input_var],
L.get_output(prob_network.output_layer)
)
self._dist = Categorical(env_spec.action_space.n)
super(CategoricalConvPolicy, self).__init__(env_spec)
LasagnePowered.__init__(self, [prob_network.output_layer])
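And a matching, purely illustrative instantiation of the policy; it assumes an rllab-style environment object `env` whose action space is Discrete, which is not shown on this page.

policy = CategoricalConvPolicy(
    name='policy',
    env_spec=env.spec,
    conv_filters=(16, 32),
    conv_filter_sizes=(8, 4),
    conv_strides=(4, 2),
    conv_pads=('valid', 'valid'),
    hidden_sizes=(256,),
)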
def build_model_resnet50(input_shape):
net = {}
net['input'] = InputLayer(input_shape)
sub_net, parent_layer_name = build_simple_block(
net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
64, 7, 2, 3, use_bias=True)
net.update(sub_net)
net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False)
block_size = list('abc')
parent_layer_name = 'pool1'
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='2%s' % c)
net.update(sub_net)
# block_size = ['a'] + ['b'+str(i+1) for i in range(7)]
block_size = list('abcd')
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(
net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='3%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='3%s' % c)
net.update(sub_net)
# block_size = ['a'] + ['b'+str(i+1) for i in range(35)]
block_size = list('abcdef')
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(
net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='4%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='4%s' % c)
net.update(sub_net)
block_size = list('abc')
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(
net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='5%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='5%s' % c)
net.update(sub_net)
net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0,
mode='average_exc_pad', ignore_border=False)
net['fc1000'] = DenseLayer(net['pool5'], num_units=1000, nonlinearity=None, W=lasagne.init.Normal(std=0.01, mean=0.0))
net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)
return net
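A hedged sketch of compiling the graph above for inference; it assumes the build_simple_block/build_residual_block helpers are in scope, and 'resnet50.pkl' is a placeholder name for a pretrained-weights file, not a file referenced by the original.

import pickle
import theano
import lasagne

net = build_model_resnet50((None, 3, 224, 224))

# optionally restore pretrained parameter values (placeholder file name)
# values = pickle.load(open('resnet50.pkl', 'rb'))['param values']
# lasagne.layers.set_all_param_values(net['prob'], values)

probs = lasagne.layers.get_output(net['prob'], deterministic=True)
predict_fn = theano.function([net['input'].input_var], probs)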
def build_model_resnet152(input_shape):
net = {}
net['input'] = InputLayer(input_shape)
sub_net, parent_layer_name = build_simple_block(
net['input'], ['conv1', 'bn_conv1', 'conv1_relu'],
64, 7, 2, 3, use_bias=True)
net.update(sub_net)
net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False)
block_size = list('abc')
parent_layer_name = 'pool1'
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='2%s' % c)
net.update(sub_net)
block_size = ['a'] + ['b'+str(i+1) for i in range(7)]
# block_size = list('abcd')
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(
net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='3%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='3%s' % c)
net.update(sub_net)
block_size = ['a'] + ['b'+str(i+1) for i in range(35)]
# block_size = list('abcdef')
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(
net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='4%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='4%s' % c)
net.update(sub_net)
block_size = list('abc')
for c in block_size:
if c == 'a':
sub_net, parent_layer_name = build_residual_block(
net[parent_layer_name], 1.0/2, 1.0/2, True, 4, ix='5%s' % c)
else:
sub_net, parent_layer_name = build_residual_block(net[parent_layer_name], 1.0/4, 1, False, 4, ix='5%s' % c)
net.update(sub_net)
net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0,
mode='average_exc_pad', ignore_border=False)
net['fc1000'] = DenseLayer(net['pool5'], num_units=1000, nonlinearity=None)
net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)
print('Total number of layers:', len(lasagne.layers.get_all_layers(net['prob'])))
return net
def main():
################
# LOAD DATASET #
################
dataset = './data/ubiquitous_aug.hkl'
kfd = './data/ubiquitous_kfold.hkl'
print('Loading dataset {}...'.format(dataset))
X, y = hkl.load(open(dataset, 'r'))
X = X.reshape(-1, 4, 1, 400).astype(floatX)
y = y.astype('int32')
print('X shape: {}, y shape: {}'.format(X.shape, y.shape))
kf = hkl.load(open(kfd, 'r'))
kfold = [(train, test) for train, test in kf]
(train, test) = kfold[0]
print('train_set size: {}, test_set size: {}'.format(len(train), len(test)))
# shuffle +/- labels in minibatch
print('shuffling train_set and test_set')
shuffle(train)
shuffle(test)
X_train = X[train]
X_test = X[test]
y_train = y[train]
y_test = y[test]
print('data prepared!')
layers = [
(InputLayer, {'shape': (None, 4, 1, 400)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
(MaxPool2DLayer, {'pool_size': (1, 2)}),
(Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
(Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
(Conv2DLayer, {'num_filters': 32, 'filter_size': (1, 2)}),
(MaxPool2DLayer, {'pool_size': (1, 2)}),
(DenseLayer, {'num_units': 64}),
(DropoutLayer, {}),
(DenseLayer, {'num_units': 64}),
(DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]
net = NeuralNet(
layers=layers,
max_epochs=100,
update=adam,
update_learning_rate=1e-4,
train_split=TrainSplit(eval_size=0.1),
on_epoch_finished=[
AdjustVariable(1e-4, target=0, half_life=20)],
verbose=2)
net.fit(X_train, y_train)
plot_loss(net)
def main(resume=None):
l = 300
dataset = './data/ubiquitous_train.hkl'
print('Loading dataset {}...'.format(dataset))
X_train, y_train = hkl.load(dataset)
X_train = X_train.reshape(-1, 4, 1, l).astype(floatX)
y_train = np.array(y_train, dtype='int32')
indice = np.arange(X_train.shape[0])
np.random.shuffle(indice)
X_train = X_train[indice]
y_train = y_train[indice]
print('X_train shape: {}, y_train shape: {}'.format(X_train.shape, y_train.shape))
layers = [
(InputLayer, {'shape': (None, 4, 1, l)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 4)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 3)}),
(MaxPool2DLayer, {'pool_size': (1, 2)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
(Conv2DLayer, {'num_filters': 64, 'filter_size': (1, 2)}),
(MaxPool2DLayer, {'pool_size': (1, 2)}),
(DenseLayer, {'num_units': 64}),
(DropoutLayer, {}),
(DenseLayer, {'num_units': 64}),
(DenseLayer, {'num_units': 2, 'nonlinearity': softmax})]
lr = theano.shared(np.float32(1e-4))
net = NeuralNet(
layers=layers,
max_epochs=100,
update=adam,
update_learning_rate=lr,
train_split=TrainSplit(eval_size=0.1),
on_epoch_finished=[
AdjustVariable(lr, target=1e-8, half_life=20)],
verbose=4)
if resume is not None:
net.load_params_from(resume)
net.fit(X_train, y_train)
net.save_params_to('./models/net_params.pkl')
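AdjustVariable is referenced by both training snippets above but not defined on this page. The following is a minimal sketch consistent with the call in this second snippet (a theano shared variable decayed toward `target` with the given half-life); it is an assumption, not the original implementation, and note the first snippet passes a plain float where a shared variable is expected.

import numpy as np

class AdjustVariable(object):
    def __init__(self, variable, target=0.0, half_life=20):
        self.variable = variable      # a theano shared variable, e.g. lr
        self.target = target
        self.half_life = half_life

    def __call__(self, nn, train_history):
        # halve the remaining distance to the target every `half_life` epochs
        delta = self.variable.get_value() - self.target
        delta *= np.exp(-np.log(2.0) / self.half_life)
        self.variable.set_value(np.float32(self.target + delta))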
def get_output_for(self, inputs, **kwargs):
# a typical GRU, except that the prediction produced by the softmax layer is fed back as part of the GRU's input
q = inputs[0]
m = inputs[1]
epmem_dropout = inputs[2]
#q = q * self.rand_stream.binomial(q.shape, p=1-epmem_dropout, dtype=theano.config.floatX)
m = m * self.rand_stream.binomial(m.shape, p=1-epmem_dropout, dtype=theano.config.floatX)
W_in_stacked = T.concatenate([self.W_in_to_resetgate,
self.W_in_to_updategate,
self.W_in_to_hid_update], axis=1)
W_hid_stacked = T.concatenate([self.W_hid_to_resetgate,
self.W_hid_to_updategate,
self.W_hid_to_hid_update], axis=1)
b_stacked = T.concatenate([self.b_resetgate,
self.b_updategate,
self.b_hid_update], axis=0)
def slice_w(x, n):
return x[:, n*self.hid_state_size:(n+1)*self.hid_state_size]
def get_output(a):
return nonlin.softmax(T.dot(a,self.W))
def step(hid_previous, out_previous, *args):
input_n = T.concatenate([out_previous, q], axis=1)
hid_input = T.dot(hid_previous, W_hid_stacked)
input_n = T.dot(input_n, W_in_stacked) + b_stacked
resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
resetgate = self.nonlinearity_resetgate(resetgate)
updategate = self.nonlinearity_updategate(updategate)
hid_update_in = slice_w(input_n, 2)
hid_update_hid = slice_w(hid_input, 2)
hid_update = hid_update_in + resetgate*hid_update_hid
hid_update = self.nonlinearity_hid(hid_update)
hid = (1 - updategate)*hid_previous + updategate*hid_update
out = nonlin.softmax(T.dot(hid, self.W))
return (hid, out)
non_seqs = [W_in_stacked, b_stacked, W_hid_stacked, q, m, self.W]
hid_and_out, b = theano.scan(
fn=step,
outputs_info=[m, get_output(m)],
non_sequences=non_seqs,
strict=True,
n_steps=self.max_answer_word)
return T.transpose(hid_and_out[1], (1,0,2))
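For reference, here is the same recurrence as a single plain-numpy GRU step (sigmoid gates, tanh candidate), mirroring the stacked-weight slicing used in step above; the shapes and the helper itself are illustrative.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(h_prev, x, W_in, W_hid, b, n):
    # W_in/W_hid stack the reset, update and candidate blocks along axis 1
    hid_input = h_prev.dot(W_hid)
    in_input = x.dot(W_in) + b
    r = sigmoid(hid_input[:, :n] + in_input[:, :n])          # reset gate
    z = sigmoid(hid_input[:, n:2*n] + in_input[:, n:2*n])    # update gate
    h_tilde = np.tanh(in_input[:, 2*n:] + r * hid_input[:, 2*n:])
    return (1.0 - z) * h_prev + z * h_tilde                  # interpolation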
def __init__(self, x, y, args):
self.params_theta = []
self.params_lambda = []
self.params_weight = []
if args.dataset == 'mnist':
input_size = (None, 28*28)
elif args.dataset == 'cifar10':
input_size = (None, 3, 32*32)
else:
raise AssertionError
layers = [ll.InputLayer(input_size)]
penalty = theano.shared(np.array(0.))
for (k, num) in enumerate(args.MLPlayer):
# the last layer should use softmax
if k == len(args.MLPlayer) - 1:
# layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
layers.append(DenseLayerWithReg(args, layers[-1], num_units=num,
nonlinearity=nonlinearities.softmax))
else:
# layers.append(ll.DenseLayer(layers[-1], num))
layers.append(DenseLayerWithReg(args, layers[-1], num_units=num))
if layers[-1].W is not None:
self.params_theta += [layers[-1].W, layers[-1].b]
self.params_weight += [layers[-1].W]
# define new regularization term for a layer
if args.regL2 is True:
tempL2 = layers[-1].L2 * T.sqr(layers[-1].W)
penalty += T.sum(tempL2)
self.params_lambda += [layers[-1].L2]
if args.regL1 is True:
tempL1 = layers[-1].L1 * T.abs_(layers[-1].W)
penalty += T.sum(tempL1)
self.params_lambda += [layers[-1].L1]
self.layers = layers
self.y = ll.get_output(layers[-1], x, deterministic=False)
self.prediction = T.argmax(self.y, axis=1)
self.penalty = penalty
# self.penalty = penalty if penalty != 0. else T.constant(0.)
print(self.params_lambda)
# time.sleep(20)
# cost function
self.loss = T.mean(categorical_crossentropy(self.y, y))
self.lossWithPenalty = T.add(self.loss, self.penalty)
print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)
# self.classError = T.mean(T.cast(T.neq(self.prediction, y), 'float32'))
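A short, hypothetical sketch of consuming the parameter groups collected above: gradients of the penalized loss flow to params_theta, while params_lambda would be tuned by an outer hyperparameter loop. Here `model`, `x` and `y` name a constructed instance and the constructor's symbolic arguments, which are assumptions of this sketch.

import theano
import theano.tensor as T
import lasagne

grads = T.grad(model.lossWithPenalty, wrt=model.params_theta)
updates = lasagne.updates.adam(grads, model.params_theta, learning_rate=1e-3)
train_fn = theano.function([x, y], model.lossWithPenalty, updates=updates)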
def __init__(
self, incomings, num_units,
W_g=init.Normal(0.1),
W_h=init.Normal(0.1),
W_v=init.Normal(0.1),
W_s=init.Normal(0.1),
W_p=init.Normal(0.1),
nonlinearity=nonlinearities.tanh,
nonlinearity_atten=nonlinearities.softmax,
**kwargs
):
super(AttenLayer, self).__init__(incomings, **kwargs)
self.batch_size = self.input_shapes[0][0] # None
num_inputs = self.input_shapes[2][1] # k
feature_dim = self.input_shapes[0][1] # d
self.num_units = num_units
self.nonlinearity = nonlinearity
self.nonlinearity_atten = nonlinearity_atten
self.W_h_to_attenGate = self.add_param(
W_h, (num_inputs, 1),
name='W_h_to_atten'
)
self.W_g_to_attenGate = self.add_param(
W_g,
(feature_dim, num_inputs),
name='W_g_to_atten'
)
self.W_v_to_attenGate = self.add_param(
W_v,
(feature_dim, num_inputs),
name='W_v_to_atten'
)
self.W_s_to_attenGate = self.add_param(
W_s,
(feature_dim, num_inputs),
name='W_s_to_atten'
)
self.W_p = self.add_param(
W_p,
(feature_dim, num_units),
name='W_p_to_atten'
)
self.num_inputs = num_inputs
def get_output_for(self, inputs, **kwargs):
s_hat_t = inputs[0]
h_hat_t = inputs[1]
# s_hat_t = s_hat_t.dimshuffle(1, 0)
# h_hat_t = h_hat_t.dimshuffle(1, 0)
H = inputs[2]
# H = H.dimshuffle(2, 0, 1)
# H_len = H.shape[-1]
# z_t 1*none*k
zt = T.dot(
self.nonlinearity(
T.dot(H, self.W_v_to_attenGate) +
T.dot(
T.dot(h_hat_t, self.W_g_to_attenGate).dimshuffle(0, 1, 'x'),
T.ones((1, self.num_inputs))
)
),
self.W_h_to_attenGate
)[:, :, 0]
vt = T.dot(
self.nonlinearity(
T.dot(
s_hat_t, self.W_s_to_attenGate
) +
T.dot(
h_hat_t, self.W_g_to_attenGate
)
),
self.W_h_to_attenGate
)
alpha_hat_t = self.nonlinearity_atten(T.concatenate(
[zt, vt],
axis=-1
))
feature = T.concatenate(
[H, s_hat_t.dimshuffle(0, 'x', 1)],
axis=1
).dimshuffle(2, 0, 1)
c_hat_t = T.sum(alpha_hat_t*feature, axis=-1)
out = T.dot(
(c_hat_t.T+h_hat_t), self.W_p
)
return nonlinearities.softmax(out)
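A hypothetical wiring of the attention layer above, with shapes following the dimension comments in __init__ (d = feature_dim, k = num_inputs); the 49 and 512 below are placeholders.

import lasagne

l_s = lasagne.layers.InputLayer((None, 512))      # s_hat_t: [batch, d]
l_h = lasagne.layers.InputLayer((None, 512))      # h_hat_t: [batch, d]
l_H = lasagne.layers.InputLayer((None, 49, 512))  # H: [batch, k, d]
l_atten = AttenLayer([l_s, l_h, l_H], num_units=512)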
def build_model(self, input_var, forward, dropout):
net = dict()
net['input'] = InputLayer((None, 3, None, None), input_var=input_var)
net['conv1/7x7_s2'] = ConvLayer(
net['input'], 64, 7, stride=2, pad=3, flip_filters=False)
net['pool1/3x3_s2'] = PoolLayer(
net['conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False)
net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1)
net['conv2/3x3_reduce'] = ConvLayer(
net['pool1/norm1'], 64, 1, flip_filters=False)
net['conv2/3x3'] = ConvLayer(
net['conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False)
net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1)
net['pool2/3x3_s2'] = PoolLayerDNN(net['conv2/norm2'], pool_size=3, stride=2)
net.update(self.build_inception_module('inception_3a',
net['pool2/3x3_s2'],
[32, 64, 96, 128, 16, 32]))
net.update(self.build_inception_module('inception_3b',
net['inception_3a/output'],
[64, 128, 128, 192, 32, 96]))
net['pool3/3x3_s2'] = PoolLayerDNN(net['inception_3b/output'],
pool_size=3, stride=2)
net.update(self.build_inception_module('inception_4a',
net['pool3/3x3_s2'],
[64, 192, 96, 208, 16, 48]))
net.update(self.build_inception_module('inception_4b',
net['inception_4a/output'],
[64, 160, 112, 224, 24, 64]))
net.update(self.build_inception_module('inception_4c',
net['inception_4b/output'],
[64, 128, 128, 256, 24, 64]))
net.update(self.build_inception_module('inception_4d',
net['inception_4c/output'],
[64, 112, 144, 288, 32, 64]))
net.update(self.build_inception_module('inception_4e',
net['inception_4d/output'],
[128, 256, 160, 320, 32, 128]))
net['pool4/3x3_s2'] = PoolLayerDNN(net['inception_4e/output'],
pool_size=3, stride=2)
net.update(self.build_inception_module('inception_5a',
net['pool4/3x3_s2'],
[128, 256, 160, 320, 32, 128]))
net.update(self.build_inception_module('inception_5b',
net['inception_5a/output'],
[128, 384, 192, 384, 48, 128]))
net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output'])
if forward:
#net['fc6'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000)
net['prob'] = DenseLayer(net['pool5/7x7_s1'], num_units=4, nonlinearity=softmax)
else:
net['dropout1'] = DropoutLayer(net['pool5/7x7_s1'], p=dropout)
#net['fc6'] = DenseLayer(net['dropout1'], num_units=1000)
#net['dropout2'] = DropoutLayer(net['fc6'], p=dropout)
net['prob'] = DenseLayer(net['dropout1'], num_units=4, nonlinearity=softmax)
return net
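A hedged sketch of compiling the graph above for inference; it assumes the class's build_inception_module helper and an already constructed instance (`model` is a hypothetical name), and the dropout value is illustrative.

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
net = model.build_model(input_var, forward=True, dropout=0.4)
probs = lasagne.layers.get_output(net['prob'], deterministic=True)
predict_fn = theano.function([input_var], probs)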