def get_objective(l1=0, l2=0.005):
    def objective(layers, loss_function, target, aggregate=aggregate,
                  deterministic=False, get_output_kw=None):
        if get_output_kw is None:
            get_output_kw = {}
        output_layer = layers[-1]
        first_layer = layers[1]
        network_output = lasagne.layers.get_output(
            output_layer, deterministic=deterministic, **get_output_kw)
        if not deterministic:
            losses = loss_function(network_output, target) \
                + l2 * regularization.regularize_network_params(
                    output_layer, regularization.l2) \
                + l1 * regularization.regularize_layer_params(
                    output_layer, regularization.l1)
        else:
            losses = loss_function(network_output, target)
        return aggregate(losses)
    return objective
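The factory above returns a training objective that adds L1/L2 weight penalties to the base loss. A minimal usage sketch follows; the assumption that it is consumed by a nolearn.lasagne-style NeuralNet is mine, not stated in the snippet.

# Hedged usage sketch: the NeuralNet wiring in the comment is an assumption
# about how the factory is consumed, not code from the original project.
import lasagne
from lasagne import regularization
from lasagne.objectives import aggregate, categorical_crossentropy

objective_with_decay = get_objective(l1=0.0, l2=0.005)
# e.g. NeuralNet(layers=..., objective=objective_with_decay,
#                objective_loss_function=categorical_crossentropy, ...)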
Python get_output() usage examples (source code)
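Before the project snippets, here is a minimal self-contained sketch of the core pattern they all share: build layers, call lasagne.layers.get_output to obtain a symbolic expression, and compile it with theano.function. The tiny network below is illustrative, not taken from any of the listed projects.

# Minimal sketch of the basic get_output pattern (illustrative network,
# not from any of the projects listed below).
import numpy as np
import theano
import theano.tensor as T
import lasagne
import lasagne.layers as L

x_var = T.fmatrix('x')
l_in = L.InputLayer((None, 10), input_var=x_var)
l_out = L.DenseLayer(l_in, num_units=3,
                     nonlinearity=lasagne.nonlinearities.softmax)

# deterministic=True disables stochastic layers such as dropout at test time
output = L.get_output(l_out, deterministic=True)
predict_fn = theano.function([x_var], output)

print(predict_fn(np.random.rand(4, 10).astype(np.float32)).shape)  # (4, 3)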
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
    # NOTE: currently unused; it does not work for computing the dist_info_vars of npo_snn_rewardMI
    if latent_var is None:
        # fix to avoid passing the latent as an input: just use the fixed one
        latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))
        latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])
    # generate the generalized input (append latents to obs.)
    if self.bilinear_integration:
        extended_obs_var = TT.concatenate(
            [obs_var, latent_var,
             TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
                        outdim=2)],
            axis=1)
    else:
        extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
    mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return dict(mean=mean_var, log_std=log_std_var)
test_bahdanauAttentionLayer.py (project: e2e-ie-release, author: rasmusbergpalm)
def test_get_output_for(self):
    keys_var = T.ftensor3()
    values_var = T.ftensor3()
    mask_var = T.fmatrix()
    queries_var = T.ftensor3()
    keys_layer = L.InputLayer((None, None, 3), input_var=keys_var)
    values_layer = L.InputLayer((None, None, 5), input_var=values_var)
    mask_layer = L.InputLayer((None, None), input_var=mask_var)
    queries_layer = L.InputLayer((None, None, 7), input_var=queries_var)
    attention_layer = BahdanauKeyValueAttentionLayer(
        [keys_layer, values_layer, mask_layer, queries_layer], 9)
    attention_outputs = L.get_output(attention_layer)
    fn = theano.function([keys_var, values_var, mask_var, queries_var],
                         attention_outputs, on_unused_input='warn')
    keys = np.random.rand(32, 13, 3).astype(np.float32)
    values = np.random.rand(32, 13, 5).astype(np.float32)
    mask = np.random.rand(32, 13).astype(np.float32)
    queries = np.random.rand(32, 17, 7).astype(np.float32)
    _att = fn(keys, values, mask, queries)
    self.assertEqual((32, 17, 5), _att.shape)
cnn_cascade_lasagne.py (project: Cascade-CNN-Face-Detection, author: gogolgrind)
def __build_loss_train__fn__(self):
    # create loss function
    prediction = layers.get_output(self.net)
    loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
    loss = loss.mean() + 1e-4 * regularization.regularize_network_params(
        self.net, regularization.l2)
    val_acc = T.mean(T.eq(T.argmax(prediction, axis=1), self.__target_var__),
                     dtype=theano.config.floatX)
    # create parameter update expressions
    params = layers.get_all_params(self.net, trainable=True)
    self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
    update_rule = updates.nesterov_momentum(loss, params, learning_rate=self.eta,
                                            momentum=0.9)
    # compile training function that updates parameters and returns training loss
    self.__train_fn__ = theano.function(
        [self.__input_var__, self.__target_var__], loss, updates=update_rule)
    self.__predict_fn__ = theano.function(
        [self.__input_var__], layers.get_output(self.net, deterministic=True))
    self.__val_fn__ = theano.function(
        [self.__input_var__, self.__target_var__], [loss, val_acc])
def build_vis(self, l, gamma, lr):
    conv_layer = self.conv_layers[l]
    nonlinearity = conv_layer.nonlinearity
    # temporarily swap in an identity nonlinearity so the raw activation is maximized
    conv_layer.nonlinearity = lasagne.nonlinearities.identity
    output_shape = layers.get_output_shape(conv_layer)
    self.x_shared = theano.shared(
        numpy.zeros((output_shape[1], self.n_visible)).astype('float32'))
    conv_out = layers.get_output(conv_layer, inputs=self.x_shared, deterministic=True)
    idx = output_shape[2] // 2
    cost = -T.sum(conv_out[:, :, idx, idx].diagonal()) + \
        gamma * T.sum(self.x_shared**2)
    updates = lasagne.updates.adadelta(cost, [self.x_shared], learning_rate=lr)
    fn = {}  # collect the compiled functions in a dict
    fn['train'] = theano.function([], cost, updates=updates)
    conv_layer.nonlinearity = nonlinearity
    return fn
def create_infer_func(layers):
    Xa, Xb = T.tensor4('Xa'), T.tensor4('Xb')
    Xa_batch, Xb_batch = T.tensor4('Xa_batch'), T.tensor4('Xb_batch')
    Tp = get_output(
        layers['trans'],
        inputs={
            layers['inputa']: Xa, layers['inputb']: Xb,
        },
        deterministic=True,
    )
    infer_func = theano.function(
        inputs=[theano.In(Xa_batch), theano.In(Xb_batch)],
        outputs=Tp,
        givens={
            Xa: Xa_batch, Xb: Xb_batch,  # Ia, Ib
        }
    )
    return infer_func
def build_train_func(rank=0, **kwargs):
    print("rank: {} Building model".format(rank))
    resnet = build_resnet()

    print("Building training function")
    x = T.ftensor4('x')
    y = T.imatrix('y')
    prob = L.get_output(resnet['prob'], x, deterministic=False)
    loss = T.nnet.categorical_crossentropy(prob, y.flatten()).mean()
    params = L.get_all_params(list(resnet.values()), trainable=True)
    sgd_updates = updates.sgd(loss, params, learning_rate=1e-4)

    # make a function to compute and store the raw gradient
    f_train = theano.function(inputs=[x, y],
                              outputs=loss,  # (assumes this is an avg)
                              updates=sgd_updates)
    return f_train, "original"
def _init_explain_function(self, patterns=None, **kwargs):
    with umisc.ignore_sigmoids(self.output_layer) as output_layer:
        Y = L.get_output(output_layer, deterministic=True)
        X = self.input_layer.input_var  # original input
        I = T.iscalar()  # output neuron
        S = T.iscalar()  # sample that is desired
        E = T.grad(Y[S].flatten()[I], X)
        self.grad_function = theano.function(inputs=[X, S, I], outputs=E)
def _init_relevance_function(self):
    with umisc.ignore_sigmoids(self.output_layer) as output_layer:
        output = L.get_output(output_layer, deterministic=True)
        self.relevance_function = theano.function(
            inputs=[self.input_layer.input_var], outputs=output)
def _init_explain_function(self, patterns=None, **kwargs):
    self._init_network(patterns=patterns)
    explanation = L.get_output(self.explain_output_layer, deterministic=True)
    self.explain_function = theano.function(
        inputs=[self.input_layer.input_var, self.relevance_values],
        outputs=explanation)
def get_dense_xy(layer, deterministic=True):
    x = L.get_output(L.FlattenLayer(layer.input_layer),
                     deterministic=deterministic)  # N, D
    w = layer.W  # D, O
    y = T.dot(x, w)  # N, O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def get_conv_xy(layer, deterministic=True):
    w_np = layer.W.get_value()
    input_layer = layer.input_layer
    if layer.pad == 'same':
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:]) // 2,
                                 batch_ndim=2)
    input_shape = L.get_output_shape(input_layer)
    max_x = input_shape[2] - w_np.shape[2]
    max_y = input_shape[3] - w_np.shape[3]
    srng = RandomStreams()
    patch_x = srng.random_integers(low=0, high=max_x)
    patch_y = srng.random_integers(low=0, high=max_y)
    # print("input_shape shape: ", input_shape)
    # print("pad: \"%s\"" % (layer.pad,))
    # print("stride: ", layer.stride)
    # print("max_x %d max_y %d" % (max_x, max_y))
    x = L.get_output(input_layer, deterministic=deterministic)
    x = x[:, :,
          patch_x:patch_x + w_np.shape[2], patch_y:patch_y + w_np.shape[3]]
    x = T.flatten(x, 2)  # N, D
    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]
    w = T.flatten(w, outdim=2).T  # D, O
    y = T.dot(x, w)  # N, O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
def dist_info_sym(self, obs_var, state_info_var=None):
    mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], obs_var)
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return dict(mean=mean_var, log_std=log_std_var)
def dist_info_sym(self, obs_var, state_info_vars=None):
    mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], obs_var)
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return dict(mean=mean_var, log_std=log_std_var)
def log_likelihood_sym(self, x_var, y_var):
    normalized_xs_var = (x_var - self._x_mean_var) / self._x_std_var
    prob = L.get_output(self._l_prob,
                        {self._prob_network.input_layer: normalized_xs_var})
    return self._dist.log_likelihood_sym(TT.cast(y_var, 'int32'), dict(prob=prob))
def get_qval_sym(self, obs_var, action_var, **kwargs):
    qvals = L.get_output(
        self._output_layer,
        {self._obs_layer: obs_var, self._action_layer: action_var},
        **kwargs
    )
    return TT.reshape(qvals, (-1,))

def get_action_sym(self, obs_var):
    return L.get_output(self._output_layer, obs_var)
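Several of the snippets above substitute symbolic variables for input layers by passing a {layer: variable} mapping to get_output. A standalone sketch of that pattern follows; the tiny two-input Q-network is an illustrative assumption, not code from any of the listed projects.

# Sketch of the {layer: variable} input-substitution form of L.get_output;
# the two-input network below is illustrative only.
import theano
import theano.tensor as T
import lasagne.layers as L

obs_var = T.fmatrix('obs')
act_var = T.fmatrix('act')

l_obs = L.InputLayer((None, 4))
l_act = L.InputLayer((None, 2))
l_hidden = L.DenseLayer(L.ConcatLayer([l_obs, l_act], axis=1), num_units=32)
l_q = L.DenseLayer(l_hidden, num_units=1, nonlinearity=None)

# substitute the symbolic variables for the two input layers
qvals = L.get_output(l_q, {l_obs: obs_var, l_act: act_var})
f_q = theano.function([obs_var, act_var], qvals.reshape((-1,)))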
def dist_info_sym(self, obs_var, state_info_vars):
    n_batches, n_steps = obs_var.shape[:2]
    obs_var = obs_var.reshape((n_batches, n_steps, -1))
    if self.state_include_action:
        prev_action_var = state_info_vars["prev_action"]
        all_input_var = TT.concatenate(
            [obs_var, prev_action_var],
            axis=2
        )
    else:
        all_input_var = obs_var
    if self.feature_network is None:
        return dict(
            prob=L.get_output(
                self.prob_network.output_layer,
                {self.l_input: all_input_var}
            )
        )
    else:
        flat_input_var = TT.reshape(all_input_var, (-1, self.input_dim))
        return dict(
            prob=L.get_output(
                self.prob_network.output_layer,
                {self.l_input: all_input_var,
                 self.feature_network.input_layer: flat_input_var}
            )
        )
def dist_info_sym(self, obs_var, state_info_vars):
    n_batches, n_steps = obs_var.shape[:2]
    obs_var = obs_var.reshape((n_batches, n_steps, -1))
    if self._state_include_action:
        prev_action_var = state_info_vars["prev_action"]
        all_input_var = TT.concatenate(
            [obs_var, prev_action_var],
            axis=2
        )
    else:
        all_input_var = obs_var
    means, log_stds = L.get_output(
        [self._mean_network.output_layer, self._l_log_std], all_input_var)
    return dict(mean=means, log_std=log_stds)
def dist_info_sym(self, obs_var, state_info_vars=None):
    return dict(
        prob=L.get_output(
            self._l_prob,
            {self._l_obs: obs_var}
        )
    )
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def dist_info_sym(self, obs_var, state_info_vars=None):
    return dict(prob=L.get_output(self._l_prob, {self._l_obs: obs_var}))
def log_likelihood_sym(self, x_var, y_var):
    normalized_xs_var = (x_var - self._x_mean_var) / self._x_std_var
    normalized_means_var, normalized_log_stds_var = \
        L.get_output([self._l_mean, self._l_log_std],
                     {self._mean_network.input_layer: normalized_xs_var})
    means_var = normalized_means_var * self._y_std_var + self._y_mean_var
    log_stds_var = normalized_log_stds_var + TT.log(self._y_std_var)
    return self._dist.log_likelihood_sym(y_var, dict(mean=means_var, log_std=log_stds_var))
def test_gru_network():
    from rllab.core.network import GRUNetwork
    import lasagne.layers as L
    from rllab.misc import ext
    import numpy as np

    network = GRUNetwork(
        input_shape=(2, 3),
        output_dim=5,
        hidden_dim=4,
    )
    f_output = ext.compile_function(
        inputs=[network.input_layer.input_var],
        outputs=L.get_output(network.output_layer)
    )
    assert f_output(np.zeros((6, 8, 2, 3))).shape == (6, 8, 5)
def getPredictionFuntion(net):
    net_output = l.get_output(net, deterministic=True)

    print("COMPILING THEANO TEST FUNCTION...", end=" ")
    start = time.time()
    test_net = theano.function([l.get_all_layers(net)[0].input_var], net_output,
                               allow_input_downcast=True)
    print("DONE! (", int(time.time() - start), "s )")

    return test_net
def getPredictionFuntion(net):
    net_output = l.get_output(net, deterministic=True)

    print("COMPILING THEANO TEST FUNCTION...", end=" ")
    start = time.time()
    test_net = theano.function([l.get_all_layers(net)[0].input_var], net_output,
                               allow_input_downcast=True)
    print("DONE! (", int(time.time() - start), "s )")

    return test_net
################# PREDICTION POOLING ####################