def dist_info_sym(self, obs_var, state_info_vars):
    n_batches, n_steps = obs_var.shape[:2]
    obs_var = obs_var.reshape((n_batches, n_steps, -1))
    if self.state_include_action:
        prev_action_var = state_info_vars["prev_action"]
        all_input_var = TT.concatenate(
            [obs_var, prev_action_var],
            axis=2
        )
    else:
        all_input_var = obs_var
    if self.feature_network is None:
        return dict(
            prob=L.get_output(
                self.prob_network.output_layer,
                {self.l_input: all_input_var}
            )
        )
    else:
        flat_input_var = TT.reshape(all_input_var, (-1, self.input_dim))
        return dict(
            prob=L.get_output(
                self.prob_network.output_layer,
                {self.l_input: all_input_var,
                 self.feature_network.input_layer: flat_input_var}
            )
        )
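In the snippets on this page, L.get_output is lasagne.layers.get_output; passing a dict maps specific layers to replacement symbolic inputs, which is how the policy above splices its own observation/action variables into the network. A minimal, self-contained sketch (layer names are illustrative, not from rllab):

import theano.tensor as TT
import lasagne
import lasagne.layers as L

# Hypothetical two-layer classifier, independent of the policies above
l_in = L.InputLayer(shape=(None, 8))
l_out = L.DenseLayer(l_in, num_units=3,
                     nonlinearity=lasagne.nonlinearities.softmax)

# Substituting {layer: variable} makes get_output build the expression
# on top of obs_var instead of l_in's own input_var
obs_var = TT.matrix("obs")
prob_sym = L.get_output(l_out, {l_in: obs_var})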
def dist_info_sym(self, obs_var, state_info_vars):
    n_batches, n_steps = obs_var.shape[:2]
    obs_var = obs_var.reshape((n_batches, n_steps, -1))
    if self._state_include_action:
        prev_action_var = state_info_vars["prev_action"]
        all_input_var = TT.concatenate(
            [obs_var, prev_action_var],
            axis=2
        )
    else:
        all_input_var = obs_var
    means, log_stds = L.get_output(
        [self._mean_network.output_layer, self._l_log_std], all_input_var)
    return dict(mean=means, log_std=log_stds)
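Note that when get_output receives a list of layers it returns a list of output expressions in the same order, which is what lets the snippet above pull the mean head and the log-std head out of a single graph build:

# Hypothetical layer names, same pattern as above
means_sym, log_stds_sym = L.get_output([l_mean, l_log_std], all_input_var)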
def dist_info_sym(self, obs_var, state_info_vars=None):
    return dict(
        prob=L.get_output(
            self._l_prob,
            {self._l_obs: obs_var}
        )
    )
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other
        network params are ignored
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Discrete)
    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )
    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer))
    self._dist = Categorical(env_spec.action_space.n)
    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
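The ext.compile_function call above is rllab's thin wrapper for compiling a Theano function; a rough stand-alone equivalent (a sketch, not rllab's actual helper) would be:

import theano
import lasagne.layers as L

# Compile observations -> action probabilities for the MLP built above;
# `prob_network` is assumed to be the same MLP instance as in __init__
f_prob = theano.function(
    [prob_network.input_layer.input_var],
    L.get_output(prob_network.output_layer),
)
# probs = f_prob(obs_batch)  # each row sums to 1 thanks to the softmax output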
def dist_info_sym(self, obs_var, state_info_vars=None):
    return dict(prob=L.get_output(self._l_prob, {self._l_obs: obs_var}))
def log_likelihood_sym(self, x_var, y_var):
    normalized_xs_var = (x_var - self._x_mean_var) / self._x_std_var
    normalized_means_var, normalized_log_stds_var = \
        L.get_output([self._l_mean, self._l_log_std], {
            self._mean_network.input_layer: normalized_xs_var})
    means_var = normalized_means_var * self._y_std_var + self._y_mean_var
    log_stds_var = normalized_log_stds_var + TT.log(self._y_std_var)
    return self._dist.log_likelihood_sym(y_var, dict(mean=means_var, log_std=log_stds_var))
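The de-normalization above is the usual linear change of variables: the mean rescales and shifts, while the standard deviation only rescales, so its log simply gains a log(y_std) offset:

# if y = y_std * z + y_mean and z ~ N(mu, sigma), then
#   mean(y) = y_std * mu + y_mean
#   std(y)  = y_std * sigma   =>   log std(y) = log sigma + log y_std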
def test_gru_network():
    from rllab.core.network import GRUNetwork
    import lasagne.layers as L
    from rllab.misc import ext
    import numpy as np
    network = GRUNetwork(
        input_shape=(2, 3),
        output_dim=5,
        hidden_dim=4,
    )
    f_output = ext.compile_function(
        inputs=[network.input_layer.input_var],
        outputs=L.get_output(network.output_layer)
    )
    assert f_output(np.zeros((6, 8, 2, 3))).shape == (6, 8, 5)
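The zero tensor fed to the compiled function is a batch of 6 sequences of length 8, each time step carrying an observation of shape (2, 3); the GRU maps every step to a 5-dimensional output, hence the expected (6, 8, 5) shape.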
def __init__(self, x, y, args):
    self.params_theta = []
    self.params_lambda = []
    self.params_weight = []
    if args.dataset == 'mnist':
        input_size = (None, 1, 28, 28)
    elif args.dataset == 'cifar10':
        input_size = (None, 3, 32, 32)
    else:
        raise AssertionError
    layers = [ll.InputLayer(input_size)]
    self.penalty = theano.shared(np.array(0.))
    # conv1
    layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # conv2
    layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
    self.add_params_to_self(args, layers[-1])
    layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
    # fc1
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
    self.add_params_to_self(args, layers[-1])
    # softmax
    layers.append(DenseLayerWithReg(args, layers[-1], num_units=10,
                                    nonlinearity=nonlinearities.softmax))
    self.add_params_to_self(args, layers[-1])
    self.layers = layers
    self.y = ll.get_output(layers[-1], x, deterministic=False)
    self.prediction = T.argmax(self.y, axis=1)
    # self.penalty = penalty if penalty != 0. else T.constant(0.)
    print(self.params_lambda)
    # time.sleep(20)
    # cost function
    self.loss = T.mean(categorical_crossentropy(self.y, y))
    self.lossWithPenalty = T.add(self.loss, self.penalty)
    print("loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty))
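A hedged sketch of how this model might then be trained, assuming model = <this class>(x, y, args) was built with the same symbolic x and y passed to __init__ (the names and the learning rate are illustrative, not from the original source):

import theano
import lasagne

# SGD on the elementary weights only; the per-weight regularization
# coefficients collected in params_lambda would be tuned by an outer loop.
updates = lasagne.updates.sgd(model.lossWithPenalty, model.params_theta,
                              learning_rate=0.01)
train_fn = theano.function([x, y], model.lossWithPenalty, updates=updates)
predict_fn = theano.function([x], model.prediction)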
def predict(self, candidates):
    print(colored('Predicting {} samples...'.format(len(candidates)), 'green'))
    inputx = [n.input_var for n in self.input_layers]
    output = [layers.get_output(n) for n in self.nets]  # Actual output
    gen_output = [theano.function([inputx[i]], output[i]) for i in range(len(self.nets))]
    vs = [gen_output[i](candidates) for i in range(len(self.nets))]
    vs = np.transpose(vs)
    # TAODEBUG:
    print(vs[0])
    return vs[0]

# NOTE:
# A sample of [save] / [load] for a Lasagne CNN model can be found at:
# https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
# def save(self, path):
#     print(colored('Saving the models at {}'.format(path), 'green'))
#     i = 0
#     for net in self.nets:
#         print('...Saving {}'.format(path + str(i)))
#         np.savez(path + str(i), *lasagne.layers.get_all_param_values(self.nets[i]))
#         i += 1
#     print('...Done')
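For completeness, the matching load step from the Lasagne MNIST example referenced above looks roughly like this (sketched with a hypothetical path and net):

# import numpy as np
# import lasagne
# with np.load(path + '.npz') as f:
#     param_values = [f['arr_%d' % j] for j in range(len(f.files))]
# lasagne.layers.set_all_param_values(net, param_values)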
def get_action_sym(self, obs_var):
    return L.get_output(self._output_layer, obs_var)
def dist_info_sym(self, obs_var, state_info_vars=None):
    mean_var, log_std_var = L.get_output(
        [self._l_mean, self._l_log_std], obs_var)
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return dict(mean=mean_var, log_std=log_std_var)
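Clipping the symbolic log-std at log(min_std) with TT.maximum keeps the policy's Gaussian from collapsing to a nearly deterministic distribution during optimization; with min_std = 1e-4, for example, no log-std can drop below log(1e-4) ≈ -9.21.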
def log_likelihood_sym(self, x_var, y_var):
    normalized_xs_var = (x_var - self._x_mean_var) / self._x_std_var
    prob = L.get_output(
        self._l_prob, {self._prob_network.input_layer: normalized_xs_var})
    return self._dist.log_likelihood_sym(TT.cast(y_var, 'int32'), dict(prob=prob))
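Unlike the Gaussian regressor earlier, this variant models class labels: the inputs are normalized the same way, but the targets are cast to int32 class indices before being handed to the Categorical distribution's log-likelihood.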
def prep(self, deterministic=False):
    layer_pairs = list(self.layer_iter)
    layers = [v for k, v in layer_pairs]
    names = [k for k, v in layer_pairs]
    outputs = get_output(layers, deterministic=deterministic)
    for name, output in zip(names, outputs):
        out_name = "{}_out".format(name)
        self.__dict__[out_name] = output
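A minimal sketch of how prep might be used afterwards, assuming layer_iter yields (name, layer) pairs such as ('conv1', <layer>) (the attribute names below are illustrative):

# model.prep(deterministic=True)
# conv1_sym = model.conv1_out                         # symbolic output of 'conv1'
# f_conv1 = theano.function([input_var], conv1_sym)   # compile for inspection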