def build_network_normal(self):
    # Placeholders for observations, taken (continuous) actions, and advantages
    self.states = tf.placeholder(tf.float32, [None, self.env_runner.nO], name="states")  # Observations
    self.a_n = tf.placeholder(tf.float32, name="a_n")  # Continuous actions
    self.adv_n = tf.placeholder(tf.float32, name="adv_n")  # Advantages

    # Single hidden layer shared by the mean and standard deviation heads
    L1 = tf.contrib.layers.fully_connected(
        inputs=self.states,
        num_outputs=self.config["n_hidden_units"],
        activation_fn=tf.tanh,
        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
        biases_initializer=tf.zeros_initializer())

    # mu_sigma_layer outputs the mean and standard deviation of a 1-dimensional Gaussian policy
    mu, sigma = mu_sigma_layer(L1, 1)
    self.normal_dist = tf.contrib.distributions.Normal(mu, sigma)

    # Sample one action per state and clip it to the environment's valid action range
    self.action = self.normal_dist.sample(1)
    self.action = tf.clip_by_value(self.action, self.env.action_space.low[0], self.env.action_space.high[0])
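
# Note: the placeholders a_n and adv_n are declared above but not used in this snippet.
# Below is a minimal sketch (an assumption, not part of the original source) of how they
# would typically feed a policy-gradient loss for this Gaussian policy. The method name
# build_loss_normal and the config keys "entropy_coef" and "learning_rate" are hypothetical.
def build_loss_normal(self):
    # log pi(a|s) weighted by the advantage; an entropy bonus encourages exploration
    log_prob = self.normal_dist.log_prob(self.a_n)
    self.loss = -tf.reduce_mean(log_prob * self.adv_n) \
        - self.config["entropy_coef"] * tf.reduce_mean(self.normal_dist.entropy())
    self.train_op = tf.train.AdamOptimizer(self.config["learning_rate"]).minimize(self.loss)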