def build_network(self):
    # Placeholders for observations, taken actions, and advantage estimates
    self.states = tf.placeholder(tf.float32, [None, self.env_runner.nO], name="states")  # Observations
    self.a_n = tf.placeholder(tf.float32, name="a_n")  # Indices of the discrete actions taken
    self.adv_n = tf.placeholder(tf.float32, name="adv_n")  # Advantage estimates
    # Single hidden layer with tanh nonlinearity
    hidden = tf.contrib.layers.fully_connected(
        inputs=self.states,
        num_outputs=self.config["n_hidden_units"],
        activation_fn=tf.tanh,
        weights_initializer=tf.random_normal_initializer(),
        biases_initializer=tf.zeros_initializer())
    # Output layer: softmax over the nA discrete actions, i.e. the policy π(a|s)
    self.probs = tf.contrib.layers.fully_connected(
        inputs=hidden,
        num_outputs=self.env_runner.nA,
        activation_fn=tf.nn.softmax,
        weights_initializer=tf.random_normal_initializer(),
        biases_initializer=tf.zeros_initializer())
    # Sample one action per state; tf.multinomial expects log-probabilities
    self.action = tf.squeeze(tf.multinomial(tf.log(self.probs), 1), name="action")
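
Note that a_n and adv_n are only defined here; they are consumed when the policy-gradient loss is built. As a rough point of reference, below is a minimal sketch of the REINFORCE surrogate loss such placeholders typically feed, written as a continuation of build_network in the same TF1 graph style. This continuation is an assumption, not copied from the original code, and the "learning_rate" config key is hypothetical:

    # Sketch (assumption, not from the original source): probability the
    # network assigned to each action actually taken
    good_probs = tf.reduce_sum(
        tf.one_hot(tf.cast(self.a_n, tf.int32), self.env_runner.nA) * self.probs,
        axis=1)
    # REINFORCE objective: maximize E[log π(a|s) * advantage]
    self.loss = -tf.reduce_mean(tf.log(good_probs) * self.adv_n)
    self.train_op = tf.train.AdamOptimizer(
        self.config.get("learning_rate", 0.01)).minimize(self.loss)

A training step would then feed a batch of observations, the actions taken in them, and their advantage estimates into the states, a_n, and adv_n placeholders via sess.run on the train_op.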