def build_q_network(self, hiddens):
    """Build the Q-value network on top of ``self._inputs``.

    Starting from ``self._inputs``, one fully connected layer is added per
    entry of ``hiddens``. Each hidden layer uses a ``tanh`` activation and
    an L2 weight regularizer with scale 0.1, and is followed by dropout
    driven by ``self.keep_prob``. A final fully connected layer without an
    activation produces ``self.num_actions`` outputs.

    Args:
        hiddens: Iterable of hidden-layer widths, one entry per layer.
            An empty iterable connects the output layer directly to
            ``self._inputs``.

    Side effects:
        Sets ``self.Q_t`` to the output of the final linear layer and
        ``self.Q_action`` to the argmax of ``self.Q_t`` along axis 1.
    """
    # NOTE(review): the indentation of this method was lost in the source;
    # dropout is assumed to be applied after every hidden layer (inside the
    # loop) rather than once after the last one -- confirm against the
    # original file.
    net = self._inputs
    for num_units in hiddens:
        net = layers.fully_connected(
            inputs=net,
            num_outputs=num_units,
            activation_fn=tf.tanh,
            weights_regularizer=layers.l2_regularizer(scale=0.1),
        )
        net = tf.nn.dropout(net, self.keep_prob)

    # Linear output layer: one value per action, no activation.
    self.Q_t = layers.fully_connected(net, self.num_actions, activation_fn=None)

    # The axis is passed positionally because the `dimension=` keyword is
    # deprecated in favour of `axis=`; the positional form is accepted by
    # both the old and the new signature and yields the same result.
    # Presumably axis 1 is the action axis (axis 0 being the batch) -- verify.
    self.Q_action = tf.argmax(self.Q_t, 1)
# 评论列表 (comment list)
# 文章目录 (article table of contents)