def build_train(self):
    """Build the critic's training graph: loss, summaries, and train op.

    Creates (as attributes on self):
      op_rewards: float32 placeholder for observed returns, shape [batch].
      variables: trainable variables of the current variable scope.
      op_loss: MSE between rewards and critic output, plus L2 regularization.
      op_summary: merged loss/critic summaries for TensorBoard.
      op_grad_actions: gradient of the critic output w.r.t. the actions
        (presumably consumed by the actor update, DDPG-style — TODO confirm).
      op_train: Adam minimization step over op_loss.

    Assumes self.op_critic, self.op_actions and self.learning_rate were
    defined earlier (e.g. by the model-building method) — TODO confirm.
    """
    self.op_rewards = tf.placeholder(tf.float32, [None])
    # Restrict training/regularization to variables created in this
    # network's own variable scope (critic only, not the actor).
    self.variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope=tf.get_variable_scope().name)
    regularization = tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(0.01),
        self.variables,
    )
    self.op_loss = tf.reduce_mean(
        (self.op_rewards - self.op_critic) ** 2) + regularization
    # FIX: tf.merge_summary / tf.scalar_summary / tf.histogram_summary were
    # removed in TensorFlow 1.0; since this file already relies on TF >= 1.0
    # APIs (tf.contrib.layers, tf.GraphKeys), the tf.summary.* forms are the
    # only ones that work here.
    self.op_summary = tf.summary.merge([
        tf.summary.scalar("critic loss", self.op_loss),
        tf.summary.histogram("critic", self.op_critic),
    ])
    # d(critic)/d(actions): the policy-gradient signal fed to the actor.
    self.op_grad_actions = tf.gradients(self.op_critic, self.op_actions)[0]
    self.op_train = tf.train.AdamOptimizer(self.learning_rate).minimize(self.op_loss)