def build_train(self):
    """Create the loss, summary and training ops for this network.

    Reads ``self.action_dim``, ``self.op_actions`` and ``self.learning_rate``.

    Sets the following attributes:
        variables: trainable variables collected under the current
            variable scope.
        op_grad_actions: float32 placeholder of shape
            ``[None, self.action_dim]``.
        op_loss: scalar ``sum(-op_grad_actions * op_actions)``.
        op_summary: merged scalar ("actor loss") and histogram ("actor")
            summaries.
        op_train: Adam update op minimizing ``op_loss``.
    """
    scope_name = tf.get_variable_scope().name
    self.variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name
    )

    # The L2 penalty is built but intentionally NOT added to the loss below;
    # it is kept so the graph contains the same ops as before.
    l2 = tf.contrib.layers.l2_regularizer(0.01)
    l2_penalty = tf.contrib.layers.apply_regularization(l2, self.variables)

    # NOTE(review): presumably fed with the gradient of a critic's value
    # w.r.t. the actions -- confirm against the caller.
    self.op_grad_actions = tf.placeholder(
        tf.float32, [None, self.action_dim]
    )

    # Minimizing this sum back-propagates -op_grad_actions through
    # op_actions into the variables.
    weighted_actions = -self.op_grad_actions * self.op_actions
    self.op_loss = tf.reduce_sum(weighted_actions)  # l2_penalty not added

    loss_summary = tf.scalar_summary("actor loss", self.op_loss)
    actions_summary = tf.histogram_summary("actor", self.op_actions)
    self.op_summary = tf.merge_summary([loss_summary, actions_summary])

    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.op_train = optimizer.minimize(self.op_loss)
# def get_op_train(self):
# self.op_grads = tf.gradients(self.op_actions, self.variables, -self.op_grad_actions)
# self.op_grads2 = tf.gradients(self.op_loss, self.variables)
# return tf.train.AdamOptimizer(1e-4).apply_gradients(zip(self.op_grads2, self.variables))
评论列表
文章目录