actor.py 文件源码-python代码片段

actor.py 文件源码
python
阅读 28 收藏 0 点赞 0 评论 0
def build_train(self):
        """Create the actor's loss, summary and training ops.

        Reads ``self.action_dim``, ``self.op_actions`` and
        ``self.learning_rate``; sets ``self.variables``,
        ``self.op_grad_actions``, ``self.op_loss``, ``self.op_summary`` and
        ``self.op_train``.
        """
        # Trainable variables registered under the currently active variable scope.
        scope_name = tf.get_variable_scope().name
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name)

        # L2 penalty over those variables. It is built here but is currently
        # NOT part of the loss (see the trailing comment on the loss line).
        l2_penalty = tf.contrib.layers.apply_regularization(
            tf.contrib.layers.l2_regularizer(0.01),
            self.variables,
        )

        # Per-action weights fed in at run time, one row per sample.
        # NOTE(review): presumably the critic's gradient w.r.t. the actions -- confirm with caller.
        self.op_grad_actions = tf.placeholder(tf.float32, [None, self.action_dim])

        # Surrogate loss: negated, weight-scaled actions summed over all elements.
        weighted_actions = -self.op_grad_actions * self.op_actions
        self.op_loss = tf.reduce_sum(weighted_actions)  # + l2_penalty

        # Summaries: scalar loss plus a histogram of the produced actions.
        loss_summary = tf.scalar_summary("actor loss", self.op_loss)
        actions_summary = tf.histogram_summary("actor", self.op_actions)
        self.op_summary = tf.merge_summary([loss_summary, actions_summary])

        # No var_list is passed, so the optimizer's default variable selection applies.
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.op_train = optimizer.minimize(self.op_loss)

    # def get_op_train(self):
    #     self.op_grads = tf.gradients(self.op_actions, self.variables, -self.op_grad_actions)
    #     self.op_grads2 = tf.gradients(self.op_loss, self.variables)
    #     return tf.train.AdamOptimizer(1e-4).apply_gradients(zip(self.op_grads2, self.variables))