def __init__(self, scope='policy_network', learning_rate=0.001):
    self.initializer = tf.contrib.layers.xavier_initializer()
    with tf.variable_scope(scope):
        self.state = tf.placeholder(tf.float32, [None, state_dim], name='state')
        self.action = tf.placeholder(tf.int32, [None], name='action')
        self.target = tf.placeholder(tf.float32, name='target')
        # Forward pass: action probabilities for each state in the batch
        self.action_prob = policy_nn(self.state, state_dim, action_space, self.initializer)
        # Pick out the probability of the action that was actually taken
        action_mask = tf.cast(tf.one_hot(self.action, depth=action_space), tf.bool)
        self.picked_action_prob = tf.boolean_mask(self.action_prob, action_mask)
        # REINFORCE loss: -log pi(a|s) * target, plus any regularization losses collected in this scope
        self.loss = tf.reduce_sum(-tf.log(self.picked_action_prob) * self.target) \
                    + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope=scope))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)
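
A minimal usage sketch (not part of the original code), assuming this constructor belongs to a class such as PolicyNetwork and that state_dim, action_space, and policy_nn are module-level definitions as elsewhere in the code; the dummy batch and the variable names below are purely illustrative:

import numpy as np
import tensorflow as tf

policy = PolicyNetwork(scope='policy_network', learning_rate=0.001)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One REINFORCE-style update: states, the actions taken, and their returns (targets)
    states = np.random.rand(4, state_dim).astype(np.float32)      # dummy batch of states
    actions = np.random.randint(0, action_space, size=4)          # dummy chosen actions
    returns = np.ones(4, dtype=np.float32)                        # dummy returns / advantages
    _, loss = sess.run([policy.train_op, policy.loss],
                       feed_dict={policy.state: states,
                                  policy.action: actions,
                                  policy.target: returns})

Because self.target is declared without a shape, it can be fed either a scalar return or a per-sample vector, as in the sketch above.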