def __init__(self, learning_rate=0.001):
    # Weight initializer shared by all layers of the policy network
    self.initializer = tf.contrib.layers.xavier_initializer()
    with tf.variable_scope('supervised_policy'):
        # Inputs: a batch of state vectors and the expert actions taken in them
        self.state = tf.placeholder(tf.float32, [None, state_dim], name='state')
        self.action = tf.placeholder(tf.int32, [None], name='action')
        # Action distribution predicted by the policy network
        self.action_prob = policy_nn(self.state, state_dim, action_space, self.initializer)

        # Pick out the probability assigned to the expert action in each row
        action_mask = tf.cast(tf.one_hot(self.action, depth=action_space), tf.bool)
        self.picked_action_prob = tf.boolean_mask(self.action_prob, action_mask)

        # Negative log-likelihood of the expert actions plus L2 regularization losses
        self.loss = tf.reduce_sum(-tf.log(self.picked_action_prob)) + sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope='supervised_policy'))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)
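A minimal training sketch for this graph might look as follows. It assumes the method above belongs to a class named SupervisedPolicy and that state_dim, action_space, and policy_nn are defined at module level, as implied by the snippet; the batch data here is random placeholder data for illustration only.

import numpy as np
import tensorflow as tf

# Build the graph and run one supervised update step (TF1-style session API)
policy = SupervisedPolicy(learning_rate=0.001)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_states = np.random.rand(32, state_dim).astype(np.float32)   # dummy states
    batch_actions = np.random.randint(0, action_space, size=32)       # dummy expert actions
    _, loss_value = sess.run(
        [policy.train_op, policy.loss],
        feed_dict={policy.state: batch_states, policy.action: batch_actions})
    print('loss:', loss_value)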