import tensorflow as tf
import tensorflow.contrib.slim as slim


def actor_network():
    '''
    Actor network: builds the policy, the policy-gradient loss, and its Adam optimizer.
    OBS_WIDTH, NUM_ACTIONS, A_REG_SCALE and ACTOR_LR are hyperparameters defined elsewhere.
    '''
    with tf.variable_scope('policy'):
        # Inputs
        state = tf.placeholder('float', [None, OBS_WIDTH])       # batch_size x obs_width
        actions = tf.placeholder('float', [None, NUM_ACTIONS])   # batch_size x num_actions (one-hot)
        advantages = tf.placeholder('float', [None, 1])          # batch_size x 1
        # Fully-connected network: a 6-unit hidden layer followed by a NUM_ACTIONS-unit output layer
        mlp_out = slim.stack(state, slim.fully_connected, [6, NUM_ACTIONS],
                             weights_regularizer=slim.l2_regularizer(scale=A_REG_SCALE))
        # Network output: action probabilities
        probabilities = tf.nn.softmax(mlp_out)
        # Probability the policy assigned to the action actually taken (actions are one-hot)
        good_probabilities = tf.reduce_sum(tf.multiply(probabilities, actions), axis=1)
        # Policy-gradient eligibility: log pi(a|s) weighted by the advantage
        # (advantages is [batch, 1]; squeeze to [batch] so the product stays element-wise)
        eligibility = tf.log(good_probabilities) * tf.squeeze(advantages, [1])
        # Loss & optimizer
        data_loss = -tf.reduce_sum(eligibility)
        reg_losses = slim.losses.get_regularization_losses(scope='policy')
        reg_loss = tf.reduce_sum(reg_losses)
        total_loss = data_loss + reg_loss
        optimizer = tf.train.AdamOptimizer(ACTOR_LR).minimize(total_loss)
        return probabilities, state, actions, advantages, optimizer
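
For context, here is a minimal sketch of how the handles returned by actor_network() could be wired into a sampling and training step, which minimizes -sum_i A_i * log pi(a_i | s_i) plus the L2 regularization term. It assumes OBS_WIDTH and NUM_ACTIONS are defined as above; the batch is random stand-in data and the names obs_batch, onehot_actions, and adv_batch are illustrative assumptions, not part of the original code. In a real agent the observations, one-hot actions, and advantages would come from environment rollouts and a critic or other baseline.

import numpy as np
import tensorflow as tf

# Build the actor graph (hyperparameters OBS_WIDTH, NUM_ACTIONS, etc. assumed defined as above).
probabilities, state_ph, actions_ph, advantages_ph, train_op = actor_network()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Dummy batch standing in for rollout data: observations, one-hot actions, advantages.
    obs_batch = np.random.randn(32, OBS_WIDTH).astype(np.float32)
    onehot_actions = np.eye(NUM_ACTIONS)[np.random.randint(NUM_ACTIONS, size=32)]
    adv_batch = np.random.randn(32, 1).astype(np.float32)

    # Sample an action from the current policy for the first observation.
    probs = sess.run(probabilities, feed_dict={state_ph: obs_batch[:1]})
    action = np.random.choice(NUM_ACTIONS, p=probs[0])

    # One policy-gradient update step.
    sess.run(train_op, feed_dict={state_ph: obs_batch,
                                  actions_ph: onehot_actions,
                                  advantages_ph: adv_batch})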