import tensorflow as tf
import tensorflow.contrib.slim as slim


def actor_network():
    '''
    Actor network: builds the policy, the policy-gradient loss, and its Adam optimizer.
    OBS_WIDTH, NUM_ACTIONS, A_REG_SCALE and ACTOR_LR are hyperparameters defined elsewhere.
    '''
    with tf.variable_scope('policy'):
        # Inputs
        state = tf.placeholder('float', [None, OBS_WIDTH])       # batch_size x obs_width
        actions = tf.placeholder('float', [None, NUM_ACTIONS])   # batch_size x num_actions (one-hot)
        advantages = tf.placeholder('float', [None, 1])          # batch_size x 1
        # Fully-connected network: a 6-unit hidden layer followed by a NUM_ACTIONS-unit output layer
        mlp_out = slim.stack(state, slim.fully_connected, [6, NUM_ACTIONS],
                             weights_regularizer=slim.l2_regularizer(scale=A_REG_SCALE))
        # Network output: action probabilities
        probabilities = tf.nn.softmax(mlp_out)
        # Probability the policy assigned to the action actually taken (actions are one-hot)
        good_probabilities = tf.reduce_sum(tf.multiply(probabilities, actions), axis=1)
        # Policy-gradient eligibility: log pi(a|s) weighted by the advantage
        # (advantages is [batch, 1]; squeeze to [batch] so the product stays element-wise)
        eligibility = tf.log(good_probabilities) * tf.squeeze(advantages, [1])
        # Loss & optimizer
        data_loss = -tf.reduce_sum(eligibility)
        reg_losses = slim.losses.get_regularization_losses(scope='policy')
        reg_loss = tf.reduce_sum(reg_losses)
        total_loss = data_loss + reg_loss
        optimizer = tf.train.AdamOptimizer(ACTOR_LR).minimize(total_loss)
        return probabilities, state, actions, advantages, optimizer
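
For context, here is a minimal sketch of how the handles returned by actor_network() could be wired into a sampling and training step, which minimizes -sum_i A_i * log pi(a_i | s_i) plus the L2 regularization term. It assumes OBS_WIDTH and NUM_ACTIONS are defined as above; the batch is random stand-in data and the names obs_batch, onehot_actions, and adv_batch are illustrative assumptions, not part of the original code. In a real agent the observations, one-hot actions, and advantages would come from environment rollouts and a critic or other baseline.

import numpy as np
import tensorflow as tf

# Build the actor graph (hyperparameters OBS_WIDTH, NUM_ACTIONS, etc. assumed defined as above).
probabilities, state_ph, actions_ph, advantages_ph, train_op = actor_network()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Dummy batch standing in for rollout data: observations, one-hot actions, advantages.
    obs_batch = np.random.randn(32, OBS_WIDTH).astype(np.float32)
    onehot_actions = np.eye(NUM_ACTIONS)[np.random.randint(NUM_ACTIONS, size=32)]
    adv_batch = np.random.randn(32, 1).astype(np.float32)

    # Sample an action from the current policy for the first observation.
    probs = sess.run(probabilities, feed_dict={state_ph: obs_batch[:1]})
    action = np.random.choice(NUM_ACTIONS, p=probs[0])

    # One policy-gradient update step.
    sess.run(train_op, feed_dict={state_ph: obs_batch,
                                  actions_ph: onehot_actions,
                                  advantages_ph: adv_batch})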