learning_agent.py 文件源码-python代码片段

learning_agent.py 文件源码

python

阅读 21 收藏 0 点赞 0 评论 0

项目：deep_rl_acrobot 作者: georgesung 项目源码文件源码

def critic_network():
    '''
    Build the critic graph (network, loss and training op) inside the
    'value' variable scope.

    Returns a 5-tuple, in this order:
        values    - output of the fully-connected stack (final layer has 1 unit)
        obs_input - float placeholder of shape [None, OBS_WIDTH]
        targets   - float placeholder of shape [None, 1]
        train_op  - Adam op minimizing the total loss at learning rate CRITIC_LR
        loss      - tf.nn.l2_loss of the prediction error plus the summed
                    regularization losses collected under scope 'value'
    '''
    with tf.variable_scope('value'):
        # Placeholders: observations first, then the regression targets
        obs_input = tf.placeholder(dtype='float', shape=[None, OBS_WIDTH])  # batch_size x obs_width
        targets = tf.placeholder(dtype='float', shape=[None, 1])  # batch_size x 1

        # Fully-connected layers of width 6, 6 and 1; every layer shares the
        # same L2 weight regularizer.
        # NOTE(review): no activation_fn is passed, so slim.fully_connected's
        # default activation is applied to every layer, including the 1-unit
        # output - confirm this is intended for the value estimate.
        l2_reg = slim.l2_regularizer(scale=C_REG_SCALE)
        values = slim.stack(obs_input, slim.fully_connected, [6, 6, 1], weights_regularizer=l2_reg)

        # Prediction error against the supplied targets
        error = values - targets

        # Data term plus regularization term
        fit_loss = tf.nn.l2_loss(error)
        penalty = tf.reduce_sum(slim.losses.get_regularization_losses(scope='value'))
        loss = fit_loss + penalty

        # Training op
        adam = tf.train.AdamOptimizer(CRITIC_LR)
        train_op = adam.minimize(loss)

        return values, obs_input, targets, train_op, loss

########################################
# Training and inference processes
########################################