learning_agent.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:deep_rl_acrobot 作者: georgesung 项目源码 文件源码
def critic_network():
    '''
    Build the critic graph: inputs, value estimate, loss and training op.

    All ops are created inside the 'value' variable scope.

    Returns a 5-tuple (calculated, state, newvals, optimizer, total_loss):
      calculated -- output of the stacked fully-connected layers
      state      -- float placeholder of shape [None, OBS_WIDTH]
      newvals    -- float placeholder of shape [None, 1]; the targets the
                    output is regressed towards
      optimizer  -- Adam minimize op on total_loss (learning rate CRITIC_LR)
      total_loss -- data loss plus the summed regularization losses
    '''
    with tf.variable_scope('value'):
        # Graph inputs; the leading dimension is left open for the batch
        state = tf.placeholder('float', [None, OBS_WIDTH])
        newvals = tf.placeholder('float', [None, 1])

        # Three stacked fully-connected layers of widths 6, 6 and 1, all
        # sharing the same L2 weight regularizer.
        # NOTE(review): no activation_fn is passed, so every layer --
        # including the final 1-unit output -- presumably uses the default
        # activation of slim.fully_connected; confirm that bounding the
        # value estimate this way is intended.
        weight_penalty = slim.l2_regularizer(scale=C_REG_SCALE)
        calculated = slim.stack(
            state,
            slim.fully_connected,
            [6, 6, 1],
            weights_regularizer=weight_penalty,
        )

        # Data term: l2 loss of the residual between prediction and target
        residual = calculated - newvals
        fit_loss = tf.nn.l2_loss(residual)

        # Regularization term: sum of the regularization losses collected
        # under the 'value' scope
        penalty_terms = slim.losses.get_regularization_losses(scope='value')
        penalty_loss = tf.reduce_sum(penalty_terms)

        total_loss = fit_loss + penalty_loss
        train_step = tf.train.AdamOptimizer(CRITIC_LR).minimize(total_loss)

    return calculated, state, newvals, train_step, total_loss

########################################
# Training and inference processes
########################################
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号