def update_weights(self, f):
"""
Gradient-based update of current Critic parameters. Also return the
action gradients for the Actor update later. This is the dQ/da in the
paper, and Q is the current Q network, not the target Q network.
"""
feed = {
self.obs_t_BO: f['obs_t_BO'],
self.act_t_BA: f['act_t_BA'],
self.rew_t_B: f['rew_t_B'],
self.obs_tp1_BO: f['obs_tp1_BO'],
self.done_mask_B: f['done_mask_B']
}
    action_grads_BA, _, l2_error = self.sess.run(
        [self.act_grads_BA, self.optimize_c, self.l2_error], feed)
    # self.act_grads_BA evaluates to a list; we differentiated w.r.t. a
    # single tensor (the actions), so its only item is the gradient we want.
assert len(action_grads_BA) == 1
return action_grads_BA[0], l2_error
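
# A minimal usage sketch (not from the original code): assuming a replay
# buffer `replay` whose sample() returns a dict with the keys fed above, and
# an Actor exposing update_weights(obs_t_BO, action_grads_BA), one training
# step might look like:
#
#   batch = replay.sample(batch_size)
#   dq_da_BA, l2_error = critic.update_weights(batch)
#   actor.update_weights(batch['obs_t_BO'], dq_da_BA)
#
# The returned dQ/da term is what lets the Actor apply the deterministic
# policy gradient: it chains the Critic's gradient w.r.t. the actions into
# the gradient w.r.t. the Actor's own parameters.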