def _compute_y_and_t(self, exp_batch, gamma):
    """Return Q-value predictions and gradient-scaled targets for a batch.

    Args:
        exp_batch: Mapping read here for its ``'state'`` and ``'action'``
            entries; it is also forwarded to ``_compute_target_values``.
        gamma: Forwarded unchanged to ``_compute_target_values``.

    Returns:
        A 2-tuple ``(y, t)``. Both elements are reshaped to
        ``(batch_size, 1)``, where ``batch_size`` is ``len`` of the
        ``'state'`` entry. ``y`` is the Q-function output evaluated at the
        batch actions; ``t`` is the target values passed through
        ``scale_grad.scale_grad`` with ``self.grad_scale``.
    """
    states = exp_batch['state']
    column_shape = (len(states), 1)

    # Run the Q-function on the batch states, then evaluate its output at
    # the batch actions.
    q_output = self.q_function(states)
    actions = exp_batch['action']
    y = F.reshape(q_output.evaluate_actions(actions), column_shape)

    # Target values must also backprop gradients, so they are returned
    # through scale_grad (with self.grad_scale) instead of as constants.
    targets = self._compute_target_values(exp_batch, gamma)
    t = F.reshape(targets, column_shape)

    return y, scale_grad.scale_grad(t, self.grad_scale)
评论列表
文章目录