def _compute_loss(self, exp_batch, gamma, errors_out=None):
    """Compute the Q-learning loss for a batch of experiences.

    Args:
        exp_batch: batch of experiences; see update()'s docstring. Must
            support ``'weights' in exp_batch``; when a 'weights' entry is
            present the weighted loss is computed with it.
        gamma (float): discount factor, forwarded to
            ``self._compute_y_and_t``.
        errors_out (list or None): optional output argument. When not
            None it is emptied and then refilled in place with the
            absolute differences ``|y - t|`` summed over axis 1.
    Returns:
        loss, as returned by ``compute_weighted_value_loss`` (when
        ``exp_batch`` has 'weights') or ``compute_value_loss`` (otherwise).
    """
    y, t = self._compute_y_and_t(exp_batch, gamma)

    if errors_out is not None:
        # Mutate the caller's list in place (do not rebind) so the caller
        # sees the new errors through its own reference.
        del errors_out[:]
        delta = F.sum(abs(y - t), axis=1)
        # NOTE(review): presumably copies the raw array to host memory so
        # the values can be stored in a plain Python list -- confirm.
        delta = cuda.to_cpu(delta.data)
        errors_out.extend(delta)

    if 'weights' in exp_batch:
        return compute_weighted_value_loss(
            y, t, exp_batch['weights'],
            clip_delta=self.clip_delta,
            batch_accumulator=self.batch_accumulator)

    return compute_value_loss(
        y, t,
        clip_delta=self.clip_delta,
        batch_accumulator=self.batch_accumulator)
# (web page residue) Comment list
# (web page residue) Table of contents