def forward_gpu(self, inputs):
    """Compute the averaged negative log-likelihood of the targets on GPU.

    Args:
        inputs: Pair ``(x, t)``. The logarithm of ``x`` is taken directly,
            so ``x`` is presumably an array of already-normalised
            probabilities with the class axis at position 1, and ``t`` an
            integer label array -- TODO confirm against the caller.

    Returns:
        A one-element tuple holding the reduced loss array.

    Side effects:
        Stores ``x`` as ``self.y`` when ``self.cache_score`` is truthy and
        stores the reciprocal normalisation factor as ``self._coeff``.
    """
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)

    log_y = cupy.log(x)
    if self.cache_score:
        self.y = x

    if getattr(self, 'normalize', True):
        # Average over the labels that are not ignored; clamp to 1 so an
        # all-ignored batch does not divide by zero.
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)

    # Move axis 1 to the end so the kernel can address an entry as
    # ``position * n_channel + label``.
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)

    # The ignore sentinel is passed into the kernel instead of being
    # hard-coded as -1, so the masking here always agrees with the
    # ``t != self.ignore_label`` count used for normalisation above.
    # ``T(0)`` keeps both branches of the ternary the same type.
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff, S ignore_label',
        'T out',
        't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1],
      self._coeff, self.ignore_label)
    return ret,
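# Comment list        (navigation label left over from the source web page)
# Table of contents   (navigation label left over from the source web page)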