def backward_gpu(self, inputs, grad_outputs):
    """Compute the gradient of softmax cross entropy w.r.t. ``x`` on GPU.

    Args:
        inputs: Tuple ``(x, t)`` — the logits and the integer target labels
            (targets equal to ``self.ignore_label`` receive zero gradient).
        grad_outputs: Tuple whose first element is the upstream gradient of
            the loss.

    Returns:
        Tuple ``(gx, None)``: the gradient w.r.t. ``x``; targets get no
        gradient.
    """
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        # The forward pass cached the softmax output — reuse it directly.
        y = self.y
    else:
        # Recompute softmax(x) as exp(log_softmax(x)); the exp must run
        # ONLY in this branch — the cached self.y is already softmax.
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    # Number of spatial positions per sample (product of trailing dims).
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        # _coeff folds in the 1/N normalization computed in forward.
        coeff = gloss * self._coeff
    else:
        # Per-element loss: broadcast gloss over the class axis.
        coeff = gloss[:, None, ...]
    if self.class_weight is None:
        # gx = coeff * (softmax(x) - onehot(t)); zero where t == ignore_label.
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        # Same gradient, scaled by the per-class weight w[t].
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)
    return gx, None