def backward_cpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    # Reuse the softmax output cached by the forward pass if available;
    # otherwise recompute it as exp(log_softmax(x)).
    if hasattr(self, 'y'):
        y = self.y.copy()
    else:
        y = log_softmax._log_softmax(x)
        np.exp(y, out=y)
    if y.ndim == 2:
        # 2-D case: the gradient of softmax cross entropy is y - onehot(t).
        gx = y
        gx[np.arange(len(t)), np.maximum(t, 0)] -= 1
        if self.class_weight is not None:
            # Scale each sample's gradient by the weight of its target class.
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c[np.arange(len(t)), np.maximum(t, 0)]
            gx *= _broadcast_to(np.expand_dims(c, 1), gx.shape)
        # Zero the gradient of samples whose label equals ignore_label.
        gx *= (t != self.ignore_label).reshape((len(t), 1))
    else:
        # N-D case: flatten the trailing (spatial) axes and index the
        # gradient by (batch, class, unit).
        n_unit = t.size // len(t)
        gx = y.reshape(y.shape[0], y.shape[1], -1)
        fst_index = np.arange(t.size) // n_unit
        trd_index = np.arange(t.size) % n_unit
        gx[fst_index, np.maximum(t.ravel(), 0), trd_index] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c.reshape(gx.shape)
            c = c[fst_index, np.maximum(t.ravel(), 0), trd_index]
            c = c.reshape(y.shape[0], 1, -1)
            gx *= _broadcast_to(c, gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
        gx = gx.reshape(y.shape)
    if self.reduce == 'mean':
        # 'mean' reduction: the incoming gradient is a scalar, so scale by
        # the normalization coefficient computed in the forward pass.
        gx *= gloss * self._coeff
    else:
        # 'no' reduction: one incoming gradient per sample.
        gx *= gloss[:, None]
    return gx, None
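To see the same gradient rule outside the class, here is a minimal standalone sketch for the 2-D case using only NumPy. It is my own illustration, not part of the Chainer source; the helper name softmax_cross_entropy_grad_2d and its signature are assumptions made for the example, and it only mimics the 'mean' reduction with ignore_label handling.

import numpy as np

def softmax_cross_entropy_grad_2d(x, t, ignore_label=-1):
    # Softmax over the class axis (numerically stabilized).
    e = np.exp(x - x.max(axis=1, keepdims=True))
    y = e / e.sum(axis=1, keepdims=True)
    # Gradient rule: y - onehot(t), with ignored samples zeroed out.
    gx = y.copy()
    gx[np.arange(len(t)), np.maximum(t, 0)] -= 1
    valid = (t != ignore_label)
    gx *= valid.reshape(-1, 1)
    # 'mean' reduction: divide by the number of valid samples.
    coeff = 1.0 / max(1, valid.sum())
    return gx * coeff

# Example usage: two samples, three classes.
x = np.array([[2.0, 1.0, 0.1],
              [0.5, 2.5, 0.3]])
t = np.array([0, 2])
print(softmax_cross_entropy_grad_2d(x, t))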