def backward_cpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    if hasattr(self, 'y'):
        y = self.y.copy()
    else:
        y = log_softmax._log_softmax(x, self.use_cudnn)
        numpy.exp(y, out=y)
    if y.ndim == 2:
        # gradient of softmax cross entropy: softmax(x) - onehot(t)
        gx = y
        gx[numpy.arange(len(t)), numpy.maximum(t, 0)] -= 1
        if self.class_weight is not None:
            c = self.class_weight[
                numpy.arange(len(t)), numpy.maximum(t, 0)]
            gx *= numpy.broadcast_to(numpy.expand_dims(c, 1), gx.shape)
        # zero out the rows whose label equals ignore_label
        gx *= (t != self.ignore_label).reshape((len(t), 1))
    else:
        # In the case where y.ndim is higher than 2, we think that the
        # current implementation is inefficient because it yields two
        # provisional arrays for indexing.
        n_unit = t.size // len(t)
        gx = y.reshape(y.shape[0], y.shape[1], -1)
        fst_index = numpy.arange(t.size) // n_unit
        trd_index = numpy.arange(t.size) % n_unit
        gx[fst_index, numpy.maximum(t.ravel(), 0), trd_index] -= 1
        if self.class_weight is not None:
            c = self.class_weight.reshape(gx.shape)
            c = c[fst_index, numpy.maximum(t.ravel(), 0), trd_index]
            c = c.reshape(y.shape[0], 1, -1)
            gx *= numpy.broadcast_to(c, gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
        gx = gx.reshape(y.shape)
    gx *= gloss * self._coeff
    return gx, None
softmax_cross_entropy.py source code (Python)
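As a sanity check on the formula implemented above: the gradient of softmax cross entropy with respect to the logits is softmax(x) minus the one-hot encoding of the target, with self._coeff playing the role of 1 / batch_size when the loss is averaged over the batch and no labels are ignored. The standalone NumPy sketch below is not part of Chainer; the softmax and loss helpers and all variable names are made up for illustration. It compares that analytic gradient with a central finite-difference estimate, and reproduces the fst_index / trd_index flattened-indexing trick used in the ndim > 2 branch.

import numpy

def softmax(x):
    # numerically stable softmax along the class axis (axis=1)
    e = numpy.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def loss(x, t):
    # mean softmax cross entropy over the mini-batch
    return -numpy.log(softmax(x)[numpy.arange(len(t)), t]).mean()

rng = numpy.random.RandomState(0)
x = rng.randn(4, 3)
t = rng.randint(0, 3, size=4)

# analytic gradient: (softmax(x) - onehot(t)) / batch_size
gx = softmax(x)
gx[numpy.arange(len(t)), t] -= 1
gx /= len(t)

# central finite-difference estimate of the same gradient
num = numpy.zeros_like(x)
eps = 1e-5
for i in range(x.size):
    d = numpy.zeros(x.size)
    d[i] = eps
    d = d.reshape(x.shape)
    num.flat[i] = (loss(x + d, t) - loss(x - d, t)) / (2 * eps)
assert numpy.allclose(gx, num, atol=1e-6)

# fst_index / trd_index trick from the ndim > 2 branch: flat position k
# of t.ravel() maps to sample k // n_unit and spatial slot k % n_unit
y3 = rng.randn(2, 3, 4)                  # (batch, n_class, n_unit)
t3 = rng.randint(0, 3, size=(2, 4))
n_unit = t3.size // len(t3)
fst_index = numpy.arange(t3.size) // n_unit
trd_index = numpy.arange(t3.size) % n_unit
picked = y3[fst_index, t3.ravel(), trd_index].reshape(t3.shape)
expected = numpy.take_along_axis(y3, t3[:, None, :], axis=1)[:, 0, :]
assert numpy.allclose(picked, expected)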