def __init__(self, use_cudnn=True, normalize=True, cache_score=True,
             class_weight=None, ignore_label=-1, reduce='mean'):
    self.use_cudnn = use_cudnn
    self.normalize = normalize
    self.cache_score = cache_score
    self.class_weight = class_weight
    if class_weight is not None:
        if self.class_weight.ndim != 1:
            raise ValueError('class_weight.ndim should be 1')
        if self.class_weight.dtype.kind != 'f':
            raise ValueError('The dtype of class_weight should be \'f\'')
        if isinstance(self.class_weight, chainer.Variable):
            raise ValueError('class_weight should be a numpy.ndarray or '
                             'cupy.ndarray, not a chainer.Variable')
    self.ignore_label = ignore_label
    if reduce not in ('mean', 'no'):
        raise ValueError(
            "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
            'given' % reduce)
    self.reduce = reduce
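# Minimal usage sketch (assumption: this __init__ belongs to a Chainer-style
# SoftmaxCrossEntropy Function object; the exact class name is illustrative):
#
#     loss_fun = SoftmaxCrossEntropy(normalize=True, ignore_label=-1,
#                                    reduce='mean')
#     loss = loss_fun(x, t)  # x: float scores (N, C, ...), t: int32 labels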
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    # x is assumed to already hold probabilities; a small epsilon avoids log(0).
    log_y = cupy.log(x + 1e-5)
    self.y = x
    if self.debug:
        ipdb.set_trace()
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = softmax_log(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
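# What the fused cuda.reduce kernel above computes, sketched in NumPy terms for
# the two-dimensional case (an illustrative assumption, not part of the source):
#
#     mask = (t != -1)
#     loss = -(log_y[np.arange(len(t)), t] * mask).sum() * coeff
#
# i.e. the (optionally normalized) negative log-likelihood of the target classes,
# with ignored labels contributing zero to the sum.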
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    # x is assumed to already hold probabilities; a small epsilon avoids log(0).
    log_y = cupy.log(x + 1e-5)
    self.y = x
    if self.debug:
        ipdb.set_trace()
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff, raw T weights',
        'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t] * weights[t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff,
      self.weights.reduced_view())
    return ret,
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = cupy.log(x)
    if self.cache_score:
        self.y = x
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
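# Note (assumption about this variant's intent): unlike the snippets above, it
# takes log(x) without an epsilon, so x must contain strictly positive
# probabilities or zero entries will produce -inf inside the reduction.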
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t, w = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = softmax_log(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if getattr(self, 'normalize', True):
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, T w, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t] * w',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, w, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
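# Per-example weighting, sketched for the two-dimensional case (illustrative
# assumption): each example's negative log-likelihood is scaled by its own
# weight w before the normalized sum is taken:
#
#     loss = -(w * log_y[np.arange(len(t)), t] * (t != -1)).sum() * coeff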
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = log_softmax._log_softmax(x, self.use_cudnn)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        # Reshape the length-C weight vector so it broadcasts along the channel axis.
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    ret = cuda.reduce(
        'S t, raw T log_y, int32 n_channel, raw T coeff', 'T out',
        't == -1 ? T(0) : log_y[_j * n_channel + t]',
        'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
    )(t, log_y.reduced_view(), log_y.shape[-1], self._coeff)
    return ret,
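# Broadcast example (sketch): for x of shape (N, C, H, W), x.ndim == 4, so the
# comprehension above yields shape == [1, -1, 1, 1]; reshaping the length-C
# class_weight to that shape lets it multiply log_y along axis 1.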
def forward_cpu(self, inputs):
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = np.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= _broadcast_to(self.class_weight.reshape(shape), x.shape)
    log_yd = np.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    log_p = log_yd[np.maximum(t.ravel(), 0), np.arange(t.size)]
    log_p *= (t.ravel() != self.ignore_label)
    if self.reduce == 'mean':
        # deal with the case where the SoftmaxCrossEntropy is
        # unpickled from the old version
        if self.normalize:
            count = (t != self.ignore_label).sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)
        y = log_p.sum(keepdims=True) * (-self._coeff)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
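# With reduce == 'mean' the function returns a 0-dimensional scalar loss; with
# reduce == 'no' it returns one loss value per target element, shaped like t,
# which the caller can weight or mask itself.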
def backward_cpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    if hasattr(self, 'y'):
        y = self.y.copy()
    else:
        y = log_softmax._log_softmax(x)
        np.exp(y, out=y)
    if y.ndim == 2:
        gx = y
        gx[np.arange(len(t)), np.maximum(t, 0)] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c[np.arange(len(t)), np.maximum(t, 0)]
            gx *= _broadcast_to(np.expand_dims(c, 1), gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1))
    else:
        n_unit = t.size // len(t)
        gx = y.reshape(y.shape[0], y.shape[1], -1)
        fst_index = np.arange(t.size) // n_unit
        trd_index = np.arange(t.size) % n_unit
        gx[fst_index, np.maximum(t.ravel(), 0), trd_index] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c.reshape(gx.shape)
            c = c[fst_index, np.maximum(t.ravel(), 0), trd_index]
            c = c.reshape(y.shape[0], 1, -1)
            gx *= _broadcast_to(c, gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
        gx = gx.reshape(y.shape)
    if self.reduce == 'mean':
        gx *= gloss * self._coeff
    else:
        gx *= gloss[:, None]
    return gx, None
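# Gradient sketch: for softmax cross entropy the derivative w.r.t. the logits is
# softmax(x) - onehot(t); the in-place "-= 1" updates above build exactly that,
# which is then scaled by class weights, zeroed for ignore_label positions, and
# multiplied by the incoming gradient (and by 1/count when reduce == 'mean').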
def backward_gpu(self, inputs, grad_outputs):
    cupy = cuda.cupy
    x, t = inputs
    if hasattr(self, 'y'):
        y = self.y
    else:
        y = log_softmax._log_softmax(x)
        cupy.exp(y, out=y)
    gloss = grad_outputs[0]
    n_unit = t.size // len(t)
    if self.reduce == 'mean':
        coeff = gloss * self._coeff
    else:
        coeff = gloss[:, None, ...]
    if self.class_weight is None:
        gx = cuda.elementwise(
            'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t));
            ''',
            'softmax_crossent_bwd')(
                y, cupy.expand_dims(t, 1), coeff, x.shape[1],
                n_unit, self.ignore_label)
    else:
        gx = cuda.elementwise(
            'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
            'S ignore_label',
            'T gx',
            '''
                const int c = (i / n_unit % n_channel);
                gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
            ''',
            'softmax_crossent_weight_bwd')(
                y, self.class_weight, cupy.expand_dims(t, 1), coeff,
                x.shape[1], n_unit, self.ignore_label)
    return gx, None
def backward(self, inputs, grad_outputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')
        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl
        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'
    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb
    return ret
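# On GPU the broadcast views plus the 'bilinear_product' reduction reproduce the
# CPU einsum contractions above, e.g.
#     gW[j, k, l] = sum_i e1[i, j] * e2[i, k] * gy[i, l],
# with the summed-out axes selected through the `axis` argument of the reduce kernel.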
def forward_cpu(self, inputs):
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = np.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
        log_y *= _broadcast_to(self.class_weight.reshape(shape), x.shape)
    log_yd = np.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    log_p = log_yd[np.maximum(t.ravel(), 0), np.arange(t.size)]
    log_p *= (t.ravel() != self.ignore_label)
    if self.reduce == 'mean':
        # deal with the case where the SoftmaxCrossEntropy is
        # unpickled from the old version
        if self.normalize:
            count = (t != self.ignore_label).sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)
        y = log_p.sum(keepdims=True) * (-self._coeff)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
def backward_cpu(self, inputs, grad_outputs):
    x, t = inputs
    gloss = grad_outputs[0]
    if hasattr(self, 'y'):
        y = self.y.copy()
    else:
        y = log_softmax._log_softmax(x)
        np.exp(y, out=y)
    if y.ndim == 2:
        gx = y
        gx[np.arange(len(t)), np.maximum(t, 0)] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c[np.arange(len(t)), np.maximum(t, 0)]
            gx *= _broadcast_to(np.expand_dims(c, 1), gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1))
    else:
        n_unit = t.size // len(t)
        gx = y.reshape(y.shape[0], y.shape[1], -1)
        fst_index = np.arange(t.size) // n_unit
        trd_index = np.arange(t.size) % n_unit
        gx[fst_index, np.maximum(t.ravel(), 0), trd_index] -= 1
        if self.class_weight is not None:
            shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
            c = _broadcast_to(self.class_weight.reshape(shape), x.shape)
            c = c.reshape(gx.shape)
            c = c[fst_index, np.maximum(t.ravel(), 0), trd_index]
            c = c.reshape(y.shape[0], 1, -1)
            gx *= _broadcast_to(c, gx.shape)
        gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
        gx = gx.reshape(y.shape)
    if self.reduce == 'mean':
        gx *= gloss * self._coeff
    else:
        gx *= gloss[:, None]
    return gx, None
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    if self.reduce == 'mean':
        ret = cuda.reduce(
            'S t, raw T log_y, int32 n_channel, raw T coeff, '
            'S ignore_label',
            'T out',
            't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
            'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1],
          self._coeff, self.ignore_label)
    else:
        ret = cuda.elementwise(
            'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
            '''
            if (t == ignore) {
              out = 0;
            } else {
              out = -log_y[i * n_channel + t];
            }
            ''',
            'softmax_crossent_no_reduce_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
        ret = ret.reshape(t.shape)
    return ret,
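# Unlike the earlier kernels that hard-code `t == -1`, this variant passes
# self.ignore_label into both kernels, so any ignore index works; the 'no'
# branch returns an unreduced loss with the same shape as t.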
def forward_gpu(self, inputs):
    x, t, W = inputs
    max_length = cuda.reduce(
        'T t, raw T begins', 'T out', 'begins[t + 1] - begins[t]',
        'max(a, b)', 'out = a', '0',
        'binary_hierarchical_softmax_max_length')(t, self.begins)
    max_length = cuda.to_cpu(max_length)[()]
    length = max_length * x.shape[0]
    ls = cuda.cupy.empty((length,), dtype=numpy.float32)
    n_in = x.shape[1]
    wxy = cuda.cupy.empty_like(ls)
    cuda.elementwise(
        '''raw T x, raw T w, raw int32 ts, raw int32 paths,
        raw T codes, raw int32 begins, int32 c, int32 max_length''',
        'T ls, T wxy',
        '''
        int ind = i / max_length;
        int offset = i - ind * max_length;
        int t = ts[ind];
        int begin = begins[t];
        int length = begins[t + 1] - begins[t];
        if (offset < length) {
          int p = begin + offset;
          int node = paths[p];
          T wx = 0;
          for (int j = 0; j < c; ++j) {
            int w_ind[] = {node, j};
            int x_ind[] = {ind, j};
            wx += w[w_ind] * x[x_ind];
          }
          wxy = wx * codes[p];
          ls = log(1 + exp(-wxy));
        } else {
          ls = 0;
        }
        ''',
        'binary_hierarchical_softmax_forward'
    )(x, W, t, self.paths, self.codes, self.begins, n_in, max_length, ls,
      wxy)
    self.max_length = max_length
    self.wxy = wxy
    return ls.sum(),
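# What the kernel computes (sketch): for every (example, path-node) pair it
# accumulates wx = dot(W[node], x[example]), multiplies by the node's +/-1 code,
# and stores log(1 + exp(-code * wx)), i.e. the logistic loss along the target
# word's path; padded slots beyond the path length contribute zero, and the
# total loss is the sum over all slots.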
def forward_gpu(self, inputs):
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        self._check_input_values(x, t)
    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if self.class_weight is not None:
        shape = [1 if d != 1 else -1 for d in xrange(x.ndim)]
        log_y *= cupy.broadcast_to(
            self.class_weight.reshape(shape), x.shape)
    if self.normalize:
        coeff = cupy.maximum(1, (t != self.ignore_label).sum())
    else:
        coeff = max(1, len(t))
    self._coeff = cupy.divide(1.0, coeff, dtype=x.dtype)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)
    if self.reduce == 'mean':
        ret = cuda.reduce(
            'S t, raw T log_y, int32 n_channel, raw T coeff, '
            'S ignore_label',
            'T out',
            't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
            'a + b', 'out = a * -coeff[0]', '0', 'crossent_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1],
          self._coeff, self.ignore_label)
    else:
        ret = cuda.elementwise(
            'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
            '''
            if (t == ignore) {
              out = 0;
            } else {
              out = -log_y[i * n_channel + t];
            }
            ''',
            'softmax_crossent_no_reduce_fwd'
        )(t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
        ret = ret.reshape(t.shape)
    return ret,