def backward_gpu(self, inputs, grad_outputs):
    """Compute the gradients with respect to ``x`` and ``W`` on the GPU.

    Args:
        inputs: Pair ``(x, W)``: the input array and the parameter array.
        grad_outputs: Sequence whose first element ``gy`` is the gradient
            flowing in from the output.

    Returns:
        Tuple ``(gx, gW)``: the gradient for ``x`` and the gradient for ``W``.
    """
    x, W = inputs
    gy = grad_outputs[0]

    # Elementwise temporary: x * gy where x is negative, zero where x >= 0.
    mask_kernel = cuda.elementwise(
        'T x, T gy', 'T masked',
        'masked = x >= 0 ? (T)0 : (T)(x * gy)',
        'prelu_masked')
    neg_part = mask_kernel(x, gy)

    # Reduce over axis 0 and over every axis from 1 + W.ndim onwards; the
    # axes in between are kept (presumably the ones W spans -- TODO confirm).
    trailing_axes = tuple(six.moves.range(1 + W.ndim, gy.ndim))
    gW = neg_part.sum(axis=(0,) + trailing_axes)

    # gW has already been taken from the temporary, so its buffer can now be
    # handed to the kernel below as the output array instead of allocating.
    gx = neg_part
    ext_shape = _get_extended_shape(W, gx)
    # NOTE(review): _fwd_kern looks like the forward kernel reused with gy in
    # place of the forward input, writing its result into gx -- confirm.
    forward = _fwd_kern()
    forward(gy, x, W.reshape(ext_shape), gx)
    return gx, gW
评论列表
文章目录