def update_one_gpu(self, param, state):
cuda.elementwise(
'T grad, T one_minus_rho, T eps',
'T param, T msg, T msdx',
'''msg = msg + one_minus_rho * (grad * grad - msg);
T dx = sqrt((msdx + eps) / (msg + eps)) * grad;
msdx += one_minus_rho * (dx * dx - msdx);
param -= dx;''',
'adadelta')(param.grad, 1 - self.rho, self.eps,
param.data, state['msg'], state['msdx'])
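Every snippet in this listing follows the same `cuda.elementwise` pattern: an input parameter list, an output parameter list, a C-like per-element body, and a kernel name used for caching. For reference, here is a minimal standalone sketch using `cupy.ElementwiseKernel` directly, which is the primitive that `chainer.cuda.elementwise` memoizes. The kernel name and arrays are illustrative only, and the example assumes CuPy and a CUDA-capable GPU are available.

import cupy

# Minimal sketch: a squared-difference kernel, structurally analogous to the
# optimizer and pooling kernels in this listing. 'T' is a type placeholder
# resolved from the dtypes of the actual arguments at call time.
squared_diff = cupy.ElementwiseKernel(
    'T x, T y',               # input parameters
    'T z',                    # output parameter
    'z = (x - y) * (x - y)',  # per-element body
    'squared_diff')           # kernel name (used for caching)

a = cupy.arange(5, dtype=cupy.float32)
b = cupy.ones(5, dtype=cupy.float32)
print(squared_diff(a, b))  # -> [1. 0. 1. 4. 9.]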
def update_one_gpu(self, param, state):
cuda.elementwise('T grad, T lr', 'T param',
'param -= lr * grad',
'sgd')(param.grad, self.lr, param.data)
def __call__(self, opt):
if cuda.available:
kernel = cuda.elementwise(
'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
rate = self.rate
for param in opt.target.params():
p, g = param.data, param.grad
with cuda.get_device(p) as dev:
if int(dev) == -1:
g += rate * p
else:
kernel(p, rate, g)
def forward_gpu(self, x):
if (cuda.cudnn_enabled and self.use_cudnn and
pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
return super(AveragePooling2D, self).forward_gpu(x)
n, c, h, w = x[0].shape
y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
coeff = 1. / (self.kh * self.kw)
kern = cuda.elementwise(
'raw T in, int32 h, int32 w,'
'int32 out_h, int32 out_w, int32 kh, int32 kw,'
'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
'T out', '''
int c0 = i / (out_h * out_w);
int out_y = i / out_w % out_h;
int out_x = i % out_w;
int in_y_0 = max(0, out_y * sy - ph);
int in_y_1 = min(h, out_y * sy + kh - ph);
int in_x_0 = max(0, out_x * sx - pw);
int in_x_1 = min(w, out_x * sx + kw - pw);
T val = 0;
for (int y = in_y_0; y < in_y_1; ++y) {
int offset_y = w * (y + h * c0);
for (int x = in_x_0; x < in_x_1; ++x) {
val = val + in[x + offset_y];
}
}
out = val * coeff;
''', 'avg_pool_fwd')
kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw,
self.sy, self.sx, self.ph, self.pw, coeff, y)
return y,
def backward_gpu(self, x, gy):
if (cuda.cudnn_enabled and self.use_cudnn and
pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
return super(AveragePooling2D, self).backward_gpu(x, gy)
n, c, h, w = x[0].shape
y_h, y_w = gy[0].shape[2:]
gx = cuda.cupy.empty_like(x[0])
coeff = 1. / (self.kh * self.kw)
cuda.elementwise(
'raw T gy, int32 h, int32 w,'
'int32 out_h, int32 out_w, int32 kh, int32 kw,'
'int32 sy, int32 sx, int32 ph, int32 pw, T coeff',
'T gx',
'''
int c0 = i / (h * w);
int y = i / w % h + ph;
int x = i % w + pw;
int out_y_0 = max(0, (y - kh + sy) / sy);
int out_y_1 = min(out_h, (y + sy) / sy);
int out_x_0 = max(0, (x - kw + sx) / sx);
int out_x_1 = min(out_w, (x + sx) / sx);
int hc0 = out_h * c0;
T val = 0;
for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
val = val + gy[out_x + out_w * (out_y + hc0)];
}
}
gx = val * coeff;
''', 'avg_pool_bwd')(gy[0].reduced_view(),
h, w, y_h, y_w, self.kh, self.kw,
self.sy, self.sx, self.ph, self.pw, coeff,
gx)
return gx,
def forward_gpu(self, x):
shape = self.shape.tolist()
y = cuda.cupy.zeros((shape[0], shape[1], shape[2], shape[3]), dtype=x[0].dtype)
cuda.elementwise(
'T in, S indices',
'raw T out',
'''
out[indices] = in;
''', 'unpool')(x[0], self.indices, y.reduced_view())
return y,
def backward_gpu(self, x, gy):
gx = cuda.cupy.empty_like(x[0])
cuda.elementwise(
'raw T in, S indices',
'T out',
'''
out = in[indices];
''', 'unpool')(gy[0].reduced_view(), self.indices, gx)
return gx,
def backward_gpu(self, x, gy):
if (cuda.cudnn_enabled and self.use_cudnn and
pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
return super(MaxPooling2D, self).backward_gpu(x, gy)
n, c, h, w = x[0].shape
y_h, y_w = gy[0].shape[2:]
gx = cuda.cupy.empty_like(x[0])
cuda.elementwise(
'raw T gy, raw S indexes, int32 h, int32 w,'
'int32 out_h, int32 out_w, int32 kh, int32 kw,'
'int32 sy, int32 sx, int32 ph, int32 pw',
'T gx',
'''
int c0 = i / (h * w);
int y = i / w % h + ph;
int x = i % w + pw;
int out_y_0 = max(0, (y - kh + sy) / sy);
int out_y_1 = min(out_h, (y + sy) / sy);
int out_x_0 = max(0, (x - kw + sx) / sx);
int out_x_1 = min(out_w, (x + sx) / sx);
T val = 0;
for (int out_y = out_y_0; out_y < out_y_1; ++out_y) {
int ky = y - out_y * sy;
for (int out_x = out_x_0; out_x < out_x_1; ++out_x) {
int kx = x - out_x * sx;
int offset = out_x + out_w * (out_y + out_h * c0);
if (indexes[offset] == kx + kw * ky) {
val = val + gy[offset];
}
}
}
gx = val;
''',
'max_pool_bwd')(gy[0].reduced_view(), self.indexes.reduced_view(),
h, w, y_h, y_w, self.kh, self.kw,
self.sy, self.sx, self.ph, self.pw,
gx)
return gx,
def backward_gpu(self, inputs, grad_outputs):
x, t = inputs
gloss = grad_outputs[0]
gx = cuda.cupy.zeros_like(x)
gx = cuda.elementwise(
'S t, T gloss',
'raw T gx',
'int ind[] = {i, t}; gx[ind] = gloss;',
'getitem_bwd'
)(t, gloss, gx)
return gx, None
def forward_gpu(self, inputs):
cupy = cuda.cupy
mean, ln_var = inputs
if self.eps is None:
self.eps = cupy.random.standard_normal(
ln_var.shape, dtype=mean.dtype)
self.noise = cuda.cupy.empty_like(mean)
self.noise = cuda.elementwise(
'T v, T e', 'T noise',
'noise = exp(v / 2) * e',
'gaussian_forward'
)(ln_var, self.eps)
return mean + self.noise,
def forward_gpu(self, inputs):
x = inputs[0]
return cuda.elementwise(
'T x', 'T y',
'y = min(1.0, max(0.0, x * 0.2 + 0.5))',
'hard_sigmoid_fwd'
)(x),
def backward_gpu(self, inputs, grads):
x = inputs[0]
g = grads[0]
return cuda.elementwise(
'T x, T g', 'T gx',
'gx = fabs(x) < 2.5 ? 0.2 * g : 0',
'hard_sigmoid_bwd'
)(x, g),
def forward_gpu(self, inputs):
x = inputs[0]
if (cuda.cudnn_enabled and self.use_cudnn and
(_cudnn_version >= 3000 or x.dtype != numpy.float16)):
self.y = cuda.cupy.cudnn.activation_forward(x, _mode)
else:
self.y = cuda.elementwise(
'T x', 'T y', 'y = 1 / (1 + exp(-x))',
'sigmoid_fwd')(x)
return self.y,
def backward_gpu(self, inputs, grads):
x = inputs[0]
gy = grads[0]
if (cuda.cudnn_enabled and self.use_cudnn and
(_cudnn_version >= 3000 or x.dtype != numpy.float16)):
gx = cuda.cupy.cudnn.activation_backward(x, self.y, gy, _mode)
else:
gx = cuda.elementwise(
'T y, T gy', 'T gx',
'gx = gy * y * (1 - y)',
'sigmoid_bwd')(self.y, gy)
return gx,
def forward(self, inputs):
c_prev1, c_prev2, x1, x2 = inputs
a1, i1, f1, o1 = _extract_gates(x1)
a2, i2, f2, o2 = _extract_gates(x2)
if isinstance(x1, numpy.ndarray):
self.a1 = numpy.tanh(a1)
self.i1 = _sigmoid(i1)
self.f1 = _sigmoid(f1)
self.a2 = numpy.tanh(a2)
self.i2 = _sigmoid(i2)
self.f2 = _sigmoid(f2)
self.o = _sigmoid(o1 + o2)
self.c = self.a1 * self.i1 + self.a2 * self.i2 + \
self.f1 * c_prev1 + self.f2 * c_prev2
h = self.o * numpy.tanh(self.c)
else:
self.c, h = cuda.elementwise(
'''T c_prev1, T a1, T i1, T f1, T o1,
T c_prev2, T a2, T i2, T f2, T o2''',
'T c, T h',
'''
COMMON_ROUTINE;
c = aa1 * ai1 + af1 * c_prev1 + aa2 * ai2 + af2 * c_prev2;
h = ao * tanh(c);
''',
'slstm_fwd', preamble=_preamble)(
c_prev1, a1, i1, f1, o1, c_prev2, a2, i2, f2, o2)
return self.c, h
def forward_gpu(self, x):
return cuda.elementwise(
'T x, T cap', 'T y', 'y = min(max(x, (T)0), cap)',
'clipped_relu_fwd')(x[0], self.cap),
def backward_gpu(self, x, gy):
gx = cuda.elementwise(
'T x, T gy, T z', 'T gx',
'gx = ((x > 0) & (x < z))? gy : (T)0',
'clipped_relu_bwd')(x[0], gy[0], self.cap)
return gx,
def _kern():
return cuda.elementwise(
'T cond, T x, T slope', 'T y',
'y = cond >= 0 ? x : (T)(slope * x)', 'lrelu')
def backward_gpu(self, inputs, grad_outputs):
x, W = inputs
gy = grad_outputs[0]
masked = cuda.elementwise(
'T x, T gy', 'T masked',
'masked = x >= 0 ? (T)0 : (T)(x * gy)',
'prelu_masked')(x, gy)
axes = (0,) + tuple(six.moves.range(1 + W.ndim, gy.ndim))
gW = masked.sum(axis=axes)
gx = masked # reuse buffer
shape = _get_extended_shape(W, gx)
_fwd_kern()(gy, x, W.reshape(shape), gx)
return gx, gW
def prelu(x, W):
"""Parametric ReLU function.

    It accepts two arguments: an input ``x`` and a weight array ``W``
    and computes the output as :math:`PReLU(x) = \\max(x, W*x)`,
    where :math:`*` is an elementwise multiplication for each sample in the
    batch.

    When the PReLU function is combined with two-dimensional convolution, the
    elements of the parameter :math:`W` are typically shared across the same
    filter at different pixel positions. To support this usage, the parameter
    array may have a shape that matches the leading dimensions of the input
    array, excluding the batch dimension.

    For example, if :math:`W` has the shape :math:`(2, 3, 4)`, then :math:`x`
    must have the shape :math:`(B, 2, 3, 4, S_1, ..., S_N)`, where :math:`B`
    is the batch size and the number of trailing :math:`S` dimensions is an
    arbitrary non-negative integer.

    Args:
        x (~chainer.Variable): Input variable.
            Its first dimension is assumed to be the minibatch dimension.
        W (~chainer.Variable): Weight variable.

    Returns:
        ~chainer.Variable: Output variable.

    .. seealso:: :class:`~chainer.links.PReLU`

    """
return PReLUFunction()(x, W)
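As a usage sketch of the shape rule described in the docstring: a per-channel slope array broadcasts over the trailing spatial dimensions. The shapes and the 0.25 slope below are made up for illustration and assume NumPy and Chainer are importable.

import numpy as np
from chainer import Variable
import chainer.functions as F

# W holds one learnable slope per channel; x is a (batch, channels, H, W)
# input, so W matches the leading non-batch dimensions of x.
x = Variable(np.random.randn(2, 3, 4, 4).astype(np.float32))
W = Variable(np.full((3,), 0.25, dtype=np.float32))

y = F.prelu(x, W)
print(y.data.shape)  # (2, 3, 4, 4)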