def forward_gpu(self, x):
    # Implementation using cudnn
    x = x[0]
    n, c, h, w = x.shape
    y_h = conv.get_conv_outsize(
        h, self.kh, self.sy, self.ph, self.cover_all)
    y_w = conv.get_conv_outsize(
        w, self.kw, self.sx, self.pw, self.cover_all)
    y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype)

    handle = cudnn.get_handle()
    pool_desc = self.create_pool_desc()
    x_desc = cudnn.create_tensor_descriptor(x)
    y_desc = cudnn.create_tensor_descriptor(y)

    # cuDNN expects the alpha/beta scaling factors as host scalars whose
    # precision matches the tensor dtype (double for 'd', float otherwise).
    oz_dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=oz_dtype).ctypes
    zero = numpy.array(0, dtype=oz_dtype).ctypes
    libcudnn.poolingForward(
        handle, pool_desc.value, one.data, x_desc.value,
        x.data.ptr, zero.data, y_desc.value, y.data.ptr)
    self.y = y
    return y,
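
For context, conv.get_conv_outsize computes the pooled output size from the input size, kernel, stride, and padding. A minimal sketch of that formula, assuming Chainer's usual convention of floor division normally and ceiling division when cover_all is set (the helper name below is hypothetical, not Chainer's):

def _conv_outsize_sketch(size, k, s, p, cover_all=False):
    # With cover_all, every input element must be covered by at least one
    # window, so a possibly partial last window is counted (ceiling division).
    if cover_all:
        return (size + p * 2 - k + s - 1) // s + 1
    return (size + p * 2 - k) // s + 1
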
def backward_gpu(self, x, gy):
    # Implementation using cudnn
    x = x[0]
    handle = cudnn.get_handle()
    pool_desc = self.create_pool_desc()

    # Pooling in cuDNN v2 does not seem to support non-contiguous gradients.
    gy = cuda.cupy.ascontiguousarray(gy[0])

    x_desc = cudnn.create_tensor_descriptor(x)
    y_desc = cudnn.create_tensor_descriptor(gy)

    oz_dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=oz_dtype).ctypes
    zero = numpy.array(0, dtype=oz_dtype).ctypes
    gx = cuda.cupy.empty_like(x)
    libcudnn.poolingBackward(
        handle, pool_desc.value, one.data, y_desc.value,
        self.y.data.ptr, y_desc.value, gy.data.ptr, x_desc.value,
        x.data.ptr, zero.data, x_desc.value, gx.data.ptr)
    return gx,
def forward(self, xs):
    x = xs[0]
    xp = cuda.get_array_module(x)
    if (xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
            _cudnn_version >= 3000):
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        # Reshape to a 4-d (N, C, rest, 1) tensor as required by the cuDNN
        # tensor descriptor.
        x_cube = x.reshape(x.shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(x_cube)
        self.y = xp.empty_like(x)
        libcudnn.softmaxForward(
            handle, _algorithm, _mode, one.data, desc.value,
            x_cube.data.ptr, zero.data, desc.value,
            self.y.data.ptr)
        return self.y,
    else:
        # Numerically stable log-softmax: x - logsumexp(x).
        log_z = logsumexp(x)
        self.y = x - log_z
        return self.y,
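
The non-cuDNN branch above relies on a logsumexp helper for numerical stability. A small NumPy-only sketch of what such a helper typically does, reducing over axis 1 as in the surrounding softmax code (names are illustrative, not Chainer's):

import numpy

def _logsumexp_sketch(x, axis=1):
    # Shift by the per-row maximum before exponentiating to avoid overflow,
    # then add the shift back after taking the log of the sum.
    m = x.max(axis=axis, keepdims=True)
    return numpy.log(numpy.exp(x - m).sum(axis=axis, keepdims=True)) + m
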
def forward(self, x):
    xp = cuda.get_array_module(*x)
    if (xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        oz_dtype = 'd' if x[0].dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        x_cube = x[0].reshape(x[0].shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(x_cube)
        self.y = xp.empty_like(x[0])
        libcudnn.softmaxForward(
            handle, _algorithm, _mode, one.data, desc.value,
            x_cube.data.ptr, zero.data, desc.value,
            self.y.data.ptr)
    else:
        # Numerically stable softmax over axis 1: subtract the maximum,
        # exponentiate, then normalize.
        self.y = x[0] - x[0].max(axis=1, keepdims=True)
        xp.exp(self.y, out=self.y)
        self.y /= self.y.sum(axis=1, keepdims=True)
    return self.y,
def backward(self, x, gy):
    xp = cuda.get_array_module(*x)
    if (xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        oz_dtype = 'd' if x[0].dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        gx = xp.empty_like(x[0])
        gx_cube = gx.reshape(gx.shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(gx_cube)
        libcudnn.softmaxBackward(
            handle, _algorithm, _mode, one.data, desc.value,
            self.y.data.ptr, desc.value, gy[0].data.ptr, zero.data,
            desc.value, gx.data.ptr)
    else:
        # gx = y * gy - y * sum(y * gy): the softmax Jacobian applied to
        # the upstream gradient.
        gx = self.y * gy[0]
        sumdx = gx.sum(axis=1, keepdims=True)
        gx -= self.y * sumdx
    return gx,
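
The fallback branch above computes gx = y * (gy - sum(y * gy)), which is the softmax Jacobian applied to the upstream gradient. A self-contained NumPy sketch that checks this identity against a central finite difference (all names here are illustrative, not Chainer code):

import numpy

def _softmax(x):
    e = numpy.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def _softmax_backward(y, gy):
    # Same update as the fallback branch: y * gy - y * sum(y * gy).
    return y * (gy - (y * gy).sum(axis=1, keepdims=True))

x = numpy.random.randn(2, 5)
gy = numpy.random.randn(2, 5)
gx = _softmax_backward(_softmax(x), gy)

# Numerical check of a single entry of gx.
eps = 1e-6
d = numpy.zeros_like(x)
d[0, 0] = eps
numerical = (((_softmax(x + d) - _softmax(x - d)) / (2 * eps)) * gy).sum(axis=1)[0]
assert abs(numerical - gx[0, 0]) < 1e-4
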
def softmax_log(x, use_cudnn):
    xp = cuda.get_array_module(x)
    if (xp != numpy and cuda.cudnn_enabled and use_cudnn and
            _cudnn_version >= 3000):
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        x_cube = x.reshape(x.shape[:2] + (-1, 1))
        desc = cudnn.create_tensor_descriptor(x_cube)
        y = xp.empty_like(x)
        libcudnn.softmaxForward(
            handle, _algorithm, _mode, one.data, desc.value,
            x_cube.data.ptr, zero.data, desc.value,
            y.data.ptr)
        return y
    else:
        log_z = logsumexp(xp, x)
        return x - log_z
def _make_tensor_descriptor_array(xs, length):
    """Make an array of pointers to tensor descriptors.

    ``xs`` is split along the first axis into ``length`` equally sized
    mini-batches, and one cuDNN ND tensor descriptor is created per slice.
    """
    descs = []
    batch_size = xs.shape[0] // length
    for i in range(length):
        x = xs[i * batch_size:(i + 1) * batch_size]
        if x.ndim < 3:
            # Pad to at least three dimensions as expected by the ND
            # tensor descriptor.
            shape = x.shape + (1,) * (3 - x.ndim)
            x = x.reshape(shape)
        desc = cudnn.create_tensor_nd_descriptor(x)
        descs.append(desc)
    return PointerArray([d.value for d in descs], descs)
def create_pool_desc(self):
    return cudnn.create_pooling_descriptor(
        (self.kh, self.kw), (self.sy, self.sx), (self.ph, self.pw),
        libcudnn.CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING)
def create_pool_desc(self):
    return cudnn.create_pooling_descriptor(
        (self.kh, self.kw), (self.sy, self.sx), (self.ph, self.pw),
        libcudnn.CUDNN_POOLING_MAX)
def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        y = cudnn.activation_forward(x[0], _mode)
        self.y = y
    else:
        y = cuda.cupy.maximum(x[0], 0)
    return y,
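
Only the forward pass of this ReLU is shown; its non-cuDNN backward would simply mask the upstream gradient where the output was positive. A minimal sketch of that rule in NumPy (illustrative, not the actual Chainer kernel):

import numpy

def _relu_backward_sketch(y, gy):
    # The gradient passes through only where the forward output is positive.
    return gy * (y > 0)
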
def forward_gpu(self, inputs):
    x = inputs[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        self.y = cuda.cupy.cudnn.activation_forward(x, _mode)
    else:
        self.y = cuda.elementwise(
            'T x', 'T y', 'y = 1 / (1 + exp(-x))',
            'sigmoid_fwd')(x)
    return self.y,
def backward_gpu(self, inputs, grads):
    x = inputs[0]
    gy = grads[0]
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x.dtype != numpy.float16)):
        gx = cuda.cupy.cudnn.activation_backward(x, self.y, gy, _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx',
            'gx = gy * y * (1 - y)',
            'sigmoid_bwd')(self.y, gy)
    return gx,
def forward_gpu(self, x):
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        self.y = cudnn.activation_forward(x[0], _mode)
    else:
        self.y = cuda.cupy.empty_like(x[0])
        cuda.cupy.tanh(x[0], out=self.y)
    return self.y,
def backward_gpu(self, x, gy):
    if (cuda.cudnn_enabled and self.use_cudnn and
            (_cudnn_version >= 3000 or x[0].dtype != numpy.float16)):
        gx = cudnn.activation_backward(x[0], self.y, gy[0], _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx',
            'gx = gy * (1 - y * y)',
            'tanh_bwd')(self.y, gy[0])
    return gx,
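
As with the sigmoid kernel, the fallback implements the closed-form derivative of tanh, 1 - y * y with y = tanh(x). A quick NumPy check of that identity (a sketch, independent of Chainer):

import numpy

x = numpy.linspace(-3.0, 3.0, 7)
y = numpy.tanh(x)
eps = 1e-6
numerical = (numpy.tanh(x + eps) - numpy.tanh(x - eps)) / (2 * eps)
assert numpy.allclose(numerical, 1 - y * y, atol=1e-6)
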