def forward_gpu(self, x):
    """Compute the upsampled output on the GPU.

    Each element of ``x[0]`` is written into a zero-initialised output at
    the in-window offset given by the matching entry of ``self.indexes``;
    every other output element stays zero.

    ``self.outh`` / ``self.outw`` are computed with
    ``conv.get_deconv_outsize`` and stored on the instance when they are
    still ``None``.

    Args:
        x: Sequence whose first item is a 4-dimensional array (its shape is
            unpacked into four values).
            NOTE(review): the kernel is declared for ``float32`` input --
            confirm callers always pass that dtype.

    Returns:
        A 1-tuple holding the array returned by ``conv.col2im_gpu``.
    """
    xp = cuda.cupy
    src = x[0]
    batch, channels, in_h, in_w = src.shape

    # Derive the output size lazily, only for dimensions not preset.
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            in_h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            in_w, self.kw, self.sx, self.pw, cover_all=self.cover_all)

    # Start from an all-zero output and view it as per-window columns, so
    # that one flat offset inside a (ky * kx) window selects the target.
    canvas = xp.zeros(
        (batch, channels, self.outh, self.outw), dtype=numpy.float32)
    cols = conv.im2col_gpu(
        canvas, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    # Move the two window axes last; the shape is then unpacked as
    # (n, c, oy, ox, ky, kx).
    cols = cols.transpose(0, 1, 4, 5, 2, 3)
    col_n, col_c, col_oy, col_ox, col_ky, col_kx = cols.shape

    positions = xp.asarray(self.indexes, dtype=numpy.int32)

    kernel_inputs = (
        'int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,'
        'int32 ky, int32 kx')
    kernel_outputs = 'raw float32 up_y'
    # ``i`` is the element-wise loop index. It is decomposed into
    # (yn, yc, yoy, yox); the write goes to the start of that window
    # (ky * kx elements each) plus ``index``.
    kernel_body = (
        '\n'
        'int yn = i / c / oy / ox;\n'
        'int yc = (i / oy / ox) % c;\n'
        'int yoy = (i / ox) % oy;\n'
        'int yox = i % ox;\n'
        'up_y[yn * c * oy * ox * ky * kx + '
        'yc * oy * ox * ky * kx + '
        'yoy * ox * ky * kx + '
        'yox * ky * kx + '
        'index] = x;\n'
    )
    scatter = xp.ElementwiseKernel(
        kernel_inputs, kernel_outputs, kernel_body, 'upsampling_2d_fwd')
    scatter(positions, src, col_n, col_c, col_oy, col_ox, col_ky, col_kx,
            cols)

    # Restore the axis order produced by im2col and fold the columns back
    # into an image of size (outh, outw).
    cols = cols.transpose(0, 1, 4, 5, 2, 3)
    result = conv.col2im_gpu(
        cols, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    return (result,)
# Comment list
# Table of contents