def forward_gpu(self, x):
if (cuda.cudnn_enabled and self.use_cudnn and
pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
return super(MaxPooling2D, self).forward_gpu(x)
n, c, h, w = x[0].shape
y_h = conv.get_conv_outsize(
h, self.kh, self.sy, self.ph, self.cover_all)
y_w = conv.get_conv_outsize(
w, self.kw, self.sx, self.pw, self.cover_all)
y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)
cuda.elementwise(
'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
'T out, S indexes',
'''
int c0 = i / (out_h * out_w);
int out_y = i / out_w % out_h;
int out_x = i % out_w;
int in_y_0 = max(0, out_y * sy - ph);
int in_y_1 = min(h, out_y * sy + kh - ph);
int in_x_0 = max(0, out_x * sx - pw);
int in_x_1 = min(w, out_x * sx + kw - pw);
T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
int argmax_y = in_y_0;
int argmax_x = in_x_0;
for (int y = in_y_0; y < in_y_1; ++y) {
int offset_y = w * (y + h * c0);
for (int x = in_x_0; x < in_x_1; ++x) {
float v = in[x + offset_y];
if (maxval < v) {
maxval = v;
argmax_y = y;
argmax_x = x;
}
}
}
out = maxval;
int argmax_ky = argmax_y + ph - out_y * sy;
int argmax_kx = argmax_x + pw - out_x * sx;
indexes = argmax_kx + kw * argmax_ky;
''', 'max_pool_fwd')(x[0].reduced_view(),
h, w, y_h, y_w, self.kh, self.kw,
self.sy, self.sx, self.ph, self.pw,
y, self.indexes)
return y,
评论列表
文章目录