def crop_gpu2cpu(x_gpu, sz, offset=(0, 0)):
    """Crop a 2-D GPU array to size ``sz`` by launching a CUDA crop kernel.

    The kernel is looked up by name in the module built from the
    module-level ``cubin`` buffer: ``crop_Kernel`` for ``float32`` input and
    ``crop_ComplexKernel`` for ``complex64`` input.

    Args:
        x_gpu: Source array. Must expose ``shape`` (at least two entries),
            ``dtype`` and ``gpudata``.
        sz: Two-element sequence giving the size of the cropped result;
            both entries are converted with ``int``.
        offset: Two-element sequence forwarded to the kernel as two
            ``int32`` values. Presumably the position of the crop window
            inside ``x_gpu`` -- confirm against the kernel source.

    Returns:
        The array allocated with ``cua.empty(sz, np.float32)`` after the
        kernel has been launched on it. Despite the ``gpu2cpu`` suffix, this
        function performs no explicit copy to host memory.

    Raises:
        TypeError: If ``x_gpu.dtype`` is neither ``float32`` nor
            ``complex64``.
    """
    # Resolve the kernel before touching the GPU so that an unsupported dtype
    # fails early with a clear message, instead of an unbound-variable error
    # at launch time after the output buffer has already been allocated.
    if x_gpu.dtype == np.float32:
        kernel_name = "crop_Kernel"
    elif x_gpu.dtype == np.complex64:
        kernel_name = "crop_ComplexKernel"
    else:
        raise TypeError(
            "crop_gpu2cpu supports float32 and complex64 arrays, got %s"
            % (x_gpu.dtype,))

    sfft = x_gpu.shape

    # One 16x16 thread block per tile; the grid is sized from the *source*
    # shape, rounding up so partially filled tiles are still covered.
    block_size = (16, 16, 1)
    grid_size = (int(np.ceil(np.float32(sfft[1]) / block_size[1])),
                 int(np.ceil(np.float32(sfft[0]) / block_size[0])))

    mod = cu.module_from_buffer(cubin)
    cropKernel = mod.get_function(kernel_name)

    # NOTE(review): the output buffer is float32 even for complex64 input.
    # Presumably crop_ComplexKernel writes a real-valued result -- confirm
    # against the kernel source before changing this dtype.
    x_cropped_gpu = cua.empty((int(sz[0]), int(sz[1])), np.float32)

    cropKernel(x_cropped_gpu.gpudata, np.int32(sz[0]), np.int32(sz[1]),
               x_gpu.gpudata, np.int32(sfft[0]), np.int32(sfft[1]),
               np.int32(offset[0]), np.int32(offset[1]),
               block=block_size, grid=grid_size)

    return x_cropped_gpu
# (Residue from the scraped web page: "Comment list" / "Table of contents".)