def modify_sparse_gpu(y_gpu, beta, alpha=2.0 / 3.0):
    """Launch the ``modify_alpha`` CUDA kernel on the device buffer of ``y_gpu``.

    The kernel source is built from ``_generate_preproc(...)`` concatenated
    with the module-level ``kernel_code`` and compiled on every call.

    Args:
        y_gpu: Object exposing ``shape``, ``dtype`` and ``gpudata``
            (presumably a ``pycuda.gpuarray.GPUArray`` -- TODO confirm).
            Its ``gpudata`` is handed to the kernel; the kernel presumably
            updates it in place -- verify against ``kernel_code``.
        beta: Scalar forwarded to the kernel as ``float32``.
        alpha: Scalar forwarded to the kernel as ``float32``. The default is
            written as ``2.0 / 3.0`` so it is two thirds under both Python 2
            and Python 3 (``2/3`` is ``0`` with Python 2 integer division).

    Returns:
        None.
    """
    shape = np.array(y_gpu.shape).astype(np.uint32)
    n_elements = np.prod(shape)

    if n_elements == 0:
        # Nothing to process, and the grid computed below would be (0, 0),
        # which is not a valid CUDA launch configuration.
        return

    # The flattened data is covered by a square 2-D arrangement of threads
    # whose side is ceil(sqrt(n_elements)); the element count is passed to
    # the kernel separately (presumably for bounds checking -- TODO confirm).
    side = np.uint32(np.ceil(np.sqrt(n_elements)))

    dtype = y_gpu.dtype
    block_size = (16, 16, 1)
    grid_size = (
        int(np.ceil(float(side) / block_size[0])),
        int(np.ceil(float(side) / block_size[1])),
    )

    # Total threads per grid dimension. Each grid dimension is scaled by its
    # own block dimension; with the square 16x16 block this equals the
    # previous behaviour of scaling both by block_size[0].
    threads_per_dim = np.array(grid_size) * np.array(block_size)[0:2]
    preproc = _generate_preproc(dtype, threads_per_dim)

    # keep=True is preserved from the original call.
    mod = SourceModule(preproc + kernel_code, keep=True)
    modify_alpha_fun = mod.get_function("modify_alpha")

    modify_alpha_fun(
        y_gpu.gpudata,
        np.float32(beta),
        np.float32(alpha),
        np.uint32(n_elements),
        block=block_size,
        grid=grid_size,
    )
# [web-page residue, not code] Comment list
# [web-page residue, not code] Article table of contents