def histogram(x, xmin, xmax, histogram_out):
    """Accumulate a histogram of ``x`` into ``histogram_out``.

    Each thread begins at its absolute grid index and advances by the
    total grid size (a grid-stride loop), so the whole of ``x`` is covered
    whatever launch configuration is used.

    Args:
        x: Input values; only indexed along its first axis.
        xmin: Lower range bound, forwarded unchanged to ``compute_bin``.
        xmax: Upper range bound, forwarded unchanged to ``compute_bin``.
        histogram_out: Output counts, one slot per bin; its length sets
            the number of bins. It is incremented in place with atomic
            adds and is never zeroed here, so the caller must initialise
            it.

    Values whose computed bin lies outside ``[0, nbins)`` are skipped.
    """
    # NOTE(review): ``compute_bin`` is defined outside this block; it is
    # assumed to return an integer bin index -- confirm against its source.
    nbins = histogram_out.shape[0]

    start = cuda.grid(1)
    stride = cuda.gridsize(1)

    for i in range(start, x.shape[0], stride):
        # note that calling a numba.jit function from CUDA automatically
        # compiles an equivalent CUDA device function!
        bin_number = compute_bin(x[i], nbins, xmin, xmax)

        # Several threads may hit the same bin at once, hence the atomic
        # add rather than a plain ``+=``.
        if 0 <= bin_number < nbins:
            cuda.atomic.add(histogram_out, bin_number, 1)
评论列表
文章目录