one_d_stencil.py file source code

python

Project: Gpu-Stencil-Operations | Author: ebadali
from timeit import default_timer as timer

import numpy as np
from numba import cuda
from scipy.stats import itemfreq  # removed in SciPy >= 1.3; np.unique(..., return_counts=True) is the modern replacement

# n, BIN_COUNT, bpg, tpb and the gpu_histogram kernel are module-level
# definitions elsewhere in one_d_stencil.py.


def cudatest_hist():
    # src1 = np.arange(n, dtype=np.float32)
    src1 = np.random.randint(BIN_COUNT, size=n).astype(np.float32)
    histogram = np.zeros(BIN_COUNT, dtype=np.int32)

    print(src1)
    stream = cuda.stream()  # use a stream to trigger async memory transfers
    ts = timer()

    # Control the number of iterations
    count = 1
    for i in range(count):
        with stream.auto_synchronize():
            # ts = timer()
            d_src1 = cuda.to_device(src1, stream=stream)
            d_hist = cuda.to_device(histogram, stream=stream)
            # gpu_1d_stencil[bpg, tpb, stream](d_src1)
            gpu_histogram[bpg, tpb, stream](d_src1, d_hist)
            d_src1.copy_to_host(src1, stream=stream)
            d_hist.copy_to_host(histogram, stream=stream)

    te = timer()
    print('pinned ', count, " : ", te - ts)
    print(histogram)

    # Compute a reference histogram on the original data.
    # The GPU histogram may contain a few extra counts because of the
    # padding added to the original data in the kernel code.
    hist = src1.astype(np.int64)
    x = itemfreq(hist.ravel())
    hist = x  # [:, 1]/sum(x[:, 1]) would give normalized frequencies
    print(hist)


# cudatest_stencil()
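
The gpu_histogram kernel launched above is defined elsewhere in the project and is not part of this excerpt. As a rough sketch only, a Numba CUDA histogram kernel along these lines (one thread per input element, atomic adds into the bin array; the BIN_COUNT, n, tpb and bpg values below are placeholder assumptions, not the project's actual configuration) could look like:

import numpy as np
from numba import cuda

BIN_COUNT = 32               # placeholder bin count (the real value lives in the module)
n = 1024                     # placeholder input size
tpb = 64                     # threads per block (assumed)
bpg = (n + tpb - 1) // tpb   # blocks per grid (assumed)


@cuda.jit
def gpu_histogram(src, hist):
    # One thread per input element; cuda.atomic.add avoids lost updates
    # when several threads land in the same bin.
    i = cuda.grid(1)
    if i < src.shape[0]:
        bin_idx = int(src[i]) % hist.shape[0]
        cuda.atomic.add(hist, bin_idx, 1)

Launched as gpu_histogram[bpg, tpb, stream](d_src1, d_hist), each thread classifies one element of d_src1 and increments the matching bin of d_hist atomically, which matches how the test function above invokes the kernel.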