def hist_cuda_test():
    """Run the ``hist_comp`` kernel on the GPU and print timing and results.

    Uploads ``src1``, the entry ``src1[SEARCH_INDEX]`` and a freshly zeroed
    9-element float64 result buffer to the device on one CUDA stream,
    launches ``hist_comp`` with a ``[1, vectorSize, stream]`` configuration,
    then copies all three buffers back into the same host arrays. Because
    the first host array *is* ``src1``, the copy-back writes into ``src1``.

    The elapsed time (host/device transfers included) and the result buffer
    are printed; nothing is returned.

    Relies on names defined elsewhere in the file: ``src1``,
    ``SEARCH_INDEX``, ``vectorSize``, ``hist_comp``, ``cuda``, ``np`` and
    ``timer``.
    """
    # Host-side buffers. The first two alias data owned by ``src1``.
    host_hist_table = src1
    host_query_hist = src1[SEARCH_INDEX]
    host_scores = np.zeros(9, dtype=np.float64)

    # A dedicated stream lets the transfers below be issued asynchronously.
    stream = cuda.stream()

    # Raise this to repeat the whole upload/launch/download cycle when
    # measuring efficiency.
    count = 1

    started = timer()
    for _ in range(count):
        # Everything queued on the stream is issued inside this context.
        # NOTE(review): per Numba docs, leaving the context waits for the
        # stream to finish - confirm for the Numba version in use.
        with stream.auto_synchronize():
            # Upload the inputs for the histogram comparison.
            dev_hist_table = cuda.to_device(host_hist_table, stream=stream)
            dev_query_hist = cuda.to_device(host_query_hist, stream=stream)
            dev_scores = cuda.to_device(host_scores, stream=stream)

            # Launch configuration: grid of 1, block of vectorSize, on stream.
            hist_comp[1, vectorSize, stream](
                dev_hist_table, dev_query_hist, dev_scores
            )

            # Download every buffer back into its originating host array.
            dev_hist_table.copy_to_host(host_hist_table, stream=stream)
            dev_query_hist.copy_to_host(host_query_hist, stream=stream)
            dev_scores.copy_to_host(host_scores, stream=stream)
    finished = timer()

    print('GPU Process ',count," Iterations : in ", finished - started)
    print('histogram is')
    print(host_scores)
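# NOTE: web-page navigation residue from the original paste (not code):
# "Comment list" / "Article table of contents".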