# image_filter_test.py -- Python source code snippet

def unsharp_masking(arry, hist):
    """Filter ``arry`` in place with a 3x3 weighted neighbourhood and histogram the result.

    The body is written against the Numba CUDA device API (``cuda.grid``,
    ``cuda.shared.array``, ``cuda.syncthreads``, ``cuda.atomic.add``), so it is
    meant to be compiled and launched as a 2-D CUDA kernel; the decorator and
    the launch configuration are not part of this block.

    Each block copies its tile of ``arry`` into shared memory.  Threads that
    are not on the border of the block then compute::

        code = (-(c0 + c1 + c2 + c3) - 2 * (e0 + e1 + e2 + e3)) // 16 - center

    where ``c*`` are the four diagonal neighbours and ``e*`` the four edge
    neighbours of the thread's pixel.  ``code`` is added to ``arry[x, y]`` and
    the bin ``code % BIN_COUNT`` of ``hist`` is incremented.  Pixels handled by
    border threads of a block are left untouched (the tile has no halo).

    Args:
        arry: 2-D array indexed as ``arry[x, y]``; updated in place through
            ``cuda.atomic.add`` (presumably an integer image -- confirm the
            dtype against the caller).
        hist: 1-D array of counters indexed by ``code % BIN_COUNT``; assumed
            to have at least ``BIN_COUNT`` entries -- TODO confirm.

    Notes:
        The shared tile is fixed at 32x32 ``int32`` values, so the kernel must
        be launched with at most 32x32 threads per block.
    """
    tile = cuda.shared.array(shape=(32, 32), dtype=int32)

    x, y = cuda.grid(2)

    # NOTE: the names are swapped relative to the hardware axes on purpose to
    # stay consistent with the original indexing: ``ty`` follows ``x``.
    ty = cuda.threadIdx.x
    tx = cuda.threadIdx.y

    # The grid may overhang the image; out-of-image threads must neither read
    # nor write ``arry``, but they still have to fill their tile slot (with 0)
    # and reach the barrier below.
    in_image = x < arry.shape[0] and y < arry.shape[1]
    if in_image:
        tile[ty, tx] = arry[x, y]
    else:
        tile[ty, tx] = 0

    # Every thread of the block reaches this barrier: it is outside any
    # divergent branch, so the whole tile is populated before it is read.
    cuda.syncthreads()

    # Use the launched block size, not the tile capacity, to find the border:
    # with blocks smaller than 32x32 the rest of the tile is uninitialised.
    last_row = cuda.blockDim.x - 1
    last_col = cuda.blockDim.y - 1
    interior = (ty > 0 and ty < last_row) and (tx > 0 and tx < last_col)

    if in_image and interior:
        center = tile[ty, tx]

        code = 0
        # Diagonal neighbours, weight -1.
        code += tile[ty - 1, tx - 1] * -1
        code += tile[ty + 1, tx - 1] * -1
        code += tile[ty + 1, tx + 1] * -1
        code += tile[ty - 1, tx + 1] * -1
        # Edge neighbours, weight -2.  The original read the (ty-1, tx-1)
        # corner a second time here instead of the (ty-1, tx) edge.
        code += tile[ty, tx - 1] * -2
        code += tile[ty + 1, tx] * -2
        code += tile[ty, tx + 1] * -2
        code += tile[ty - 1, tx] * -2

        # Floor division keeps ``code`` an integer so it can be used as a
        # histogram index below (true division would make it a float).
        code = code // 16
        code = code - center

        # The result stays in a local: writing it back into the shared tile
        # would race with neighbouring threads that are still reading the
        # original pixel values.
        cuda.atomic.add(arry, (x, y), code)

        # Equivalent to hist[code % BIN_COUNT] += 1, done atomically because
        # many threads may hit the same bin.
        cuda.atomic.add(hist, code % BIN_COUNT, 1)