test_conv_cuda_ndarray.py 文件源码

python
阅读 24 收藏 0 点赞 0 评论 0

项目:Theano-Deep-learning 作者: GeekLiB 项目源码 文件源码
def gemm_directly(bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsx, subsy,
                  direction):
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)
    subsample = (subsx, subsy)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    if direction == 'fprop':
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_img.shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_kern.shape])()

        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM(border_mode='valid',
                                                subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_img, npy_kern[:, :, ::-1, ::-1])
    elif direction == 'bprop img':
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in
                           npy_kern.transpose(1, 0, 2, 3).shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in npy_img.shape])()

        cpuval = py_conv(npy_img, npy_kern, 'full', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradInputs(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_kern.transpose(1, 0, 2, 3), npy_img)
    elif direction == 'bprop kern':
        i = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in
                           npy_img.transpose(1, 0, 2, 3).shape])()
        k = cuda.CudaNdarrayType(
            broadcastable=[sh == 1 for sh in
                           npy_kern.transpose(1, 0, 2, 3).shape])()

        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradWeights(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = numpy.array(f(
            npy_img.transpose(1, 0, 2, 3),
            npy_kern.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1])
            ).transpose(1, 0, 2, 3)

    assert_allclose(cpuval, gpuval, rtol=1e-4)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号