def _inv_gpu(b):
    """Invert a batch of float32 matrices on the GPU via batched LU.

    The input is viewed as a batch of square matrices (promoted to a
    size-1 batch if necessary) and copied, because the cuBLAS LU
    factorization overwrites its input in place.

    Returns a tuple ``(inverses, statuses)`` where ``statuses`` holds the
    per-matrix cuBLAS info codes (non-zero indicates failure, e.g. a
    singular matrix). Callers are expected to inspect ``statuses``.
    """
    batch = matmul._as_batch_mat(b).copy()
    size = batch.shape[1]
    count = len(batch)
    # Pivot indices produced by the LU factorization.
    pivots = cuda.cupy.empty((size, count), dtype=numpy.int32)
    # Destination array for the computed inverses.
    inverses = cuda.cupy.empty_like(batch)
    # Per-matrix execution status from cuBLAS (0 on success).
    statuses = cuda.cupy.empty(count, dtype=numpy.int32)
    # Batched cuBLAS routines take arrays of device pointers.
    in_ptrs = matmul._mat_ptrs(batch)
    out_ptrs = matmul._mat_ptrs(inverses)
    _, lda = matmul._get_ld(batch)
    _, ldout = matmul._get_ld(inverses)
    handle = cuda.Device().cublas_handle
    cuda.cublas.sgetrfBatched(
        handle, size, in_ptrs.data.ptr, lda, pivots.data.ptr,
        statuses.data.ptr, count)
    cuda.cublas.sgetriBatched(
        handle, size, in_ptrs.data.ptr, lda, pivots.data.ptr,
        out_ptrs.data.ptr, ldout, statuses.data.ptr, count)
    return inverses, statuses