def _det_gpu(b):
# We do a batched LU decomposition on the GPU to compute
# and compute the determinant by multiplying the diagonal.
# Change the shape of the array to be size=1 minibatch if necessary.
# Also copy the matrix as the elments will be modified in-place.
a = matmul._as_batch_mat(b).copy()
n = a.shape[1]
n_matrices = len(a)
# Pivot array
p = cuda.cupy.zeros((n_matrices, n), dtype='int32')
# Output array
# These arrays hold information on the execution success
# or if the matrix was singular.
info = cuda.cupy.zeros(n_matrices, dtype=numpy.intp)
ap = matmul._mat_ptrs(a)
_, lda = matmul._get_ld(a)
cuda.cublas.sgetrfBatched(cuda.Device().cublas_handle, n, ap.data.ptr, lda,
p.data.ptr, info.data.ptr, n_matrices)
det = cuda.cupy.prod(a.diagonal(axis1=1, axis2=2), axis=1)
# The determinant is equal to the product of the diagonal entries
# of `a` where the sign of `a` is flipped depending on whether
# the pivot array is equal to its index.
rng = cuda.cupy.arange(1, n + 1, dtype='int32')
parity = cuda.cupy.sum(p != rng, axis=1) % 2
sign = 1. - 2. * parity.astype('float32')
return det * sign, info
评论列表
文章目录