def run_test_matmul_aa_ci8_shape(self, shape, transpose=False):
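    """Correlate a random complex-int8 matrix against itself on the GPU and
    compare with a NumPy reference whose strict upper triangle is zeroed to
    match the triangular kernel output. (Descriptive docstring added here;
    wording inferred from the body below.)"""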
    # TODO: This currently never triggers the transpose path in the backend
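    # ci8 stores interleaved (re, im) int8 pairs, so the real-valued array
    # backing the complex input needs twice as many elements on the last axis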
    shape_complex = shape[:-1] + (shape[-1] * 2,)
    # Note: The xGPU-like correlation kernel does not support input values of -128 (only [-127, 127])
    a8 = ((np.random.random(size=shape_complex) * 2 - 1) * 127).astype(np.int8)
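    # Build the float reference: promote to float32 so each adjacent
    # (re, im) pair can be reinterpreted in place as a complex64 value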
    a_gold = a8.astype(np.float32).view(np.complex64)
    if transpose:
        a_gold = H(a_gold)
    # Note: np.matmul seems to be slow and inaccurate when there are batch dims
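    # Reference output: the Hermitian self-product a_gold @ H(a_gold)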
    c_gold = np.matmul(a_gold, H(a_gold))
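    # The backend is expected to fill only the lower triangle of the Hermitian
    # result, so zero the strict upper triangle of the reference to match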
    triu = np.triu_indices(shape[-2] if not transpose else shape[-1], 1)
    c_gold[..., triu[0], triu[1]] = 0
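    # Reinterpret the packed int8 pairs as Bifrost's complex-int8 dtype and
    # copy the operand to GPU memory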
    a = a8.view(bf.DataType.ci8)
    a = bf.asarray(a, space='cuda')
    if transpose:
        a = H(a)
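    # matmul(alpha, a, b, beta, c); passing b=None requests the self-product,
    # i.e. c = alpha * (a @ a^H) + beta * c (presumably; inferred from usage)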
    c = bf.zeros_like(c_gold, space='cuda')
    self.linalg.matmul(1, a, None, 0, c)
    c = c.copy('system')
    np.testing.assert_allclose(c, c_gold, RTOL, ATOL)
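
# A hedged driver sketch showing how the helper above is presumably invoked.
# Assumptions (not verbatim from this file): the helper lives in a
# unittest.TestCase whose setUp() creates `self.linalg` (e.g.
# `self.linalg = bifrost.linalg.LinAlg()`); `H` is a Hermitian-transpose
# helper over the last two axes; RTOL/ATOL are module-level tolerances; and
# the shapes below are illustrative only.
def test_matmul_aa_ci8(self):
    for shape in [(11, 23), (5, 11, 23)]:  # a plain 2-D case and a batched case
        self.run_test_matmul_aa_ci8_shape(shape)
        self.run_test_matmul_aa_ci8_shape(shape, transpose=True)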