def _get_batch_non_diagonal_cpu(array):
    """Gather the below-diagonal entries of every square matrix in a batch.

    Args:
        array: Object whose ``shape`` unpacks into exactly three values
            ``(batch, m, n)`` and that supports NumPy-style integer-array
            indexing on its last two axes.

    Returns:
        ``array`` indexed with ``[:, rows, cols]``, where ``rows``/``cols``
        come from ``np.tril_indices(n, -1)``, i.e. the positions strictly
        below the main diagonal. For a NumPy array this has shape
        ``(batch, n * (n - 1) // 2)``.

    Raises:
        AssertionError: If the last two dimensions differ (``m != n``).
        ValueError: If ``array.shape`` does not have exactly three entries
            (raised by the tuple unpacking).

    Note:
        Despite the "non_diagonal" name, only the entries below the
        diagonal are selected; entries above it are not returned.
    """
    # Unpacking into three names also enforces that the input is rank 3.
    _, height, width = array.shape
    assert height == width

    # Offset -1 excludes the main diagonal itself from the lower triangle.
    below_diag = np.tril_indices(width, k=-1)

    # Equivalent to array[:, rows, cols]: keep the batch axis intact and
    # pick the (row, col) pairs from each matrix.
    selector = (slice(None), *below_diag)
    return array[selector]