import unittest import numpy import pytest import cupy from cupyx import cusolver from cupy.cuda import driver from cupy.cuda import runtime from cupy.linalg import _util from cupy import testing from cupy.testing import _condition import cupyx def random_matrix(shape, dtype, scale, sym=False): m, n = shape[-2:] dtype = numpy.dtype(dtype) assert dtype.kind in 'iufc' low_s, high_s = scale bias = None if dtype.kind in 'iu': # For an m \times n matrix M whose element is in [-0.5, 0.5], it holds # (singular value of M) <= \sqrt{mn} / 2 err = numpy.sqrt(m * n) / 2. low_s += err high_s -= err if dtype.kind in 'u': assert sym, ( 'generating nonsymmetric matrix with uint cells is not' ' supported') # (singular value of numpy.ones((m, n))) <= \sqrt{mn} high_s = bias = high_s / (1 + numpy.sqrt(m * n)) assert low_s <= high_s a = numpy.random.standard_normal(shape) if dtype.kind == 'c': a = a + 1j * numpy.random.standard_normal(shape) u, s, vh = numpy.linalg.svd(a) if sym: assert m == n vh = u.conj().swapaxes(-1, -2) new_s = numpy.random.uniform(low_s, high_s, s.shape) new_a = numpy.einsum('...ij,...j,...jk->...ik', u, new_s, vh) if bias is not None: new_a += bias if dtype.kind in 'iu': new_a = numpy.rint(new_a) return new_a.astype(dtype) class TestCholeskyDecomposition: @testing.numpy_cupy_allclose(atol=1e-3) def check_L(self, array, xp): a = xp.asarray(array) return xp.linalg.cholesky(a) @testing.for_dtypes([ numpy.int32, numpy.int64, numpy.uint32, numpy.uint64, numpy.float32, numpy.float64, numpy.complex64, numpy.complex128]) def test_decomposition(self, dtype): # A positive definite matrix A = random_matrix((5, 5), dtype, scale=(10, 10000), sym=True) self.check_L(A) # np.linalg.cholesky only uses a lower triangle of an array self.check_L(numpy.array([[1, 2], [1, 9]], dtype)) @testing.for_dtypes([ numpy.int32, numpy.int64, numpy.uint32, numpy.uint64, numpy.float32, numpy.float64, numpy.complex64, numpy.complex128]) def test_batched_decomposition(self, dtype): if not cusolver.check_availability('potrfBatched'): pytest.skip('potrfBatched is not available') Ab1 = random_matrix((3, 5, 5), dtype, scale=(10, 10000), sym=True) self.check_L(Ab1) Ab2 = random_matrix((2, 2, 5, 5), dtype, scale=(10, 10000), sym=True) self.check_L(Ab2) @pytest.mark.parametrize('shape', [ # empty square (0, 0), (3, 0, 0), # empty batch (2, 0, 3, 4, 4), ]) @testing.for_dtypes([ numpy.int32, numpy.uint16, numpy.float32, numpy.float64, numpy.complex64, numpy.complex128]) @testing.numpy_cupy_allclose() def test_empty(self, shape, xp, dtype): a = xp.empty(shape, dtype) return xp.linalg.cholesky(a) class TestCholeskyInvalid(unittest.TestCase): def check_L(self, array): for xp in (numpy, cupy): a = xp.asarray(array) with cupyx.errstate(linalg='raise'): with pytest.raises(numpy.linalg.LinAlgError): xp.linalg.cholesky(a) @testing.for_dtypes([ numpy.int32, numpy.int64, numpy.uint32, numpy.uint64, numpy.float32, numpy.float64]) def test_decomposition(self, dtype): A = numpy.array([[1, -2], [-2, 1]]).astype(dtype) self.check_L(A) @testing.parameterize(*testing.product({ 'mode': ['r', 'raw', 'complete', 'reduced'], })) class TestQRDecomposition(unittest.TestCase): @testing.for_dtypes('fdFD') def check_mode(self, array, mode, dtype): if runtime.is_hip and driver.get_build_version() < 307: if dtype in (numpy.complex64, numpy.complex128): pytest.skip('ungqr unsupported') a_cpu = numpy.asarray(array, dtype=dtype) a_gpu = cupy.asarray(array, dtype=dtype) result_gpu = cupy.linalg.qr(a_gpu, mode=mode) if ( mode != 'raw' or numpy.lib.NumpyVersion(numpy.__version__) >= '1.22.0rc1' ): result_cpu = numpy.linalg.qr(a_cpu, mode=mode) self._check_result(result_cpu, result_gpu) def _check_result(self, result_cpu, result_gpu): if isinstance(result_cpu, tuple): for b_cpu, b_gpu in zip(result_cpu, result_gpu): assert b_cpu.dtype == b_gpu.dtype cupy.testing.assert_allclose(b_cpu, b_gpu, atol=1e-4) else: assert result_cpu.dtype == result_gpu.dtype cupy.testing.assert_allclose(result_cpu, result_gpu, atol=1e-4) @testing.fix_random() @_condition.repeat(3, 10) def test_mode(self): self.check_mode(numpy.random.randn(2, 4), mode=self.mode) self.check_mode(numpy.random.randn(3, 3), mode=self.mode) self.check_mode(numpy.random.randn(5, 4), mode=self.mode) @testing.with_requires('numpy>=1.22') @testing.fix_random() def test_mode_rank3(self): self.check_mode(numpy.random.randn(3, 2, 4), mode=self.mode) self.check_mode(numpy.random.randn(4, 3, 3), mode=self.mode) self.check_mode(numpy.random.randn(2, 5, 4), mode=self.mode) @testing.with_requires('numpy>=1.22') @testing.fix_random() def test_mode_rank4(self): self.check_mode(numpy.random.randn(2, 3, 2, 4), mode=self.mode) self.check_mode(numpy.random.randn(2, 4, 3, 3), mode=self.mode) self.check_mode(numpy.random.randn(2, 2, 5, 4), mode=self.mode) @testing.with_requires('numpy>=1.16') def test_empty_array(self): self.check_mode(numpy.empty((0, 3)), mode=self.mode) self.check_mode(numpy.empty((3, 0)), mode=self.mode) @testing.with_requires('numpy>=1.22') def test_empty_array_rank3(self): self.check_mode(numpy.empty((0, 3, 2)), mode=self.mode) self.check_mode(numpy.empty((3, 0, 2)), mode=self.mode) self.check_mode(numpy.empty((3, 2, 0)), mode=self.mode) self.check_mode(numpy.empty((0, 3, 3)), mode=self.mode) self.check_mode(numpy.empty((3, 0, 3)), mode=self.mode) self.check_mode(numpy.empty((3, 3, 0)), mode=self.mode) self.check_mode(numpy.empty((0, 2, 3)), mode=self.mode) self.check_mode(numpy.empty((2, 0, 3)), mode=self.mode) self.check_mode(numpy.empty((2, 3, 0)), mode=self.mode) @testing.parameterize(*testing.product({ 'full_matrices': [True, False], })) @testing.fix_random() class TestSVD(unittest.TestCase): def setUp(self): self.seed = testing.generate_seed() @testing.for_dtypes([ numpy.int32, numpy.int64, numpy.uint32, numpy.uint64, numpy.float32, numpy.float64, numpy.complex64, numpy.complex128, ]) def check_usv(self, shape, dtype): array = testing.shaped_random( shape, numpy, dtype=dtype, seed=self.seed) a_cpu = numpy.asarray(array, dtype=dtype) a_gpu = cupy.asarray(array, dtype=dtype) result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices) result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices) # Check if the input matrix is not broken cupy.testing.assert_allclose(a_gpu, a_cpu) assert len(result_gpu) == 3 for i in range(3): assert result_gpu[i].shape == result_cpu[i].shape assert result_gpu[i].dtype == result_cpu[i].dtype u_cpu, s_cpu, vh_cpu = result_cpu u_gpu, s_gpu, vh_gpu = result_gpu cupy.testing.assert_allclose(s_gpu, s_cpu, rtol=1e-5, atol=1e-4) # reconstruct the matrix k = s_cpu.shape[-1] if len(shape) == 2: if self.full_matrices: a_gpu_usv = cupy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :]) else: a_gpu_usv = cupy.dot(u_gpu * s_gpu, vh_gpu) else: if self.full_matrices: a_gpu_usv = cupy.matmul(u_gpu[..., :k] * s_gpu[..., None, :], vh_gpu[..., :k, :]) else: a_gpu_usv = cupy.matmul(u_gpu*s_gpu[..., None, :], vh_gpu) cupy.testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4) # assert unitary u_len = u_gpu.shape[-1] vh_len = vh_gpu.shape[-2] cupy.testing.assert_allclose( cupy.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu), _util.stacked_identity(shape[:-2], u_len, dtype), atol=1e-4) cupy.testing.assert_allclose( cupy.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()), _util.stacked_identity(shape[:-2], vh_len, dtype), atol=1e-4) @testing.for_dtypes([ numpy.int32, numpy.int64, numpy.uint32, numpy.uint64, numpy.float32, numpy.float64, numpy.complex64, numpy.complex128, ]) @testing.numpy_cupy_allclose(rtol=1e-5, atol=1e-4) def check_singular(self, shape, xp, dtype): array = testing.shaped_random(shape, xp, dtype=dtype, seed=self.seed) a = xp.asarray(array, dtype=dtype) a_copy = a.copy() result = xp.linalg.svd( a, full_matrices=self.full_matrices, compute_uv=False) # Check if the input matrix is not broken assert (a == a_copy).all() return result @_condition.repeat(3, 10) def test_svd_rank2(self): self.check_usv((3, 7)) self.check_usv((2, 2)) self.check_usv((7, 3)) @_condition.repeat(3, 10) def test_svd_rank2_no_uv(self): self.check_singular((3, 7)) self.check_singular((2, 2)) self.check_singular((7, 3)) @testing.with_requires('numpy>=1.16') def test_svd_rank2_empty_array(self): self.check_usv((0, 3)) self.check_usv((3, 0)) self.check_usv((1, 0)) @testing.with_requires('numpy>=1.16') @testing.numpy_cupy_array_equal() def test_svd_rank2_empty_array_compute_uv_false(self, xp): array = xp.empty((3, 0)) return xp.linalg.svd( array, full_matrices=self.full_matrices, compute_uv=False) @_condition.repeat(3, 10) def test_svd_rank3(self): self.check_usv((2, 3, 4)) self.check_usv((2, 3, 7)) self.check_usv((2, 4, 4)) self.check_usv((2, 7, 3)) self.check_usv((2, 4, 3)) self.check_usv((2, 32, 32)) # still use _gesvdj_batched @_condition.repeat(3, 10) def test_svd_rank3_loop(self): # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) self.check_usv((2, 64, 64)) self.check_usv((2, 64, 32)) self.check_usv((2, 32, 64)) @_condition.repeat(3, 10) def test_svd_rank3_no_uv(self): self.check_singular((2, 3, 4)) self.check_singular((2, 3, 7)) self.check_singular((2, 4, 4)) self.check_singular((2, 7, 3)) self.check_singular((2, 4, 3)) @_condition.repeat(3, 10) def test_svd_rank3_no_uv_loop(self): # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) self.check_singular((2, 64, 64)) self.check_singular((2, 64, 32)) self.check_singular((2, 32, 64)) @testing.with_requires('numpy>=1.16') def test_svd_rank3_empty_array(self): self.check_usv((0, 3, 4)) self.check_usv((3, 0, 4)) self.check_usv((3, 4, 0)) self.check_usv((3, 0, 0)) self.check_usv((0, 3, 0)) self.check_usv((0, 0, 3)) @testing.with_requires('numpy>=1.16') @testing.numpy_cupy_array_equal() def test_svd_rank3_empty_array_compute_uv_false1(self, xp): array = xp.empty((3, 0, 4)) return xp.linalg.svd( array, full_matrices=self.full_matrices, compute_uv=False) @testing.with_requires('numpy>=1.16') @testing.numpy_cupy_array_equal() def test_svd_rank3_empty_array_compute_uv_false2(self, xp): array = xp.empty((0, 3, 4)) return xp.linalg.svd( array, full_matrices=self.full_matrices, compute_uv=False) @_condition.repeat(3, 10) def test_svd_rank4(self): self.check_usv((2, 2, 3, 4)) self.check_usv((2, 2, 3, 7)) self.check_usv((2, 2, 4, 4)) self.check_usv((2, 2, 7, 3)) self.check_usv((2, 2, 4, 3)) self.check_usv((2, 2, 32, 32)) # still use _gesvdj_batched @_condition.repeat(3, 10) def test_svd_rank4_loop(self): # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) self.check_usv((3, 2, 64, 64)) self.check_usv((3, 2, 64, 32)) self.check_usv((3, 2, 32, 64)) @_condition.repeat(3, 10) def test_svd_rank4_no_uv(self): self.check_singular((2, 2, 3, 4)) self.check_singular((2, 2, 3, 7)) self.check_singular((2, 2, 4, 4)) self.check_singular((2, 2, 7, 3)) self.check_singular((2, 2, 4, 3)) @_condition.repeat(3, 10) def test_svd_rank4_no_uv_loop(self): # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) self.check_singular((3, 2, 64, 64)) self.check_singular((3, 2, 64, 32)) self.check_singular((3, 2, 32, 64)) @testing.with_requires('numpy>=1.16') def test_svd_rank4_empty_array(self): self.check_usv((0, 2, 3, 4)) self.check_usv((1, 2, 0, 4)) self.check_usv((1, 2, 3, 0))