# Tests for the low-level cuFFT bindings (cupy.cuda.cufft).
import pickle
import unittest

import numpy
import pytest

import cupy
from cupy import testing
from cupy.cuda import cufft
from cupy.fft import config
from cupy.fft._fft import _convert_fft_type

from ..fft_tests.test_fft import (multi_gpu_config, _skip_multi_gpu_bug)


class TestExceptionPicklable(unittest.TestCase):

    def test(self):
        """A CuFFTError must survive a pickle round-trip unchanged."""
        original = cufft.CuFFTError(1)
        restored = pickle.loads(pickle.dumps(original))
        assert restored.args == original.args
        assert str(restored) == str(original)


# This class tests multi-GPU Plan1d with data sitting on host.
# Internally, we use cuFFT's data transfer API to ensure the
# data is in order.

@testing.parameterize(*testing.product({
    'shape': [(64,), (4, 16), (128,), (8, 32)],
}))
@testing.multi_gpu(2)
@pytest.mark.skipif(cupy.cuda.runtime.is_hip, reason='not supported by hipFFT')
class TestMultiGpuPlan1dNumPy(unittest.TestCase):
    """Multi-GPU Plan1d tests with input/output data sitting on host.

    Internally cuFFT's data transfer API is used to ensure the data is
    distributed to (and gathered from) the participating GPUs in order.
    """

    def _decompose_shape(self):
        """Return ``(batch, nx)`` for the batched 1D FFT of ``self.shape``.

        A 1D shape is a single length-``nx`` transform; a 2D shape is a
        batch of 1D transforms along the last axis.
        """
        if len(self.shape) == 1:
            return 1, self.shape[0]
        elif len(self.shape) == 2:
            return self.shape[0], self.shape[1]
        # Guard against silently-undefined batch/nx should a new shape
        # ever be added to the parameterization above.
        raise ValueError('only 1D and 2D shapes are supported')

    @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
    @testing.for_complex_dtypes()
    def test_fft(self, dtype):
        """Forward multi-GPU FFT matches numpy.fft.fft."""
        _skip_multi_gpu_bug(self.shape, self.gpus)

        a = testing.shaped_random(self.shape, numpy, dtype)
        batch, nx = self._decompose_shape()

        # compute via cuFFT
        cufft_type = _convert_fft_type(a.dtype, 'C2C')
        plan = cufft.Plan1d(nx, cufft_type, batch, devices=config._devices)
        out_cp = numpy.empty_like(a)
        plan.fft(a, out_cp, cufft.CUFFT_FORWARD)

        out_np = numpy.fft.fft(a)
        # np.fft.fft always returns np.complex128
        if dtype is numpy.complex64:
            out_np = out_np.astype(dtype)

        assert numpy.allclose(out_cp, out_np, rtol=1e-4, atol=1e-7)

        # compute it again to ensure Plan1d's internal state is reset
        plan.fft(a, out_cp, cufft.CUFFT_FORWARD)

        assert numpy.allclose(out_cp, out_np, rtol=1e-4, atol=1e-7)

    @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
    @testing.for_complex_dtypes()
    def test_ifft(self, dtype):
        """Inverse multi-GPU FFT matches numpy.fft.ifft."""
        _skip_multi_gpu_bug(self.shape, self.gpus)

        a = testing.shaped_random(self.shape, numpy, dtype)
        batch, nx = self._decompose_shape()

        # compute via cuFFT
        cufft_type = _convert_fft_type(a.dtype, 'C2C')
        plan = cufft.Plan1d(nx, cufft_type, batch, devices=config._devices)
        out_cp = numpy.empty_like(a)
        plan.fft(a, out_cp, cufft.CUFFT_INVERSE)
        # cuFFT's inverse transform is unnormalized; divide by the transform
        # length to match numpy's ifft normalization.
        out_cp /= nx

        out_np = numpy.fft.ifft(a)
        # np.fft.ifft always returns np.complex128
        if dtype is numpy.complex64:
            out_np = out_np.astype(dtype)

        assert numpy.allclose(out_cp, out_np, rtol=1e-4, atol=1e-7)

        # compute it again to ensure Plan1d's internal state is reset
        plan.fft(a, out_cp, cufft.CUFFT_INVERSE)
        # normalization
        out_cp /= nx

        assert numpy.allclose(out_cp, out_np, rtol=1e-4, atol=1e-7)


# This class contains a few simple tests for the new XtPlanNd wrapper. A full-
# scale test will be done (with little cost) when this low-level API is
# integrated with the high-level APIs such as cupy.fft.fft() (see #4406), so
# don't make our life too hard here. Note that we don't have cupy.complex32
# yet, so its dtype is represented by a single character 'E' (in contrast to
# 'e' for float16) for the time being (see #3370).

@testing.parameterize(*testing.product({
    'shape': [(4, 16), (4, 16, 16)],
}))
@pytest.mark.skipif(cupy.cuda.runtime.is_hip, reason='not supported by hipFFT')
class TestXtPlanNd(unittest.TestCase):
    """Simple smoke tests for the low-level XtPlanNd wrapper.

    A full-scale test will be done (with little cost) when this low-level
    API is integrated with the high-level APIs such as cupy.fft.fft()
    (see #4406), so don't make our life too hard here.

    The first axis of ``self.shape`` is the batch dimension; the FFT runs
    over the remaining axes. cupy has no complex32 dtype yet, so it is
    spelled with the single character 'E' (in contrast to 'e' for float16)
    for the time being (see #3370), and complex32 arrays are emulated by
    float16 arrays whose last axis is doubled (interleaved real/imag).
    """

    def _make_plan(self, shape, idtype, odtype, edtype):
        """Build a batched C2C XtPlanNd over all axes but the first.

        Returns ``(plan, length)`` where ``length`` is the per-batch
        transform size (product of the non-batch axes).
        """
        length = cupy._core.internal.prod(shape[1:])
        plan = cufft.XtPlanNd(shape[1:],
                              shape[1:], 1, length, idtype,
                              shape[1:], 1, length, odtype,
                              shape[0], edtype,
                              order='C', last_axis=-1, last_size=None)
        return plan, length

    def _fftn_ref(self, a, inverse):
        """Reference result computed via the high-level cupy.fft API."""
        if a.ndim <= 2:
            return cupy.fft.ifft(a) if inverse else cupy.fft.fft(a)
        func = cupy.fft.ifftn if inverse else cupy.fft.fftn
        return func(a, axes=(-1, -2))

    def _half_precision_arrays(self):
        """float16 input/output buffers emulating complex32 arrays.

        The last axis is doubled so that interleaved (real, imag) float16
        pairs stand in for complex32 elements of ``self.shape``.
        """
        shape = list(self.shape)
        shape[-1] *= 2
        a = testing.shaped_random(tuple(shape), cupy, cupy.float16)
        return a, cupy.empty_like(a)

    @testing.for_complex_dtypes()
    def test_forward_fft(self, dtype):
        dt = cupy.dtype(dtype)
        a = testing.shaped_random(self.shape, cupy, dtype)
        out = cupy.empty_like(a)

        plan, _ = self._make_plan(self.shape, dt, dt, dt)
        plan.fft(a, out, cufft.CUFFT_FORWARD)

        testing.assert_allclose(out, self._fftn_ref(a, inverse=False))

    @testing.for_complex_dtypes()
    def test_backward_fft(self, dtype):
        dt = cupy.dtype(dtype)
        a = testing.shaped_random(self.shape, cupy, dtype)
        out = cupy.empty_like(a)

        plan, length = self._make_plan(self.shape, dt, dt, dt)
        plan.fft(a, out, cufft.CUFFT_INVERSE)

        # cuFFT's inverse transform is unnormalized; divide by the transform
        # length to match cupy.fft.ifft(n)'s 1/n normalization.
        testing.assert_allclose(out/length, self._fftn_ref(a, inverse=True))

    @pytest.mark.skipif(int(cupy.cuda.device.get_compute_capability()) < 53,
                        reason='half-precision complex FFT is not supported')
    def test_forward_fft_complex32(self):
        a, out = self._half_precision_arrays()
        plan, _ = self._make_plan(self.shape, 'E', 'E', 'E')
        plan.fft(a, out, cufft.CUFFT_FORWARD)

        # Reference path: upcast to complex64, FFT, downcast back.
        a_cp = a.astype(cupy.float32).view(cupy.complex64)
        out_cp = self._fftn_ref(a_cp, inverse=False)
        out_cp = out_cp.view(cupy.float32).astype(cupy.float16)

        # We shouldn't expect getting high accuracy, as both computations have
        # precision losses...
        testing.assert_allclose(out, out_cp, rtol=1E-1, atol=1E-1)

    @pytest.mark.skipif(int(cupy.cuda.device.get_compute_capability()) < 53,
                        reason='half-precision complex FFT is not supported')
    def test_backward_fft_complex32(self):
        a, out = self._half_precision_arrays()
        plan, length = self._make_plan(self.shape, 'E', 'E', 'E')
        plan.fft(a, out, cufft.CUFFT_INVERSE)

        # Reference path: upcast to complex64, inverse FFT, downcast back.
        a_cp = a.astype(cupy.float32).view(cupy.complex64)
        out_cp = self._fftn_ref(a_cp, inverse=True)
        out_cp = out_cp.view(cupy.float32).astype(cupy.float16)

        # We shouldn't expect getting high accuracy, as both computations have
        # precision losses... (out is unnormalized, hence the 1/length.)
        testing.assert_allclose(out/length, out_cp, rtol=1E-1, atol=1E-1)