test_gufunc_scalar.py 5.26 KB
Newer Older
dugupeiwen's avatar
dugupeiwen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""Example: sum each row using guvectorize

See Numpy documentation for detail about gufunc:
    http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
"""
import numpy as np
from numba import guvectorize, cuda
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
import unittest


@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestGUFuncScalar(CUDATestCase):
    def test_gufunc_scalar_output(self):
        #    function type:
        #        - has no void return type
        #        - array argument is one dimension fewer than the source array
        #        - scalar output is passed as a 1-element array.
        #
        #    signature: (n)->()
        #        - the function takes an array of n-element and output a scalar.

        @guvectorize(['void(int32[:], int32[:])'], '(n)->()', target='cuda')
        def sum_row(inp, out):
            tmp = 0.
            for i in range(inp.shape[0]):
                tmp += inp[i]
            out[0] = tmp

        # inp is (10000, 3)
        # out is (10000)
        # The outer (leftmost) dimension must match or numpy broadcasting
        # is performed. But, broadcasting on CUDA arrays is not supported.

        inp = np.arange(300, dtype=np.int32).reshape(100, 3)

        # invoke on CUDA with manually managed memory
        out1 = np.empty(100, dtype=inp.dtype)
        out2 = np.empty(100, dtype=inp.dtype)

        dev_inp = cuda.to_device(
            inp)                 # alloc and copy input data
        dev_out1 = cuda.to_device(out1, copy=False)   # alloc only

        sum_row(dev_inp, out=dev_out1)                # invoke the gufunc
        dev_out2 = sum_row(dev_inp)                   # invoke the gufunc

        dev_out1.copy_to_host(out1)                 # retrieve the result
        dev_out2.copy_to_host(out2)                 # retrieve the result

        # verify result
        for i in range(inp.shape[0]):
            self.assertTrue(out1[i] == inp[i].sum())
            self.assertTrue(out2[i] == inp[i].sum())

    def test_gufunc_scalar_output_bug(self):
        # Issue 2812: Error due to using input argument types as output argument
        @guvectorize(['void(int32, int32[:])'], '()->()', target='cuda')
        def twice(inp, out):
            out[0] = inp * 2

        self.assertEqual(twice(10), 20)
        arg = np.arange(10).astype(np.int32)
        self.assertPreciseEqual(twice(arg), arg * 2)

    def test_gufunc_scalar_input_saxpy(self):
        @guvectorize(['void(float32, float32[:], float32[:], float32[:])'],
                     '(),(t),(t)->(t)', target='cuda')
        def saxpy(a, x, y, out):
            for i in range(out.shape[0]):
                out[i] = a * x[i] + y[i]

        A = np.float32(2)
        X = np.arange(10, dtype=np.float32).reshape(5, 2)
        Y = np.arange(10, dtype=np.float32).reshape(5, 2)
        out = saxpy(A, X, Y)

        for j in range(5):
            for i in range(2):
                exp = A * X[j, i] + Y[j, i]
                self.assertTrue(exp == out[j, i])

        X = np.arange(10, dtype=np.float32)
        Y = np.arange(10, dtype=np.float32)
        out = saxpy(A, X, Y)

        for j in range(10):
            exp = A * X[j] + Y[j]
            self.assertTrue(exp == out[j], (exp, out[j]))

        A = np.arange(5, dtype=np.float32)
        X = np.arange(10, dtype=np.float32).reshape(5, 2)
        Y = np.arange(10, dtype=np.float32).reshape(5, 2)
        out = saxpy(A, X, Y)

        for j in range(5):
            for i in range(2):
                exp = A[j] * X[j, i] + Y[j, i]
                self.assertTrue(exp == out[j, i], (exp, out[j, i]))

    def test_gufunc_scalar_cast(self):
        @guvectorize(['void(int32, int32[:], int32[:])'], '(),(t)->(t)',
                     target='cuda')
        def foo(a, b, out):
            for i in range(b.size):
                out[i] = a * b[i]

        a = np.int64(2)  # type does not match signature (int32)
        b = np.arange(10).astype(np.int32)
        out = foo(a, b)
        np.testing.assert_equal(out, a * b)

        # test error
        a = np.array(a)
        da = cuda.to_device(a)
        self.assertEqual(da.dtype, np.int64)
        with self.assertRaises(TypeError) as raises:
            foo(da, b)

        self.assertIn("does not support .astype()", str(raises.exception))

    def test_gufunc_old_style_scalar_as_array(self):
        # Example from issue #2579
        @guvectorize(['void(int32[:],int32[:],int32[:])'], '(n),()->(n)',
                     target='cuda')
        def gufunc(x, y, res):
            for i in range(x.shape[0]):
                res[i] = x[i] + y[0]

        # Case 1
        a = np.array([1, 2, 3, 4], dtype=np.int32)
        b = np.array([2], dtype=np.int32)

        res = np.zeros(4, dtype=np.int32)

        expected = res.copy()
        expected = a + b

        gufunc(a, b, out=res)

        np.testing.assert_almost_equal(expected, res)

        # Case 2
        a = np.array([1, 2, 3, 4] * 2, dtype=np.int32).reshape(2, 4)
        b = np.array([2, 10], dtype=np.int32)

        res = np.zeros((2, 4), dtype=np.int32)

        expected = res.copy()
        expected[0] = a[0] + b[0]
        expected[1] = a[1] + b[1]

        gufunc(a, b, res)

        np.testing.assert_almost_equal(expected, res)


if __name__ == '__main__':
    unittest.main()