test_cuda_memory.py 6.47 KB
Newer Older
dugupeiwen's avatar
dugupeiwen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import ctypes

import numpy as np

from numba.cuda.cudadrv import driver, drvapi, devices
from numba.cuda.testing import unittest, ContextResettingTestCase
from numba.cuda.testing import skip_on_cudasim


@skip_on_cudasim('CUDA Memory API unsupported in the simulator')
class TestCudaMemory(ContextResettingTestCase):
    """Tests for the memory objects handed out by the CUDA driver context.

    Each test obtains the current context via ``devices.get_context()`` in
    ``setUp`` and drops its reference again in ``tearDown``.
    """

    def setUp(self):
        super().setUp()
        self.context = devices.get_context()

    def tearDown(self):
        # Drop our reference to the context before the base class tears down.
        del self.context
        super().tearDown()

    def _template(self, obj):
        """Run the shared device-memory checks against *obj*.

        Verifies that the driver classifies *obj* as device memory and that
        its ``device_ctypes_pointer`` has the pointer type matching the
        binding currently in use (NVIDIA binding vs. ctypes binding).
        """
        self.assertTrue(driver.is_device_memory(obj))
        # Called for its side effect only; the test relies on it not raising.
        driver.require_device_memory(obj)
        if driver.USE_NV_BINDING:
            expected_class = driver.binding.CUdeviceptr
        else:
            expected_class = drvapi.cu_device_ptr
        self.assertIsInstance(obj.device_ctypes_pointer, expected_class)

    def test_device_memory(self):
        """Memory from ``memalloc`` passes the device-memory checks."""
        devmem = self.context.memalloc(1024)
        self._template(devmem)

    def test_device_view(self):
        """A view into a ``memalloc`` allocation passes the checks."""
        devmem = self.context.memalloc(1024)
        self._template(devmem.view(10))

    def test_host_alloc(self):
        """Mapped memory from ``memhostalloc`` passes the checks."""
        devmem = self.context.memhostalloc(1024, mapped=True)
        self._template(devmem)

    def test_pinned_memory(self):
        """A NumPy array pinned with ``mapped=True`` passes the checks."""
        ary = np.arange(10)
        devmem = self.context.mempin(ary, ary.ctypes.data,
                                     ary.size * ary.dtype.itemsize,
                                     mapped=True)
        self._template(devmem)

    def test_managed_memory(self):
        """Memory from ``memallocmanaged`` passes the checks."""
        devmem = self.context.memallocmanaged(1024)
        self._template(devmem)

    def test_derived_pointer(self):
        """Views created with ``view`` track their owner and its refcount."""
        # Use MemoryPointer.view to create derived pointer

        def handle_val(mem):
            # Normalise the handle to a plain integer address for either
            # binding so that handles can be compared arithmetically.
            if driver.USE_NV_BINDING:
                return int(mem.handle)
            else:
                return mem.handle.value

        def check(m, offset):
            # create a view of the original allocation
            v1 = m.view(offset)
            self.assertEqual(handle_val(v1.owner), handle_val(m))
            self.assertEqual(m.refct, 2)
            self.assertEqual(handle_val(v1) - offset, handle_val(v1.owner))
            # create a view of the view; offsets accumulate but the owner
            # remains the original allocation
            v2 = v1.view(offset)
            self.assertEqual(handle_val(v2.owner), handle_val(m))
            self.assertEqual(handle_val(v2) - offset * 2,
                             handle_val(v2.owner))
            self.assertEqual(m.refct, 3)
            # Deleting each view must release its reference on the owner.
            del v2
            self.assertEqual(m.refct, 2)
            del v1
            self.assertEqual(m.refct, 1)

        m = self.context.memalloc(1024)
        check(m=m, offset=0)
        check(m=m, offset=1)

    def test_user_extension(self):
        """A user-supplied finalizer runs exactly once per wrapped pointer."""
        # User can use MemoryPointer to wrap externally defined pointers.
        # This test checks if the finalizer is invoked at the correct time.
        fake_ptr = ctypes.c_void_p(0xdeadbeef)
        # Single-element list so the nested function can mutate the counter.
        dtor_invoked = [0]

        def dtor():
            dtor_invoked[0] += 1

        # Ensure finalizer is called when pointer is deleted
        ptr = driver.MemoryPointer(context=self.context, pointer=fake_ptr,
                                   size=40, finalizer=dtor)
        self.assertEqual(dtor_invoked[0], 0)
        del ptr
        self.assertEqual(dtor_invoked[0], 1)

        # Ensure removing derived pointer doesn't call finalizer
        ptr = driver.MemoryPointer(context=self.context, pointer=fake_ptr,
                                   size=40, finalizer=dtor)
        owned = ptr.own()
        del owned
        self.assertEqual(dtor_invoked[0], 1)
        del ptr
        self.assertEqual(dtor_invoked[0], 2)


class TestCudaMemoryFunctions(ContextResettingTestCase):
    """Round-trip tests for the driver's memcpy and memset helpers.

    Destination host buffers are zero-initialised so that an operation that
    silently does nothing cannot leave the expected data in place by
    accident.
    """

    def setUp(self):
        super().setUp()
        self.context = devices.get_context()

    def tearDown(self):
        # Drop our reference to the context before the base class tears down.
        del self.context
        super().tearDown()

    def test_memcpy(self):
        """Host -> device -> host copy returns the original data."""
        hstary = np.arange(100, dtype=np.uint32)
        # Start from zeros: the destination must differ from the source
        # before the copy, otherwise the final comparison proves nothing.
        hstary2 = np.zeros_like(hstary)
        sz = hstary.size * hstary.dtype.itemsize
        devary = self.context.memalloc(sz)

        driver.host_to_device(devary, hstary, sz)
        driver.device_to_host(hstary2, devary, sz)

        np.testing.assert_array_equal(hstary2, hstary)

    def test_memset(self):
        """``device_memset`` fills every byte of the buffer with the value."""
        dtype = np.dtype('uint32')
        n = 10
        sz = dtype.itemsize * n
        devary = self.context.memalloc(sz)
        driver.device_memset(devary, 0xab, sz)

        hstary = np.zeros(n, dtype=dtype)
        driver.device_to_host(hstary, devary, sz)

        # Each uint32 consists of four bytes, all set to 0xab.
        expected = np.full(n, 0xabababab, dtype=dtype)
        np.testing.assert_array_equal(hstary, expected)

    def test_d2d(self):
        """Device -> device copy preserves data uploaded from the host."""
        hst = np.arange(100, dtype=np.uint32)
        hst2 = np.zeros_like(hst)
        sz = hst.size * hst.dtype.itemsize
        dev1 = self.context.memalloc(sz)
        dev2 = self.context.memalloc(sz)
        driver.host_to_device(dev1, hst, sz)
        driver.device_to_device(dev2, dev1, sz)
        driver.device_to_host(hst2, dev2, sz)
        np.testing.assert_array_equal(hst2, hst)


@skip_on_cudasim('CUDA Memory API unsupported in the simulator')
class TestMVExtent(ContextResettingTestCase):
    """Tests for ``driver.host_memory_extents`` and ``host_memory_size``.

    For NumPy arrays, the first value returned by ``host_memory_extents`` is
    compared with the array's data address, and ``host_memory_size`` with the
    array's byte count. For ctypes objects, ``host_memory_size`` is compared
    with ``ctypes.sizeof``.
    """

    def test_c_contiguous_array(self):
        """A C-contiguous 1-D array reports its data address and byte size."""
        ary = np.arange(100)
        arysz = ary.dtype.itemsize * ary.size
        start, _end = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_f_contiguous_array(self):
        """A Fortran-ordered 2-D array reports its address and byte size."""
        ary = np.asfortranarray(np.arange(100).reshape(2, 50))
        arysz = ary.dtype.itemsize * ary.size
        start, _end = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_single_element_array(self):
        """A zero-dimensional array occupies exactly one item."""
        ary = np.asarray(np.uint32(1234))
        arysz = ary.dtype.itemsize
        start, _end = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_ctypes_struct(self):
        """A ctypes structure's size matches ``ctypes.sizeof``."""
        class mystruct(ctypes.Structure):
            _fields_ = [('x', ctypes.c_int), ('y', ctypes.c_int)]

        data = mystruct(x=123, y=432)
        sz = driver.host_memory_size(data)
        self.assertEqual(ctypes.sizeof(data), sz)

    def test_ctypes_double(self):
        """A ctypes scalar's size matches ``ctypes.sizeof``."""
        data = ctypes.c_double(1.234)
        sz = driver.host_memory_size(data)
        self.assertEqual(ctypes.sizeof(data), sz)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()