test_compiler.py 3.41 KB
Newer Older
dugupeiwen's avatar
dugupeiwen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import tempfile
import os
import numpy as np

import unittest
from numba import roc
from numba.core import types
from numba.roc import compiler
from numba.roc.hsadrv.driver import hsa as hsart
from numba.roc.hsadrv.driver import BrigModule, Executable, Program


def copy_kernel(out, inp):
    """Store the first element of ``inp`` into the first slot of ``out``.

    Only index 0 of either argument is touched; nothing is returned.
    """
    first = inp[0]
    out[0] = first


def copy_kernel_1d(out, inp):
    """Copy one element of ``inp`` into the same position of ``out``.

    The position is the value returned by ``roc.get_global_id(0)``; the
    write is skipped when that value is not below ``out.size``.
    """
    gid = roc.get_global_id(0)
    # Guard so an index at or past the end of ``out`` is never written.
    if gid < out.size:
        out[gid] = inp[gid]


def assign_value(out, inp):
    """Write the value ``inp`` into one element of ``out``.

    The element written is selected by ``roc.get_global_id(0)``; nothing
    is written when that index is not below ``out.size``.
    """
    gid = roc.get_global_id(0)
    # Guard so an index at or past the end of ``out`` is never written.
    if gid < out.size:
        out[gid] = inp


class TestCodeGeneration(unittest.TestCase):
    """Compile kernels and inspect the generated assembly text."""

    def _assert_entry_symbol_exported(self, pyfunc):
        """Compile ``pyfunc`` for two float32 arrays and check its symbol.

        The compiled kernel's assembly must contain a ``.globl`` directive
        followed by a tab and the kernel's entry name.
        """
        float32_array = types.float32[:]
        compiled = compiler.compile_kernel(pyfunc, [float32_array] * 2)
        directive = ".globl\t{0}".format(compiled.entry_name)
        self.assertIn(directive, compiled.assembly)

    def test_copy_kernel(self):
        """The single-element copy kernel exports its entry symbol."""
        self._assert_entry_symbol_exported(copy_kernel)

    def test_copy_kernel_1d(self):
        """The indexed copy kernel exports its entry symbol."""
        self._assert_entry_symbol_exported(copy_kernel_1d)


class _TestBase(unittest.TestCase):
    """Fixture base that picks HSA agents and opens a queue per test."""

    def setUp(self):
        """Select the first component and non-component agents, then a queue.

        ``self.gpu`` is the first agent in ``hsart.agents`` whose
        ``is_component`` flag is truthy, ``self.cpu`` the first whose flag is
        falsy.  The queue is created on ``self.gpu`` with that agent's
        ``queue_max_size``.
        """
        component_agents = [agent for agent in hsart.agents
                            if agent.is_component]
        self.gpu = component_agents[0]

        other_agents = [agent for agent in hsart.agents
                        if not agent.is_component]
        self.cpu = other_agents[0]

        max_size = self.gpu.queue_max_size
        self.queue = self.gpu.create_queue_multi(max_size)

    def tearDown(self):
        """Drop the fixture attributes, queue first, then the agents."""
        del self.queue, self.gpu, self.cpu


class TestExecution(unittest.TestCase):
    """Compile a kernel, launch it, and compare the output on the host."""

    def test_hsa_kernel(self):
        """Copy 1024 float32 values with ``copy_kernel_1d`` and verify them."""
        element_count = 1024
        group_size = 256

        expected = np.arange(element_count, dtype=np.float32)
        actual = np.zeros_like(expected)

        # Compile the kernel for two contiguous 1-D float32 arrays.
        float32_array = types.float32[::1]
        compiled = compiler.compile_kernel(copy_kernel_1d,
                                           [float32_array] * 2)

        # Launch with configuration [4, 256]; presumably (number of groups,
        # group size), giving one work-item per element -- TODO confirm.
        group_count = expected.size // group_size
        compiled[group_count, group_size](actual, expected)

        np.testing.assert_equal(expected, actual)


class TestKernelArgument(unittest.TestCase):
    """Pass scalar arguments of several types into a compiled kernel."""

    def _test_template(self, nbtype, src):
        """Run ``assign_value`` with a scalar of ``nbtype`` and check it.

        ``nbtype`` is converted to a NumPy dtype through its string form;
        ``src`` is cast to that dtype's scalar type before being passed to
        the kernel.  The kernel is launched with configuration ``[1, 1]`` and
        the single output element must equal the cast scalar.
        """
        np_dtype = np.dtype(str(nbtype))
        scalar = np_dtype.type(src)
        result = np.zeros(1, dtype=np_dtype)

        signature = [nbtype[::1], nbtype]
        compiled = compiler.compile_kernel(assign_value, signature)
        compiled[1, 1](result, scalar)

        self.assertEqual(result[0], scalar)

    def test_float64(self):
        """Scalar argument of type float64."""
        self._test_template(nbtype=types.float64, src=1. / 3.)

    def test_float32(self):
        """Scalar argument of type float32."""
        self._test_template(nbtype=types.float32, src=1. / 3.)

    def test_int32(self):
        """Scalar argument of type int32."""
        self._test_template(nbtype=types.int32, src=123)

    def test_int16(self):
        """Scalar argument of type int16."""
        self._test_template(nbtype=types.int16, src=123)

    def test_complex64(self):
        """Scalar argument of type complex64."""
        self._test_template(nbtype=types.complex64, src=12 + 34j)

    def test_complex128(self):
        """Scalar argument of type complex128."""
        self._test_template(nbtype=types.complex128, src=12 + 34j)


def udt_devfunc(a, i):
    """Return the item of ``a`` found at index ``i``."""
    item = a[i]
    return item


class TestDeviceFunction(unittest.TestCase):
    """Call a separately compiled device function from inside a kernel."""

    def test_device_function(self):
        """Copy ten int32 values through ``udt_devfunc`` and verify them."""
        expected = np.arange(10, dtype=np.int32)
        actual = np.zeros_like(expected)

        int32_array = types.int32[::1]
        # Device function signature: (int32 array, intp) -> array element type.
        devfn = compiler.compile_device(
            udt_devfunc,
            int32_array.dtype,
            [int32_array, types.intp],
        )

        def udt_devfunc_caller(dst, src):
            gid = roc.get_global_id(0)
            # Skip indices that fall outside the destination array.
            if gid < dst.size:
                dst[gid] = devfn(src, gid)

        compiled = compiler.compile_kernel(
            udt_devfunc_caller,
            [int32_array, int32_array],
        )

        # Launch configuration [10, 1]: the first entry equals the number of
        # elements, so each index 0..9 is presumably visited once.
        compiled[expected.size, 1](actual, expected)
        np.testing.assert_equal(actual, expected)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()