vector_add.py 1.67 KB
Newer Older
catchyrime's avatar
catchyrime committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python3
import logging
import sys
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from seek import *  # noqa: F403 (unable to detect undefined names)
from seek.instructions.gfx906 import *  # noqa: F403 (unable to detect undefined names)


class VectorAddProgram(Program):
    """
    void vector_add(const int* a, const int* b, int* sum)
    {
        sum[threadIdx.x] = a[threadIdx.x] + b[threadIdx.x]
    }
    """
    def __init__(self):
        super().__init__()

    def get_signature(self) -> str:
        return """
            __global__
            void vector_add(const int* a, const int* b, int* sum)
            """

    def setup(self):
        with self.add_block("MAIN") as block_main:
            s_args = new(count=6, align=4)
            s_load_dwordx4(s_args[0:3], s_kernarg, 0)
            s_load_dwordx2(s_args[4:5], s_kernarg, 16)

            s_a_ptr = s_args[0:1].alias()
            s_b_ptr = s_args[2:3].alias()
            s_sum_ptr = s_args[4:5].alias()

            v_offset = new()
            v_lshlrev_b32(v_offset, 2, threadIdx.x, comment="offset = threadIdx.x * sizeof(int)")

            v_a = new()
            v_b = new()
            v_sum = new()
            global_load_dword(v_a, v_offset, s_a_ptr, comment="a = a_ptr[threadIdx.x]")
            global_load_dword(v_b, v_offset, s_b_ptr, comment="b = b_ptr[threadIdx.x]")

            v_add_i32(v_sum, v_a, v_b, comment="sum = a + b")
            global_store_dword(v_offset, v_sum, s_sum_ptr, comment="sum_ptr[threadIdx.x] = sum")

            s_endpgm()


if __name__ == "__main__":
    VectorAddProgram().compile(log_level=logging.INFO, code_object_version=3)