builtin_vectors.py 1.2 KB
Newer Older
root's avatar
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import sys
import numpy
import cupy

# CUDA C source handed to cupy.RawKernel in main().
#
# It defines:
#   * a __device__ operator+ that adds two double3 values component-wise, and
#   * sum_kernel(lhs, rhs, out), which writes out[i] = lhs[i] + rhs, where
#     `lhs`/`out` are pointers to double3 arrays and `rhs` is a single double3
#     received by value.
#
# Each thread uses only threadIdx.x as its element index and performs no
# bounds check, so the launch must use exactly one thread per element and a
# single block (additional blocks would recompute the same indices).
code = '''
__device__ double3 operator+(const double3& lhs, const double3& rhs) {
    return make_double3(lhs.x + rhs.x,
                        lhs.y + rhs.y,
                        lhs.z + rhs.z);
}

extern "C" __global__ void sum_kernel(const double3* lhs,
                                            double3  rhs,
                                            double3* out) {
  int i = threadIdx.x;
  out[i] = lhs[i] + rhs;
}
'''

# Structured dtype with three consecutive float64 fields named x, y and z
# (24 bytes, no padding); main() uses it to reinterpret a 3-element float64
# array as one record for the kernel's `double3 rhs` argument.
double3 = numpy.dtype([(axis, numpy.float64) for axis in 'xyz'])


def main():
    """Run ``sum_kernel`` on random data and verify it against CuPy.

    Creates an (N, 3) float64 device array whose rows serve as the ``lhs``
    vectors and a single host-side float64 triple as ``rhs``, launches the
    raw kernel with one block of N threads, and compares the kernel output
    with the broadcast sum computed by CuPy. Prints a confirmation line on
    success; ``cupy.testing.assert_array_equal`` raises on a mismatch.
    """
    num_vectors = 8

    # Operands: every row of `vectors` is one (x, y, z) triple on the device,
    # while `offset` is a single triple that lives on the host.
    flat_values = cupy.random.rand(3 * num_vectors, dtype=numpy.float64)
    vectors = flat_values.reshape(num_vectors, 3)
    offset = numpy.random.rand(3).astype(numpy.float64)
    result = cupy.empty_like(vectors)

    # The kernel indexes by threadIdx.x only, hence one block with one thread
    # per vector.
    grid_dim = (1,)
    block_dim = (num_vectors,)

    sum_kernel = cupy.RawKernel(code, 'sum_kernel')
    # Viewing the three floats as one `double3` record matches the kernel's
    # by-value `double3 rhs` parameter.
    offset_record = offset.view(double3)
    sum_kernel(grid_dim, block_dim, (vectors, offset_record, result))

    # Reference result: broadcast the (1, 3) offset across all rows.
    reference = vectors + cupy.asarray(offset[numpy.newaxis, :])
    cupy.testing.assert_array_equal(reference, result)
    print("Kernel output matches expected value.")


if __name__ == "__main__":
    # Run the demo only when executed as a script; whatever main() returns is
    # forwarded to sys.exit() as the process exit status.
    exit_status = main()
    sys.exit(exit_status)