contiguous.cpp

#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/launch.hpp>

// Standard headers for std::size_t, std::integral_constant, and std::runtime_error used below
#include <cstddef>
#include <stdexcept>
#include <type_traits>

namespace migraph {
namespace gpu {
namespace device {

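// Dispatch a runtime tensor rank (1 through 5) to a compile-time
// std::integral_constant so the rank-templated types below can be instantiated.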
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
    switch(n)
    {
    case 1:
    {
        f(std::integral_constant<std::size_t, 1>{});
        break;
    }
    case 2:
    {
        f(std::integral_constant<std::size_t, 2>{});
        break;
    }
    case 3:
    {
        f(std::integral_constant<std::size_t, 3>{});
        break;
    }
    case 4:
    {
        f(std::integral_constant<std::size_t, 4>{});
        break;
    }
    case 5:
    {
        f(std::integral_constant<std::size_t, 5>{});
        break;
    }
    default: throw std::runtime_error("Unknown tensor size");
    }
}

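// Fixed-size multi-dimensional index usable from both host and device code.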
template <size_t NDim>
struct hip_index
{
    size_t d[NDim];
    __device__ __host__ size_t& operator[](size_t i) { return d[i]; }
    __device__ __host__ size_t operator[](size_t i) const { return d[i]; }
};

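// Device-copyable view of a shape's lens and strides, with conversions between
// linear element indices and multi-dimensional indices.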
template <size_t NDim>
struct hip_tensor_descriptor
{
    __device__ __host__ hip_tensor_descriptor() = default;
    template <typename T, typename V>
    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
    {
        for(size_t i = 0; i < NDim; i++)
            lens[i] = lens_ext[i];
        for(size_t i = 0; i < NDim; i++)
            strides[i] = strides_ext[i];
    }
    __device__ __host__ hip_index<NDim> multi(size_t idx) const
    {
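        // Peel off one dimension at a time by dividing by its stride; this
        // recovers the multi-index when the strides describe a packed (standard)
        // layout, as the packed output shape used in contiguous() below does.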
        hip_index<NDim> result{};
        size_t tidx = idx;
        for(size_t is = 0; is < NDim; is++)
        {
            result[is] = tidx / strides[is];
            tidx       = tidx % strides[is];
        }
        return result;
    }
    __device__ __host__ size_t linear(hip_index<NDim> s) const
    {
        size_t idx = 0;
        for(size_t i = 0; i < NDim; i++)
            idx += s[i] * strides[i];
        return idx;
    }
    size_t lens[NDim]    = {};
    size_t strides[NDim] = {};
};

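// Copy the elements of an arbitrarily strided input (e.g. a transposed view)
// into a packed output buffer so the result is laid out contiguously.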
void contiguous(shape output_shape, argument arg, argument result)
{
    visit_all(result, arg)([&](auto output, auto input) {
        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
            const auto& s = arg.get_shape();
            hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
            hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
            auto* a  = input.data();
            auto* at = output.data();
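            // For every output element i: map i to a multi-index in the packed
            // output layout, then to the corresponding linear offset in the input.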
            gs_launch(s.elements())([=](auto i) {
                size_t lidx = a_desc.linear(at_desc.multi(i));
                at[i]       = a[lidx];
            });
        });
    });
}
} // namespace device
} // namespace gpu
} // namespace migraph