// scatter.cpp - GPU device implementation of the scatter operation.
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/scatter.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Device-side entry point for scatter.
// Scatters the values of `arg2` (updates) into a copy of `arg0` (data) along
// `axis`, writing the outcome into `result`.
//
// Two launches are issued on `stream`, in this order:
//   1. an element-by-element copy of `arg0` into `result`;
//   2. for every linear position i of `arg1` (indices): take the multi-index of
//      i in `arg1`'s shape, replace its `axis` coordinate with the index value
//      stored at i, and store the i-th element of `arg2` at that position of
//      `result`.
//
// Parameters:
//   stream - HIP stream both launches are issued on.
//   result - output argument that is written to and returned.
//   arg0   - data argument; supplies the initial contents of `result` and the
//            length of the scatter axis.
//   arg1   - indices argument; a negative index is offset by the length of
//            `arg0` along `axis`. No other range check is performed.
//   arg2   - updates argument; read linearly with the same i used for `arg1`.
//   axis   - dimension to scatter along. It is used directly as a subscript
//            with no normalization or bounds check here, so callers presumably
//            pass an already-normalized, non-negative axis.
//
// Returns `result`.
//
// NOTE(review): the initial copy indexes both buffers linearly over
// `arg0`'s element count, which presumably assumes `result` and `arg0` share
// the same packed layout - confirm against the callers.
// NOTE(review): when several indices resolve to the same output position, the
// code does nothing to order those writes, so which update wins is presumably
// unspecified.
argument scatter(
    hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis)
{
    auto ds            = arg0.get_shape();
    auto inds          = arg1.get_shape();
    // Length of the data tensor along the scatter axis; used to wrap negative indices.
    auto axis_dim_size = ds.lens()[axis];
    hip_visit_all(result, arg0, inds)([&](auto output, auto data, auto s1) {
        auto* output_ptr     = device_cast(output.data());
        const auto* data_ptr = device_cast(data.data());
        // Step 1: seed the output with the unmodified data. 256 is the third
        // gs_launch argument (presumably the work-group size).
        gs_launch(stream, ds.elements(), 256)([=](auto i)
                                                  __device__ { output_ptr[i] = data_ptr[i]; });
        // NOTE(review): indices and updates are visited together; confirm that
        // hip_visit_all accepts arguments whose element types differ.
        hip_visit_all(arg1, arg2)([&](auto indices, auto update) {
            const auto* upd_ptr     = device_cast(update.data());
            const auto* indices_ptr = device_cast(indices.data());
            // Step 2: one work item per index element overwrites its target position.
            gs_launch(stream, inds.elements(), 256)([=](auto i) __device__ {
                // Position of element i within the indices shape.
                auto out_idx    = s1.multi(i);
                auto index      = indices_ptr[i];
                // Negative indices count back from the end of the scatter axis.
                index           = index < 0 ? index + axis_dim_size : index;
                // Redirect only the scatter-axis coordinate; the others stay as in the indices shape.
                out_idx[axis]   = index;
                output[out_idx] = upd_ptr[i];
            });
        });
    });

    return result;
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx