scatter.cpp 1.41 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/scatter.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

Shucai Xiao's avatar
Shucai Xiao committed
13
14
// Scatter on the GPU.
//
// First copies every element of arg0 into result, then for each element i of
// arg1 (the indices) writes the i-th element of arg2 (the updates) into result
// at the multi-index of i within arg1's shape, with the `axis` coordinate
// replaced by the index value read from arg1. Negative index values are
// wrapped by adding the extent of arg0 along `axis`.
//
// Parameters:
//   stream - HIP stream on which both kernels are enqueued.
//   result - output argument; overwritten and returned.
//   arg0   - data argument copied into result before scattering.
//   arg1   - indices argument; its shape drives the scatter launch.
//   arg2   - updates argument, read by linear element index.
//   axis   - dimension of the output whose coordinate is taken from arg1.
//            NOTE(review): used directly as a subscript, so it is presumably
//            already normalized to [0, rank) by the caller - confirm.
//
// Returns: result.
argument scatter(
    hipStream_t stream, argument result, argument arg0, argument arg1, argument arg2, int64_t axis)
{
    auto ds   = arg0.get_shape();
    auto inds = arg1.get_shape();
    // Extent of the data along the scattered axis; used to wrap negative indices.
    auto axis_dim_size = ds.lens()[axis];
    hip_visit_all(result, arg0, inds)([&](auto output, auto data, auto s1) {
        auto* output_ptr     = device_cast(output.data());
        const auto* data_ptr = device_cast(data.data());
        // Seed the output with a linear element-wise copy of the input data.
        gs_launch(stream, ds.elements(), 256)([=](auto i)
                                                  __device__ { output_ptr[i] = data_ptr[i]; });
        hip_visit_all(arg1, arg2)([&](auto indices, auto update) {
            const auto* upd_ptr     = device_cast(update.data());
            const auto* indices_ptr = device_cast(indices.data());
            // One work item per index element, enqueued on the same stream as the copy.
            // NOTE(review): updates are read by linear index i, which presumably
            // assumes arg2 is laid out element-for-element like arg1 - confirm.
            gs_launch(stream, inds.elements(), 256)([=](auto i) __device__ {
                auto out_idx = s1.multi(i);
                auto index   = indices_ptr[i];
                // A negative index counts from the end of the axis.
                index           = index < 0 ? index + axis_dim_size : index;
                out_idx[axis]   = index;
                output[out_idx] = upd_ptr[i];
            });
        });
    });

    return result;
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx