// pad.cpp
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Writes a padded copy of `arg1` into `result` and returns `result`.
//
// Two pieces of work are issued on `stream`:
//   1. `nary` is invoked on `result` with a functor that returns `value`
//      (presumably filling every output element with the pad value).
//   2. A launch over `arg1.get_shape().elements()` indices copies each input
//      element to the output position obtained by adding the first `ndim`
//      entries of `pads` to the element's multi-dimensional index, where
//      `ndim` is the number of dimensions of `result`.
//
// Only the first `ndim` entries of `pads` are read here.
//
// NOTE(review): nothing in this function checks that `pads` holds at least
// `ndim` entries -- confirm the caller guarantees it.
// NOTE(review): the input is read through the flat launch index, which
// looks like it expects a packed, standard-layout `arg1` -- confirm.
argument pad(hipStream_t stream,
                argument result,
                argument arg1,
                float value,
                std::vector<std::int64_t> pads)
{
    std::size_t input_count = arg1.get_shape().elements();

    // Step 1: cover the output with the padding value.
    nary(stream, result)([=] { return value; });

    // Step 2: place the input inside the output, displaced by the leading pads.
    visit_all(result, arg1)([&](auto output, auto input) {
        visit_tensor_size(result.get_shape().lens().size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> out_desc(output.get_shape());
            hip_tensor_descriptor<ndim> in_desc(input.get_shape());

            // A plain array is used so the launch lambda below can capture
            // the per-axis shifts by value.
            std::size_t shift[ndim];
            for(std::size_t axis = 0; axis < ndim; axis++)
            {
                shift[axis] = static_cast<std::size_t>(pads[axis]);
            }

            const auto* src = input.data();
            auto* dst       = output.data();

            gs_launch(stream, input_count)([=](auto flat) {
                // Multi-index of this input element, then moved by the shifts.
                auto coord = in_desc.multi(flat);
                for(std::size_t axis = 0; axis < ndim; axis++)
                {
                    coord[axis] += shift[axis];
                }
                const auto target = out_desc.linear(coord);
                dst[target]       = src[flat];
            });
        });
    });

    return result;
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx