hipoc_kernel.hpp 5.61 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/*******************************************************************************
 *
 * MIT License
 *
 * Copyright (c) 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 *******************************************************************************/
#ifndef GUARD_OLC_HIPOC_KERNEL_HPP
#define GUARD_OLC_HIPOC_KERNEL_HPP

#include <array>
#include <cassert>
#include <hipoc_program.hpp>
#include <stringutils.hpp>
#include <manage_ptr.hpp>
#include <op_kernel_args.hpp>
#include <hipCheck.hpp>
#include <vector>
#include <memory>

Chao Liu's avatar
rename  
Chao Liu committed
39
namespace online_compile {
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

using HipEventPtr = OLC_MANAGE_PTR(hipEvent_t, hipEventDestroy);
inline HipEventPtr make_hip_event()
{
    hipEvent_t result = nullptr;
    MY_HIP_CHECK(hipEventCreate(&result));
    return HipEventPtr{result};
}

template <class T, class U>
struct KernelArgsPair
{
    static const int alignment    = sizeof(U);
    static const int padding      = (alignment - (sizeof(T) % alignment)) % alignment;
    static const int second_index = sizeof(T) + padding;
    KernelArgsPair(T x, U y)
    {

        new(buffer) T(x); // NOLINT (clang-analyzer-cplusplus.PlacementNew)
        new(buffer + second_index) U(y);
    }
    char buffer[second_index + sizeof(U)] = {};
};

template <class... Ts>
struct KernelArgsPack;

template <class T, class U, class... Ts>
struct KernelArgsPack<T, U, Ts...>
{
    using data_t = KernelArgsPack<KernelArgsPair<T, U>, Ts...>;
    KernelArgsPack(T x, U y, Ts... xs) : data(KernelArgsPair<T, U>(x, y), xs...) {}
    data_t data;
};

template <class T>
struct KernelArgsPack<T>
{
    KernelArgsPack(T x) : head(x) {}
    T head;
};

template <class... Ts>
struct KernelArgs
{
    KernelArgs(Ts... xs) : pack(xs...) { std::fill(std::begin(hidden), std::end(hidden), 0); }
    KernelArgsPack<Ts...> pack;
    uint64_t hidden[6] = {};
};

struct HIPOCKernelInvoke
{
    hipStream_t stream          = nullptr;
    hipFunction_t fun           = nullptr;
    std::array<size_t, 3> ldims = {};
    std::array<size_t, 3> gdims = {};
    std::string name;
    std::function<void(hipEvent_t, hipEvent_t)> callback;

    // Workaround for aggregate types in c++11
    HIPOCKernelInvoke() {}
    HIPOCKernelInvoke(hipStream_t pstream,
                      hipFunction_t pfun,
                      std::array<size_t, 3> pldims,
                      std::array<size_t, 3> pgdims,
                      std::string pname,
                      std::function<void(hipEvent_t, hipEvent_t)> pcallback)
        : stream(pstream), fun(pfun), ldims(pldims), gdims(pgdims), name(pname), callback(pcallback)
    {
    }
    void operator()(std::vector<OpKernelArg>& any_args) const
    {
        char hip_args[256] = {0};
        auto sz_left       = any_args[0].size();

        memcpy(hip_args, &(any_args[0].buffer[0]), any_args[0].size());

        for(unsigned long idx = 1; idx < any_args.size(); idx++)
        {
            auto& any_arg              = any_args[idx];
            unsigned long alignment    = any_arg.size();
            unsigned long padding      = (alignment - (sz_left % alignment)) % alignment;
            unsigned long second_index = sz_left + padding;
            memcpy(hip_args + second_index, &(any_arg.buffer[0]), any_arg.size());
            sz_left = second_index + alignment;
        }
        run(hip_args, sz_left);
    }

    template <class... Ts>
    void operator()(Ts... xs) const
    {
        KernelArgs<Ts...> args{xs...};
        run(&args, sizeof(args));
    }

    void run(void* args, std::size_t size) const;

    const std::string& GetName() const { return name; }
};

struct HIPOCKernel
{
    HIPOCProgram program;
    std::string name;
    std::array<size_t, 3> ldims = {};
    std::array<size_t, 3> gdims = {};
    std::string kernel_module;
    hipFunction_t fun = nullptr;

    HIPOCKernel() {}
    HIPOCKernel(HIPOCProgram p,
                const std::string kernel_name,
                std::vector<size_t> local_dims,
                std::vector<size_t> global_dims)
        : program(p), name(kernel_name)
    {
        assert(!local_dims.empty() && local_dims.size() <= 3);
        assert(!global_dims.empty() && global_dims.size() <= 3);
        ldims.fill(1);
        gdims.fill(1);
        std::copy(local_dims.begin(), local_dims.end(), ldims.begin());
        std::copy(global_dims.begin(), global_dims.end(), gdims.begin());

        kernel_module = name;
        MY_HIP_CHECK(hipModuleGetFunction(&fun, program.GetModule(), kernel_module.c_str()));
    }

    HIPOCKernelInvoke Invoke(hipStream_t stream,
                             std::function<void(hipEvent_t, hipEvent_t)> callback = nullptr) const;
};

Chao Liu's avatar
rename  
Chao Liu committed
172
} // namespace online_compile
173
174

#endif