/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <algorithm>
#include <array>
#include <cassert>
#include <filesystem>
#include <fstream>
#include <functional>
#include <numeric>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>

#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/env.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/module.hpp>
#include <migraphx/file_buffer.hpp>

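// Declared here, defined elsewhere: looks up a CK DeviceBatchedGemmSoftmaxGemm
// instance by index i, restricted to the instances accepted by pred. An
// instance is represented as its list of template parameter strings.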
const std::vector<std::string>&
get_gsg_instance(std::size_t i, const std::function<bool(const std::vector<std::string>&)>& pred);

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG);

// NOLINTNEXTLINE
static const char* const ck_gemm_softmax_gemm_kernel = R"__migraphx__(
#include <args.hpp>
#include <migraphx/kernels/ck_gemm_softmax_gemm.hpp>
#include <migraphx/kernels/pointwise.hpp>

namespace migraphx {

${preamble}

extern "C" {

__global__ void ${kernel}(${params})
{
    transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
        ck_gemm_softmax_gemm<CK_DeviceBatchedGemmSoftmaxGemm_Xdl_CShuffle<${instance}>, ${blocks_per_batch}>(xs...);
    });
}

}

} // namespace migraphx

)__migraphx__";

static std::size_t int_div_ceil(std::size_t x, std::size_t y) { return (x + y - 1) / y; }

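// Holds the template parameter list of one
// CK_DeviceBatchedGemmSoftmaxGemm_Xdl_CShuffle instance; the indices used
// below refer to positions within that parameter list.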
struct instance
{
    std::vector<std::string> params;
    static const std::size_t block_size_index = 17;

    std::size_t int_at(std::size_t i) const { return std::stoull(params[i]); }

    std::size_t get_block_size() const { return int_at(block_size_index); }

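    // Per-block tile size for problem dimension i; the tile sizes are stored
    // immediately after the block size in the parameter list.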
    std::size_t get_pb(std::size_t i) const
    {
        assert(i <= 4);
        return int_at(block_size_index + 1 + i);
    }

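    // Padding required to round each problem dimension up to a multiple of its
    // per-block tile size.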
    std::array<std::size_t, 4> get_pad(const std::array<std::size_t, 4>& config) const
    {
        std::array<std::size_t, 4> result{};
        for(auto i : range(config.size()))
        {
            result[i] = int_div_ceil(config[i], get_pb(i)) * get_pb(i) - config[i];
        }
        return result;
    }

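    // Number of thread blocks needed for one batch: tiles along the first
    // dimension (M) times tiles along the last dimension (O).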
    std::size_t get_grid_size(const std::array<std::size_t, 4>& config) const
    {
        return int_div_ceil(config[0], get_pb(0)) * int_div_ceil(config[3], get_pb(3));
    }

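    // Overwrite the GemmSpecialization parameter; compile_op sets it according
    // to which dimensions require padding.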
    void set_gemm(const std::string& s)
    {
        assert(params[15] == "ck::tensor_operation::device::GemmSpecialization::Default" or
               params[15] == "ck::tensor_operation::device::GemmSpecialization::MNKOPadding");
        params[15] = s;
    }

    std::string str() const { return join_strings(params, ","); }
};

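// Wraps f so that action() runs before f is invoked; used below to attach
// logging to the compiler replacement.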
template <class F, class Action>
auto action_decorate(F f, Action action)
{
    return [=](auto&&... xs) {
        action();
        f(std::forward<decltype(xs)>(xs)...);
    };
}

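// A tuning entry maps a list of input shapes to the index of the CK instance
// to use for them; the table is read as JSON from the file named by
// MIGRAPHX_CK_TUNING.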
using tuning_entry = std::pair<std::vector<shape>, size_t>;
static std::vector<tuning_entry> read_tuning(const std::string& s)
{
    if(not fs::exists(s))
        return {};
    return from_value<std::vector<tuning_entry>>(from_json_string(read_string(s)));
}

static std::size_t get_tuning_for(const std::vector<shape>& inputs)
{
    static auto tuning = read_tuning(string_value_of(MIGRAPHX_CK_TUNING{}, ""));
    if(tuning.empty())
        std::cout << "*********** Warning: No CK GSG tuning!" << std::endl;
    auto it = std::find_if(
        tuning.begin(), tuning.end(), [&](const auto& p) { return p.first == inputs; });
    if(it == tuning.end())
    {
        std::cout << "*********** Warning: CK GSG tuning missing for config!" << std::endl;
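        // No tuned entry for these shapes; fall back to a default instance index.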
        return 4;
    }
    return it->second;
}

struct ck_gemm_softmax_gemm_compiler : compiler<ck_gemm_softmax_gemm_compiler>
{
    static std::string get_layout(const shape& s)
    {
        if(not s.transposed())
            return "ck::tensor_layout::gemm::RowMajor";

        auto lens = s.lens();
        return lens[lens.size() - 1] > lens[lens.size() - 2]
                   ? "ck::tensor_layout::gemm::ColumnMajor"
                   : "ck::tensor_layout::gemm::RowMajor";
    }

    static std::string get_type(const shape& s)
    {
        if(s.type() == shape::half_type)
            return "ck::half_t";
        return shape::cpp_type(s.type());
    }

    template <class Iterator, class F>
    static std::string ck_tuple(Iterator start, Iterator last, F f)
    {
        std::vector<std::string> s;
        std::transform(start, last, std::back_inserter(s), f);
        return "ck::Tuple<" + join_strings(s, ",") + ">";
    }

    std::vector<std::string> names() const
    {
        return {"ck_gemm_softmax_gemm", "gpu::ck_gemm_softmax_gemm"};
    }

    operation compile_op(context& /* ctx */, const std::vector<shape>& inputs, const value& v) const
    {
        auto a_shape  = inputs[0];
        auto b_shape  = inputs[1];
        auto b1_shape = inputs[2];
        auto c_shape  = inputs.back();
        auto m        = a_shape.lens()[0];
        auto k        = a_shape.lens()[1];
        auto n        = c_shape.lens()[1];

        auto rank = a_shape.lens().size();

        std::array<char, 4> keys{'M', 'N', 'K', 'O'};
        // Problem config (m0, n0, k0, n1), keyed below as M, N, K, O
        std::array<std::size_t, 4> config{c_shape.lens()[rank - 2],
                                          b_shape.lens()[rank - 2],
                                          a_shape.lens().back(),
                                          c_shape.lens().back()};

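        // Select the CK instance: use an explicit tuning_val from the op's
        // value if present, otherwise consult the tuning table. Only instances
        // whose layouts and element types match the inputs are considered.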
        auto tuning_val =
            v.get("tuning_val", get_tuning_for({a_shape, b_shape, b1_shape, c_shape}));
        auto ip = instance{get_gsg_instance(tuning_val, [&](const auto& x) -> bool {
            return get_layout(a_shape) == x[0] and get_layout(b_shape) == x[1] and
                   get_layout(c_shape) == x[3] and get_type(a_shape) == x[4] and
                   get_type(b_shape) == x[5] and get_type(c_shape) == x[9];
        })};

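        // Build the GemmSpecialization name from the dimensions that need
        // padding (e.g. "MNKOPadding"), or use "Default" when none do.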
        auto padding = ip.get_pad(config);
        std::string gemm_type;
        for(auto i : range(padding.size()))
        {
            if(padding[i] != 0)
                gemm_type += keys[i];
        }
        if(gemm_type.empty())
            gemm_type = "Default";
        else
            gemm_type += "Padding";
        ip.set_gemm("ck::tensor_operation::device::GemmSpecialization::" + gemm_type);

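        // batch_count is the product of the output's leading (batch)
        // dimensions; each batch gets blocks_per_batch thread blocks.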
        auto blocks_per_batch = ip.get_grid_size(config);
        auto batch_count      = std::accumulate(c_shape.lens().rbegin() + 2,
                                           c_shape.lens().rend(),
                                           std::size_t{1},
                                           std::multiplies<std::size_t>());

        hip_compile_options options;
        auto block_size = ip.get_block_size();
        auto grid_size  = batch_count * blocks_per_batch;
        options.set_launch_params(v, grid_size * block_size, block_size);
        options.inputs         = inputs;
        options.output         = c_shape;
        options.kernel_name    = v.get("kernel", "ck_gemm_softmax_gemm_kernel");
        options.virtual_inputs = inputs;

        if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{}))
            options.params += " -DMIGRAPHX_CK_CHECK=1";

        auto src = interpolate_string(ck_gemm_softmax_gemm_kernel,
                                      {{"instance", ip.str()},
                                       {"params", enum_params(inputs.size(), "void * private_p")},
                                       {"args", enum_params(inputs.size(), "private_p")},
                                       {"blocks_per_batch", to_string(blocks_per_batch)},
                                       {"preamble", v.get("preamble", std::string{})},
                                       {"kernel", options.kernel_name}});

        return compile_hip_code_object(src, options);
    }

    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        auto v      = op.to_value();
        v["kernel"] = "ck_gemm_softmax_gemm_kernel";
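        // If the op carries a fused pointwise module, compile it into the
        // kernel preamble and derive a unique kernel name from its ops.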
        if(not ins->module_inputs().empty())
        {
            auto* pm      = ins->module_inputs().front();
            v["preamble"] = generate_pointwise(*pm, "post_ck_gemm_softmax_gemm_function") +
                            "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm_softmax_gemm, "
                            "post_ck_gemm_softmax_gemm_function);";
            v["kernel"] = "ck_gemm_softmax_gemm_" + generate_name_from_ops(*pm) + "_kernel";
        }

        auto shapes = to_shapes(ins->inputs());
        return action_decorate(replace(compile_op(ctx, shapes, v)), [=] {
            if(enabled(MIGRAPHX_LOG_CK_GEMM{}))
            {
                std::vector<shape> gemm_shapes{shapes[0], shapes[1], shapes[2], shapes.back()};
                std::cout << "ck_gemm_softmax_gemm: " << to_json_string(to_value(gemm_shapes))
                          << std::endl;
            }
        });
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx