fuse_ck.cpp 9.55 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <migraphx/gpu/fuse_ck.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;

namespace gpu {

struct ck_gemm
{
    operation op = make_op("dot");

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.op, "op"));
    }

    std::string name() const { return "gpu::ck_gemm"; }

    void check_gemm_shape(const shape& s) const
    {
        if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1))
            MIGRAPHX_THROW("Invalid shape for ck_gemm");
    }

    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
    {
        check_shapes{inputs, *this}.same_ndims();
        if(inputs.size() < 2)
            MIGRAPHX_THROW("should have at least two inputs.");
        auto a = inputs[0];
        auto b = inputs[1];
        for(const auto& input : inputs)
            check_gemm_shape(input);
        auto r = op.compute_shape({a, b});
        if(mods.empty())
            return r;
        return r.with_type(mods.front()->get_output_shapes().front().type());
    }
Alan Turner's avatar
Alan Turner committed
68
69
70
71
72

    static bool is_ck_supported_type(shape::type_t t)
    {
        return contains({shape::half_type, shape::int8_type, shape::int32_type}, t);
    }
73
74
75
};
MIGRAPHX_REGISTER_OP(ck_gemm);

Alan Turner's avatar
Alan Turner committed
76
struct ck_gemm_softmax_gemm
77
{
Alan Turner's avatar
Alan Turner committed
78
    operation op = make_op("dot");
79
    double scale = 1.0;
Alan Turner's avatar
Alan Turner committed
80
81
82
83

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
84
        return pack(f(self.op, "op"), f(self.scale, "scale"));
Alan Turner's avatar
Alan Turner committed
85
86
87
88
89
90
91
92
93
94
    }

    std::string name() const { return "gpu::ck_gemm_softmax_gemm"; }

    void check_gemm_shape(const shape& s) const
    {
        if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1))
            MIGRAPHX_THROW("Invalid shape for ck_gemm_softmax_gemm");
    }

95
    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>&) const
Alan Turner's avatar
Alan Turner committed
96
97
    {
        check_shapes{inputs, *this}.same_ndims();
Alan Turner's avatar
Alan Turner committed
98
99
        if(inputs.size() < 3)
            MIGRAPHX_THROW("Expected 3 inputs but got " + to_string(inputs.size()));
Alan Turner's avatar
Alan Turner committed
100
101
102
103
104
105
106
107
108
109
        auto a  = inputs[0];
        auto b  = inputs[1];
        auto b1 = inputs[2];
        for(const auto& input : inputs)
        {
            check_gemm_shape(input);
        }
        return op.compute_shape({op.compute_shape({a, b}), b1});
    }

Alan Turner's avatar
Alan Turner committed
110
    static bool is_ck_supported_type(shape::type_t t) { return contains({shape::half_type}, t); }
Alan Turner's avatar
Alan Turner committed
111
112
113
114
};
MIGRAPHX_REGISTER_OP(ck_gemm_softmax_gemm);

namespace {
115
116
117
118
119

// Predicate: true when `ins` is a dot/quant_dot that this pass is willing to
// hand to CK (supported output type, size constraints satisfied).
MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
{
    const auto op_name = ins->name();
    const bool is_dot  = op_name == "dot" or op_name == "quant_dot";
    if(not is_dot)
        return false;

    const auto out_type = ins->get_shape().type();
    if(not ck_gemm::is_ck_supported_type(out_type))
        return false;

    // First input is A, last input is B.
    const auto& a_lens = ins->inputs().front()->get_shape().lens();
    const auto& b_lens = ins->inputs().back()->get_shape().lens();
    const auto m       = a_lens[a_lens.size() - 2];
    const auto n       = b_lens.back();
    const auto k       = a_lens.back();

    // Integer gemms must have M, N and K divisible by 4 in ck
    const bool is_integer_gemm = contains({shape::int8_type, shape::int32_type}, out_type);
    if(is_integer_gemm and (m % 4 != 0 or n % 4 != 0 or k % 4 != 0))
        return false;

    // Skipping GEMMs with a K dimension greater than 2048 is a coarse-grained
    // strategy to avoid poor-performing GEMM kernels from CK.
    // To-do: Investigate a more precise strategy
    return k <= 2048;
}

// Rewrites a pointwise instruction that consumes a CK-eligible gemm into a
// single ck_gemm instruction carrying the pointwise module.
struct find_ck_gemm_pointwise
{
    // Matches a "pointwise" with any input that is a CK-eligible gemm, looking
    // through "contiguous". Binds the gemm as "gemm" and the actual pointwise
    // input as "x".
    auto matcher() const
    {
        auto ck_dot       = match::name("dot", "quant_dot")(is_ck_gemm().bind("gemm"));
        auto through_cont = match::skip(match::name("contiguous"))(ck_dot);
        return match::name("pointwise")(match::any_of[match::inputs()](through_cont.bind("x")));
    }

    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto pw_ins   = r.result;
        auto gemm_ins = r.instructions["gemm"];
        auto x_ins    = r.instructions["x"]; // input after contiguous

        // Unless the gemm produces int32, the pointwise output type must match
        // the gemm output type.
        const auto gemm_type = gemm_ins->get_shape().type();
        if(gemm_type != shape::int32_type and pw_ins->get_shape().type() != gemm_type)
            return;

        auto inputs = pw_ins->inputs();

        // Every pointwise input must have a type ck_gemm accepts.
        const auto unsupported_type = [](auto input) {
            return not ck_gemm::is_ck_supported_type(input->get_shape().type());
        };
        if(std::any_of(inputs.begin(), inputs.end(), unsupported_type))
            return;

        // Bail out when any pointwise input is fed directly by a "capture".
        const auto fed_by_capture = [](auto input) {
            return not input->inputs().empty() and input->inputs().front()->name() == "capture";
        };
        if(std::any_of(inputs.begin(), inputs.end(), fed_by_capture))
            return;

        auto gemm_it = std::find(inputs.begin(), inputs.end(), x_ins);
        assert(gemm_it != inputs.end());
        auto gemm_idx = gemm_it - inputs.begin();

        auto* pm   = pw_ins->module_inputs().front();
        auto names = pm->get_parameter_names();
        std::sort(names.begin(), names.end());

        if(gemm_idx != 0)
        {
            // Exchange the first parameter with the gemm's parameter by adding
            // two replacement parameters whose names carry the other's name
            // plus "_0", then dropping the originals.
            // NOTE(review): relies on sorted parameter names lining up with
            // the input order - confirm against the pointwise module builder.
            const auto& first_name = names[0];
            const auto& gemm_name  = names[gemm_idx];
            auto old_first         = pm->get_parameter(first_name);
            auto old_gemm          = pm->get_parameter(gemm_name);
            auto new_gemm          = pm->add_parameter(first_name + "_0", old_gemm->get_shape());
            auto new_first         = pm->add_parameter(gemm_name + "_0", old_first->get_shape());
            pm->replace_instruction(old_gemm, new_gemm);
            pm->replace_instruction(old_first, new_first);
            pm->remove_instruction(old_first);
            pm->remove_instruction(old_gemm);
        }

        // New argument list: the gemm's own inputs first, then the remaining
        // pointwise inputs (the gemm result itself is removed).
        inputs.erase(gemm_it);
        inputs.insert(inputs.begin(), gemm_ins->inputs().begin(), gemm_ins->inputs().end());

        mpm.get_module().replace_instruction(
            pw_ins, ck_gemm{gemm_ins->get_operator()}, inputs, {pm});
    }
};

struct find_ck_gemm
{
Alan Turner's avatar
Alan Turner committed
197
    auto matcher() const { return match::name("dot", "quant_dot")(is_ck_gemm().bind("gemm")); }
198
199
200
201
202
203
204
205

    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto ins = r.result;
        mpm.get_module().replace_instruction(ins, ck_gemm{ins->get_operator()}, ins->inputs());
    }
};

Alan Turner's avatar
Alan Turner committed
206
static auto is_mul_module(module& m)
207
{
Alan Turner's avatar
Alan Turner committed
208
209
    auto is_mul =
        match::arg(0)(match::name("mul")(match::all_of[match::inputs()](match::name("@param"))));
Alan Turner's avatar
Alan Turner committed
210
211
212
213
214
    return match_instruction(m, std::prev(m.end()), is_mul).result != m.end();
}

// Predicate: true when `ins` is a "pointwise" with exactly one module input
// and that module satisfies is_mul_module().
MIGRAPHX_PRED_MATCHER(is_pointwise_scale, instruction_ref ins)
{
    const auto& mods            = ins->module_inputs();
    const bool single_pointwise = ins->name() == "pointwise" and mods.size() == 1;
    // Short-circuit keeps front() from being evaluated unless exactly one module exists.
    return single_pointwise and is_mul_module(*mods.front());
}

// Rewrites dot -> pointwise scale (mul by a constant) -> softmax -> dot into a
// single ck_gemm_softmax_gemm instruction.
struct find_ck_gemm_softmax_gemm
{
    // Matches a CK-eligible "dot" (bound as "gemm2") whose first argument is a
    // "softmax" (bound as "softmax"). The softmax's first argument must be a
    // pointwise scale whose two arguments are, in either order, a constant
    // (bound as "scale") and a CK-eligible "dot" (bound as "gemm1", optionally
    // behind "contiguous").
    auto matcher() const
    {
        auto gemm1 =
            match::skip(match::name("contiguous"))(match::name("dot")(is_ck_gemm().bind("gemm1")));
        auto mul     = match::name("pointwise")(match::either_arg(0, 1)(
            match::is_constant().bind("scale"), gemm1))(is_pointwise_scale());
        auto softmax = match::name("softmax")(match::arg(0)(mul)).bind("softmax");

        return match::name("dot")(is_ck_gemm().bind("gemm2"))(match::arg(0)(softmax));
    }

    // Replaces the matched second gemm with ck_gemm_softmax_gemm over inputs
    // (A, B, B1). Leaves the graph untouched when the first gemm's type is not
    // supported or when the scale constant is not a single repeated value.
    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto ins       = r.result;
        auto gemm2_ins = r.instructions["gemm2"];
        auto gemm1_ins = r.instructions["gemm1"];
        auto scale_lit = r.instructions["scale"];

        if(not ck_gemm_softmax_gemm::is_ck_supported_type(gemm1_ins->get_shape().type()))
            return;

        double scale       = 1.0;
        bool uniform_scale = false;
        scale_lit->eval().visit([&](const auto s) {
            // Nothing to read from an empty view; leave uniform_scale false.
            if(s.begin() == s.end())
                return;
            // CK only supports single-valued scale
            const auto first = s.front();
            if(std::all_of(s.begin() + 1, s.end(), [&](auto v) { return float_equal(v, first); }))
            {
                scale         = first;
                uniform_scale = true;
            }
        });
        // A `return` inside the visitor only leaves the lambda, so the decision
        // to skip the rewrite has to be taken here, in apply() itself.
        if(not uniform_scale)
            return;

        auto inputs = gemm1_ins->inputs();            // A, B
        inputs.push_back(gemm2_ins->inputs().back()); // B1

        mpm.get_module().replace_instruction(
            ins, ck_gemm_softmax_gemm{gemm2_ins->get_operator(), scale}, inputs);
    }
};

263
264
265
266
} // namespace

// Runs the three CK fusion rewrites over the module, largest pattern first.
// The later finders match plain "dot"/"quant_dot" instructions, which the
// earlier ones replace with CK operations when they apply; find_ck_gemm, run
// last, picks up the remaining eligible gemms.
void fuse_ck::apply(module_pass_manager& mpm) const
{
    // gemm -> scale -> softmax -> gemm
    match::find_matches(mpm, find_ck_gemm_softmax_gemm{});
    // gemm -> pointwise
    match::find_matches(mpm, find_ck_gemm_pointwise{});
    // standalone gemm
    match::find_matches(mpm, find_ck_gemm{});
}

} // namespace gpu

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx