"src/vscode:/vscode.git/clone" did not exist on "d21778c624098766f36e63ec205a02eb4a1d6a84"
lowering.cpp 14.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Shucai Xiao's avatar
Shucai Xiao committed
24
#include <iterator>
Paul's avatar
Paul committed
25
26
27
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
28
#include <migraphx/make_op.hpp>
29
30
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
31
32

#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
33
#include <migraphx/op/if_op.hpp>
34
35
36
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_dot.hpp>

Paul's avatar
Paul committed
37
#include <migraphx/gpu/context.hpp>
38
#include <migraphx/gpu/device_name.hpp>
Paul's avatar
Paul committed
39
#include <migraphx/gpu/gemm.hpp>
40
41
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/rocblas.hpp>
42
#include <migraphx/gpu/compiler.hpp>
43
#include <migraphx/iterator_for.hpp>
44
#include <migraphx/program.hpp>
Paul's avatar
Paul committed
45
#include <utility>
46
#include <functional>
Khalique's avatar
Khalique committed
47
#include <algorithm>
Shucai Xiao's avatar
Shucai Xiao committed
48
#include <map>
Paul's avatar
Paul committed
49

Paul's avatar
Paul committed
50
namespace migraphx {
Paul's avatar
Paul committed
51
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
52
namespace gpu {
Paul's avatar
Paul committed
53
54
55

struct miopen_apply
{
Shucai Xiao's avatar
Shucai Xiao committed
56
    module* mod          = nullptr;
57
    const lowering* pass = nullptr;
Shucai Xiao's avatar
Shucai Xiao committed
58
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
59
    instruction_ref last{};
Shucai Xiao's avatar
Shucai Xiao committed
60
61
    bool offload_copy   = false;
    bool int8_x4_format = true;
62
    bool compute_fp32   = false;
Paul's avatar
Paul committed
63

64
    context& get_context() const
65
66
67
68
69
70
    {
        assert(pass != nullptr);
        assert(pass->ctx != nullptr);
        return *pass->ctx;
    }

Paul's avatar
Paul committed
71
72
73
74
75
76
77
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

78
79
    void init()
    {
Shucai Xiao's avatar
Shucai Xiao committed
80
        assert(mod != nullptr);
81
        assert(pass != nullptr);
82

83
84
85
        auto& ctx      = get_context();
        int8_x4_format = get_int8_x4_format(ctx);
        compute_fp32   = get_compute_fp32_flag();
86
        offload_copy   = (mod->name() == "main") ? pass->offload_copy : false;
Paul's avatar
Paul committed
87

88
89
90
91
92
        add_generic_op("contiguous");

        add_extend_op("argmax");
        add_extend_op("argmin");
        add_extend_op("logsoftmax");
Shucai Xiao's avatar
Shucai Xiao committed
93
        add_extend_op("lrn");
turneram's avatar
turneram committed
94
        add_extend_op("multinomial");
Shucai Xiao's avatar
Shucai Xiao committed
95
        add_extend_op("nonzero");
96
        add_extend_op("pooling");
97
        add_extend_op("prefix_scan_sum");
Cagri Eryilmaz's avatar
Cagri Eryilmaz committed
98
        add_extend_op("reverse");
99
100
101
        add_extend_op("rnn_var_sl_last_output");
        add_extend_op("rnn_var_sl_shift_output");
        add_extend_op("rnn_var_sl_shift_sequence");
102
        add_extend_op("scatter_none");
Shucai Xiao's avatar
Shucai Xiao committed
103
        add_extend_op("topk");
104

105
106
107
        add_convolution_op("convolution");
        add_convolution_op("deconvolution");
        add_convolution_op("quant_convolution");
Shucai Xiao's avatar
Shucai Xiao committed
108
109
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");
Shucai Xiao's avatar
Shucai Xiao committed
110
        add_if_op();
Shucai Xiao's avatar
Shucai Xiao committed
111
        add_loop_op();
Shucai Xiao's avatar
Shucai Xiao committed
112
        add_neg_op();
113
        add_nms_op();
Charlie Lin's avatar
Charlie Lin committed
114
        add_select_module_op();
115
116
    }

117
    void copy_params() const
118
    {
Shucai Xiao's avatar
Shucai Xiao committed
119
        if(not offload_copy)
120
            return;
121

Shucai Xiao's avatar
Shucai Xiao committed
122
        for(auto ins : iterator_for(*mod))
123
124
125
        {
            if(ins->name() != "@param")
                continue;
126

Shucai Xiao's avatar
Shucai Xiao committed
127
128
129
130
            // parameter no outputs, no need to insert copy to gpu
            if(ins->outputs().empty())
                continue;

131
132
            auto pos = std::next(ins);
            auto a   = insert_allocation(pos, ins->get_shape());
133
            auto c   = mod->insert_instruction(pos, make_op("hip::copy_to_gpu"), ins, a);
Shucai Xiao's avatar
Shucai Xiao committed
134
            mod->replace_instruction(ins, c);
135
        }
136
137

        // return instruction
Shucai Xiao's avatar
Shucai Xiao committed
138
        auto ret = std::prev(mod->end());
139
140
        if(ret->name() == "@return")
        {
141
            const auto& inputs = ret->inputs();
142
143
144

            // each input of ret need to be copied from gpu to host, and replace
            // output with copy output
145
            for(const auto& in : inputs)
146
            {
147
                auto p_output = mod->insert_instruction(ret, make_op("hip::copy_from_gpu"), in);
148
149
150
151
152
153
                instruction::replace_argument(ret, in, p_output);
            }
        }
        // else branch to handle legacy program without the return instruction
        else
        {
154
            mod->add_instruction(make_op("hip::copy_from_gpu"), ret);
155
        }
156
157
    }

Paul's avatar
Paul committed
158
159
    void apply()
    {
160
        init();
Shucai Xiao's avatar
Shucai Xiao committed
161
        for(auto it = mod->begin(); it != mod->end(); it++)
Paul's avatar
Paul committed
162
        {
163
164
            auto s     = it->get_shape();
            auto attrs = it->get_operator().attributes();
165
            if(apply_map.count(it->name()) > 0)
166
            {
167
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
168
            }
169
170
171
172
            else if(has_compiler_for(it->name()))
            {
                check_shape(s, insert_precompile_op(it));
            }
173
174
175
176
            else if(attrs.contains("target"))
            {
                check_shape(s, insert_custom_op(it, attrs));
            }
Paul's avatar
Paul committed
177
        }
178
        copy_params();
Paul's avatar
Paul committed
179
180
    }

181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
    instruction_ref insert_custom_op(instruction_ref ins, const value& attrs) const
    {
        const auto& custom_op = ins->get_operator();
        if(attrs.at("target") == "cpu")
        {
            auto s = ins->get_shape();
            std::vector<instruction_ref> cpu_inputs;
            auto inputs = ins->inputs();
            auto output = inputs.back();
            std::transform(
                inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) {
                    return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in);
                });
            cpu_inputs.front() =
                mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs);
            auto cpu_out = mod->insert_instruction(ins, custom_op, cpu_inputs);
            auto gpu_out =
                mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output);
            return mod->replace_instruction(ins, gpu_out);
        }
        return ins;
    }

204
    instruction_ref insert_precompile_op(instruction_ref ins) const
205
206
207
208
209
210
211
212
213
214
215
216
    {
        auto output                       = insert_allocation(ins, ins->get_shape());
        std::vector<instruction_ref> refs = ins->inputs();
        refs.push_back(output);

        return mod->replace_instruction(
            ins,
            make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}),
            refs,
            ins->module_inputs());
    }

217
    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
Paul's avatar
Paul committed
218
    {
219
        return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
Paul's avatar
Paul committed
220
221
    }

222
223
    template <typename Op>
    void add_gemm_op(const std::string& name)
224
225
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
226
            std::vector<instruction_ref> refs = ins->inputs();
227
228
229
            assert(refs.size() == 2);
            auto output = insert_allocation(ins, ins->get_shape());
            refs.push_back(output);
Shucai Xiao's avatar
Shucai Xiao committed
230
            return mod->replace_instruction(
231
                ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
232
233
234
        });
    }

235
    void add_convolution_op(const std::string& name)
236
    {
237
        apply_map.emplace(name, [=](instruction_ref ins) {
238
239
240
241
            operation conv = make_op(
                "gpu::" + name,
                {{"op", ins->get_operator().to_value()}, {"int8_x4_format", int8_x4_format}});
            auto output = insert_allocation(ins, ins->get_shape());
242

243
244
245
246
247
            return mod->replace_instruction(ins,
                                            make_op("gpu::miopen_op", {{"op", to_value(conv)}}),
                                            ins->inputs().at(0),
                                            ins->inputs().at(1),
                                            output);
Shucai Xiao's avatar
Shucai Xiao committed
248
249
250
        });
    }

251
252
253
    // add_generic_op just constructs the operator with no fields whereas add_extend_op copies over
    // the fields Since it doesn't have fields its default constructed

254
255
256
    void add_generic_op(const std::string& name) { add_generic_op(name, "gpu::" + name); }

    void add_generic_op(const std::string& op_name, const std::string& gpu_name)
Paul's avatar
Paul committed
257
    {
258
        apply_map.emplace(op_name, [=](instruction_ref ins) {
259
260
261
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
262

Shucai Xiao's avatar
Shucai Xiao committed
263
            return mod->replace_instruction(ins, make_op(gpu_name), refs);
264
        });
Paul's avatar
Paul committed
265
    }
Paul's avatar
Paul committed
266

267
268
269
    void add_extend_op(const std::string& name) { add_extend_op(name, "gpu::" + name); }

    void add_extend_op(const std::string& op_name, const std::string& gpu_name)
Khalique's avatar
Khalique committed
270
    {
271
272
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op                         = ins->get_operator();
273
274
275
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
276

Shucai Xiao's avatar
Shucai Xiao committed
277
            return mod->replace_instruction(ins, make_op(gpu_name, op.to_value()), refs);
278
        });
Khalique's avatar
Khalique committed
279
280
    }

Shucai Xiao's avatar
Shucai Xiao committed
281
282
283
284
285
286
    // use 0 - input to represent neg
    void add_neg_op()
    {
        apply_map.emplace("neg", [=](instruction_ref ins) {
            auto s = ins->get_shape();
            std::vector<float> zeros(s.elements(), 0.0f);
Shucai Xiao's avatar
Shucai Xiao committed
287
            auto l0     = mod->add_literal(literal(s, zeros));
Shucai Xiao's avatar
Shucai Xiao committed
288
            auto output = insert_allocation(ins, s);
Shucai Xiao's avatar
Shucai Xiao committed
289
            return mod->replace_instruction(
290
                ins, make_op("gpu::sub"), l0, ins->inputs().front(), output);
Shucai Xiao's avatar
Shucai Xiao committed
291
292
        });
    }
Shucai Xiao's avatar
Shucai Xiao committed
293

Shucai Xiao's avatar
Shucai Xiao committed
294
    // add input and output argument for the if operator
Shucai Xiao's avatar
Shucai Xiao committed
295
296
297
298
    void add_if_op()
    {
        apply_map.emplace("if", [=](instruction_ref ins) {
            std::vector<instruction_ref> inputs = ins->inputs();
299
300
301
            auto cpu_cond =
                mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs.front());
            auto sync_cond = mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_cond);
Shucai Xiao's avatar
Shucai Xiao committed
302
303
            inputs.front() = sync_cond;

304
            return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
Shucai Xiao's avatar
Shucai Xiao committed
305
306
        });
    }
Shucai Xiao's avatar
Shucai Xiao committed
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322

    // replace the loop operator with gpu_loop operator
    void add_loop_op()
    {
        apply_map.emplace("loop", [=](instruction_ref ins) {
            std::vector<instruction_ref> inputs = ins->inputs();
            // copy max_iter from gpu to cpu
            auto cpu_max_iter =
                mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs.at(0));
            auto cpu_cond =
                mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs.at(1));
            auto synced_max_iter =
                mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_max_iter, cpu_cond);
            inputs.at(0)     = synced_max_iter;
            inputs.at(1)     = cpu_cond;
            auto copy_inputs = inputs;
323
324
325
326
            std::transform(copy_inputs.begin(),
                           copy_inputs.end(),
                           std::back_inserter(inputs),
                           [&](auto in) { return insert_allocation(ins, in->get_shape()); });
Shucai Xiao's avatar
Shucai Xiao committed
327
328
329
330
331

            auto mod_args = ins->module_inputs();
            auto output   = insert_allocation(ins, ins->get_shape());

            const auto* sub_mod = mod_args.front();
332
333
            auto cond_out       = insert_allocation(ins, sub_mod->get_output_shapes().front());

Shucai Xiao's avatar
Shucai Xiao committed
334
335
336
337
338
339
340
341
            // add cond and mod outputs to the argument list
            inputs.push_back(cond_out);
            inputs.push_back(output);

            return mod->replace_instruction(
                ins, make_op("gpu::loop", ins->get_operator().to_value()), inputs, mod_args);
        });
    }
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

    void add_nms_op()
    {
        apply_map.emplace("nonmaxsuppression", [=](instruction_ref ins) {
            auto s      = ins->get_shape();
            auto output = insert_allocation(ins, s);
            std::vector<instruction_ref> cpu_inputs;
            auto inputs = ins->inputs();
            std::transform(
                inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) {
                    return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in);
                });
            cpu_inputs.front() =
                mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs);
            auto cpu_out = mod->insert_instruction(ins, ins->get_operator(), cpu_inputs);
            auto gpu_out =
                mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output);
            return mod->replace_instruction(ins, gpu_out);
        });
    }
Charlie Lin's avatar
Charlie Lin committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388

    /**
     * Turns on use_local_alloc in the select_module submodules.
     * Changes the submodule returns to a hip::sync_stream.
     */
    void add_select_module_op()
    {
        apply_map.emplace("select_module", [=](instruction_ref ins) {
            std::vector<instruction_ref> inputs = ins->inputs();
            auto mod_args                       = ins->module_inputs();
            for(auto* smod : mod_args)
            {
                smod->use_local_alloc = true;
                auto last_ins         = std::prev(smod->end());
                if(last_ins->name() == "@return")
                {
                    for(auto out_ins : last_ins->inputs())
                    {
                        auto sync_out = smod->insert_instruction(
                            last_ins, make_op("hip::sync_stream"), out_ins);
                        smod->replace_return({sync_out});
                    }
                }
            }
            return ins;
        });
    }
Paul's avatar
Paul committed
389
390
};

Shucai Xiao's avatar
Shucai Xiao committed
391
void lowering::apply(module& m) const { miopen_apply{&m, this}.apply(); }
Shucai Xiao's avatar
Shucai Xiao committed
392

Paul's avatar
Paul committed
393
} // namespace gpu
Paul's avatar
Paul committed
394
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
395
} // namespace migraphx