lowering.cpp 15.3 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#include <migraphx/make_op.hpp>

#include <migraphx/op/abs.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp>

#include <migraphx/gpu/abs.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
Paul's avatar
Paul committed
21
22
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
kahmed10's avatar
kahmed10 committed
23
#include <migraphx/gpu/deconvolution.hpp>
Khalique's avatar
Khalique committed
24
#include <migraphx/gpu/elu.hpp>
25
#include <migraphx/gpu/equal.hpp>
Paul's avatar
Paul committed
26
#include <migraphx/gpu/gemm.hpp>
27
#include <migraphx/gpu/hip.hpp>
28
#include <migraphx/gpu/int8_conv_pack.hpp>
29
30
31
32
33
34
35
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
36
#include <utility>
37
#include <functional>
Khalique's avatar
Khalique committed
38
#include <algorithm>
Paul's avatar
Paul committed
39

Paul's avatar
Paul committed
40
namespace migraphx {
Paul's avatar
Paul committed
41
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
42
namespace gpu {
Paul's avatar
Paul committed
43
44
45

struct miopen_apply
{
46
47
    program* prog        = nullptr;
    const lowering* pass = nullptr;
Shucai Xiao's avatar
Shucai Xiao committed
48
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
49
    instruction_ref last{};
50
    std::unordered_map<instruction_ref, std::string> prog_output_names{};
Paul's avatar
Paul committed
51

52
53
54
55
56
57
58
    context& get_context()
    {
        assert(pass != nullptr);
        assert(pass->ctx != nullptr);
        return *pass->ctx;
    }

Paul's avatar
Paul committed
59
60
61
62
63
64
65
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
    void create_output_names()
    {
        this->last = instruction::get_output_alias(std::prev(prog->end()));
        if(this->last->name() == "@return")
        {
            auto& prog_outputs = last->inputs();
            std::vector<instruction_ref> outputs_alias(prog_outputs.size());

            std::transform(prog_outputs.begin(),
                           prog_outputs.end(),
                           outputs_alias.begin(),
                           [](const auto& i) { return instruction::get_output_alias(i); });

            std::size_t index = 0;
            for(auto ins : outputs_alias)
            {
                prog_output_names[ins] = "#output_" + std::to_string(index++);
            }
        }
    }

87
88
    void init()
    {
89
90
        assert(prog != nullptr);
        assert(pass != nullptr);
91
92

        create_output_names();
Paul's avatar
Paul committed
93

94
95
96
97
98
        add_miopen_simple_op<miopen_abs>("abs", make_abs);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

99
100
101
102
103
104
105
106
107
108
109
110
        add_generic_op("acos");
        add_generic_op("acosh");
        add_generic_op("add");
        add_generic_op("asin");
        add_generic_op("asinh");
        add_generic_op("atan");
        add_generic_op("atanh");
        add_generic_op("ceil");
        add_generic_op("contiguous");
        add_generic_op("cos");
        add_generic_op("cosh");
        add_generic_op("div");
111
        add_generic_op("equal");
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
        add_generic_op("erf");
        add_generic_op("exp");
        add_generic_op("floor");
        add_generic_op("log");
        add_generic_op("max");
        add_generic_op("min");
        add_generic_op("mul");
        add_generic_op("pow");
        add_generic_op("prelu");
        add_generic_op("recip");
        add_generic_op("relu");
        add_generic_op("round");
        add_generic_op("rsqrt");
        add_generic_op("sigmoid");
        add_generic_op("sign");
        add_generic_op("sin");
        add_generic_op("sinh");
        add_generic_op("sqdiff");
        add_generic_op("sqrt");
        add_generic_op("sub");
        add_generic_op("tan");
        add_generic_op("tanh");

        add_extend_op("argmax");
        add_extend_op("argmin");
        add_extend_op("clip");
        add_extend_op("concat");
        add_extend_op("convert");
        add_extend_op("gather");
        add_extend_op("logsoftmax");
        add_extend_op("pad");
        add_extend_op("reduce_max");
        add_extend_op("reduce_mean");
        add_extend_op("reduce_min");
        add_extend_op("reduce_prod");
        add_extend_op("reduce_sum");
        add_extend_op("rnn_var_sl_last_output");
        add_extend_op("rnn_var_sl_shift_output");
        add_extend_op("rnn_var_sl_shift_sequence");
        add_extend_op("softmax");

153
154
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");
Khalique's avatar
Khalique committed
155
        add_lrn_op();
156
        add_convolution_op();
kahmed10's avatar
kahmed10 committed
157
        add_deconvolution_op();
158
        add_quant_convolution_op();
159
160
        add_pooling_op();
        add_batch_norm_inference_op();
Shucai Xiao's avatar
Shucai Xiao committed
161
        add_neg_op();
162
163
    }

164
165
166
167
    void copy_params()
    {
        if(not pass->offload_copy)
            return;
168

169
170
171
172
        for(auto ins : iterator_for(*prog))
        {
            if(ins->name() != "@param")
                continue;
173

174
175
176
177
178
            auto pos = std::next(ins);
            auto a   = insert_allocation(pos, ins->get_shape());
            auto c   = prog->insert_instruction(pos, hip_copy_to_gpu{}, ins, a);
            prog->replace_instruction(ins, c);
        }
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

        // return instruction
        auto ret = std::prev(prog->end());
        if(ret->name() == "@return")
        {
            auto& inputs = ret->inputs();

            // each input of ret need to be copied from gpu to host, and replace
            // output with copy output
            for(auto& in : inputs)
            {
                auto p_output = prog->insert_instruction(ret, hip_copy_from_gpu{}, in);
                instruction::replace_argument(ret, in, p_output);
            }
        }
        // else branch to handle legacy program without the return instruction
        else
        {
            prog->add_instruction(hip_copy_from_gpu{}, ret);
        }
199
200
    }

Paul's avatar
Paul committed
201
202
    void apply()
    {
203
        init();
Paul's avatar
Paul committed
204
205
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
206
            auto s = it->get_shape();
207
            if(apply_map.count(it->name()) > 0)
208
            {
209
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
210
            }
Paul's avatar
Paul committed
211
        }
212

213
        copy_params();
Paul's avatar
Paul committed
214
215
    }

Paul's avatar
Paul committed
216
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
217
    {
218
219
        // Instruction's output is an input of the ret instruction
        if(pass->offload_copy)
Paul's avatar
Paul committed
220
        {
221
            auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
222
223
            return result;
        }
224
225
226
227
228
229
230
231
232
233
234
235

        auto ins_alias = instruction::get_output_alias(ins);
        if(last->name() == "@return" and tag.empty() and prog_output_names.count(ins_alias) > 0)
        {
            return prog->add_parameter(prog_output_names[ins_alias], s);
        }
        else if(ins == last and tag.empty())
        {
            return prog->add_parameter("output", s);
        }

        return prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
236
237
    }

Shucai Xiao's avatar
Shucai Xiao committed
238
    void add_convolution_op()
Paul's avatar
Paul committed
239
    {
240
241
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
242

243
            auto conv = miopen_convolution{op, make_conv(op)};
244
            auto ws   = conv.find(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
Paul's avatar
Paul committed
245

246
247
            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
kahmed10's avatar
kahmed10 committed
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
    }

    void add_deconvolution_op()
    {
        apply_map.emplace("deconvolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::deconvolution>(ins->get_operator());

            auto conv = miopen_deconvolution{op, make_deconv(op)};
            auto ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));

            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
264

265
266
267
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
Paul's avatar
Paul committed
268
269
    }

270
271
272
273
274
275
276
    template <class Op>
    void add_gemm_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto beta                         = op.beta;
            std::vector<instruction_ref> refs = ins->inputs();
Shucai Xiao's avatar
Shucai Xiao committed
277
            if(refs.size() == 2)
278
279
            {
                auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
280
281
282
283
284
285
286
                beta        = 0;
                refs.push_back(output);
            }
            else
            {
                auto c_alias = instruction::get_output_alias(refs.back());
                if(ins == last or refs.back()->outputs().size() > 1 or c_alias->inputs().empty())
287
                {
Shucai Xiao's avatar
Shucai Xiao committed
288
                    auto output   = insert_allocation(ins, ins->get_shape());
289
290
291
292
                    auto copy_out = prog->insert_instruction(ins, hip_copy{}, refs.back(), output);
                    refs.back()   = copy_out;
                    refs.push_back(copy_out);
                }
Shucai Xiao's avatar
Shucai Xiao committed
293
294
295
296
                else
                {
                    refs.push_back(refs.back());
                }
297
298
299
300
301
302
            }

            return prog->replace_instruction(ins, rocblas_gemm<Op>{Op{op.alpha, beta}}, refs);
        });
    }

303
304
305
306
307
    void add_quant_convolution_op()
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
            auto conv = miopen_quant_convolution{op, make_conv(op)};
308
            auto ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
309

Shucai Xiao's avatar
Shucai Xiao committed
310
            auto args      = ins->inputs();
311
            auto workspace = insert_allocation(ins, ws, "workspace");
Shucai Xiao's avatar
Shucai Xiao committed
312
313
            auto output    = insert_allocation(ins, ins->get_shape());

Shucai Xiao's avatar
Shucai Xiao committed
314
            return prog->replace_instruction(ins, conv, args[0], args[1], workspace, output);
Shucai Xiao's avatar
Shucai Xiao committed
315
316
317
        });
    }

Shucai Xiao's avatar
Shucai Xiao committed
318
    void add_pooling_op()
Paul's avatar
Paul committed
319
    {
320
321
322
323
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());
324

325
326
327
            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
Paul's avatar
Paul committed
328
    }
329

Khalique's avatar
Khalique committed
330
    void add_lrn_op()
Khalique's avatar
Khalique committed
331
    {
Khalique's avatar
Khalique committed
332
        apply_map.emplace("lrn", [=](instruction_ref ins) {
Khalique's avatar
Khalique committed
333
334
335
336
337
338
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
Khalique's avatar
Khalique committed
339
    }
Paul's avatar
Paul committed
340

341
342
343
    void add_generic_op(const std::string& name) { add_generic_op(name, "gpu::" + name); }

    void add_generic_op(const std::string& op_name, const std::string& gpu_name)
Paul's avatar
Paul committed
344
    {
345
        apply_map.emplace(op_name, [=](instruction_ref ins) {
346
347
348
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
349

350
            return prog->replace_instruction(ins, make_op(gpu_name), refs);
351
        });
Paul's avatar
Paul committed
352
    }
Paul's avatar
Paul committed
353

354
355
356
    void add_extend_op(const std::string& name) { add_extend_op(name, "gpu::" + name); }

    void add_extend_op(const std::string& op_name, const std::string& gpu_name)
Khalique's avatar
Khalique committed
357
    {
358
359
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op                         = ins->get_operator();
360
361
362
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
363

364
            return prog->replace_instruction(ins, make_op(gpu_name, op.to_value()), refs);
365
        });
Khalique's avatar
Khalique committed
366
367
    }

Shucai Xiao's avatar
Shucai Xiao committed
368
    template <class T, class Op, class F>
369
    void add_miopen_extend_op(std::string name, F f)
Paul's avatar
Paul committed
370
    {
Shucai Xiao's avatar
Shucai Xiao committed
371
        apply_map.emplace(name, [=](instruction_ref ins) {
372
373
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);
374

375
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
376
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
377
        });
378
    }
379

Shucai Xiao's avatar
Shucai Xiao committed
380
    template <class T, class F>
381
    void add_miopen_simple_op(std::string name, F f)
382
    {
Shucai Xiao's avatar
Shucai Xiao committed
383
384
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
385
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
386
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
387
        });
388
389
    }

Shucai Xiao's avatar
Shucai Xiao committed
390
    void add_batch_norm_inference_op()
391
    {
392
393
394
395
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
Shucai Xiao's avatar
Shucai Xiao committed
396
397
398
399
400
401
402
403
404
405
406
407
408
409
            auto input      = ins->inputs()[0];
            auto input_lens = input->get_shape().lens();
            std::vector<int64_t> rsp_lens(input_lens.size(), 1);
            // for per_activation case, also need to reshape input
            if(op.bn_mode == op::batch_norm_inference::per_activation)
            {
                std::copy(input_lens.begin() + 1, input_lens.end(), rsp_lens.begin() + 1);
            }
            else
            {
                rsp_lens[1] = static_cast<int64_t>(old_shape.elements());
            }

            auto reshape_op = op::reshape{rsp_lens};
410
411
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
Shucai Xiao's avatar
Shucai Xiao committed
412
413
414
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
Shucai Xiao's avatar
Shucai Xiao committed
415

416
            return prog->replace_instruction(ins,
Shucai Xiao's avatar
Shucai Xiao committed
417
                                             miopen_batch_norm_inference{op},
Shucai Xiao's avatar
Shucai Xiao committed
418
                                             input,
Shucai Xiao's avatar
Shucai Xiao committed
419
420
421
422
423
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
Shucai Xiao's avatar
Shucai Xiao committed
424

425
        });
426
    }
Shucai Xiao's avatar
Shucai Xiao committed
427
428
429
430
431
432
433
434
435

    // use 0 - input to represent neg
    void add_neg_op()
    {
        apply_map.emplace("neg", [=](instruction_ref ins) {
            auto s = ins->get_shape();
            std::vector<float> zeros(s.elements(), 0.0f);
            auto l0     = prog->add_literal(literal(s, zeros));
            auto output = insert_allocation(ins, s);
436
437
            return prog->replace_instruction(
                ins, make_op("gpu::sub"), l0, ins->inputs().front(), output);
Shucai Xiao's avatar
Shucai Xiao committed
438
439
        });
    }
Paul's avatar
Paul committed
440
441
};

442
void lowering::apply(program& p) const { miopen_apply{&p, this}.apply(); }
Paul's avatar
Paul committed
443
} // namespace gpu
Paul's avatar
Paul committed
444
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
445
} // namespace migraphx