lowering.cpp 15.3 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#include <migraphx/make_op.hpp>

#include <migraphx/op/abs.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp>

#include <migraphx/gpu/abs.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
Paul's avatar
Paul committed
21
22
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
kahmed10's avatar
kahmed10 committed
23
#include <migraphx/gpu/deconvolution.hpp>
Khalique's avatar
Khalique committed
24
#include <migraphx/gpu/elu.hpp>
Paul's avatar
Paul committed
25
#include <migraphx/gpu/gemm.hpp>
26
#include <migraphx/gpu/hip.hpp>
27
#include <migraphx/gpu/int8_conv_pack.hpp>
28
29
30
31
32
33
34
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
35
#include <utility>
36
#include <functional>
Khalique's avatar
Khalique committed
37
#include <algorithm>
Paul's avatar
Paul committed
38

Paul's avatar
Paul committed
39
namespace migraphx {
Paul's avatar
Paul committed
40
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
41
namespace gpu {
Paul's avatar
Paul committed
42
43
44

struct miopen_apply
{
45
46
    program* prog        = nullptr;
    const lowering* pass = nullptr;
Shucai Xiao's avatar
Shucai Xiao committed
47
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
48
    instruction_ref last{};
49
    std::unordered_map<instruction_ref, std::string> prog_output_names{};
Paul's avatar
Paul committed
50

51
52
53
54
55
56
57
    context& get_context()
    {
        assert(pass != nullptr);
        assert(pass->ctx != nullptr);
        return *pass->ctx;
    }

Paul's avatar
Paul committed
58
59
60
61
62
63
64
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
    void create_output_names()
    {
        this->last = instruction::get_output_alias(std::prev(prog->end()));
        if(this->last->name() == "@return")
        {
            auto& prog_outputs = last->inputs();
            std::vector<instruction_ref> outputs_alias(prog_outputs.size());

            std::transform(prog_outputs.begin(),
                           prog_outputs.end(),
                           outputs_alias.begin(),
                           [](const auto& i) { return instruction::get_output_alias(i); });

            std::size_t index = 0;
            for(auto ins : outputs_alias)
            {
                prog_output_names[ins] = "#output_" + std::to_string(index++);
            }
        }
    }

86
87
    void init()
    {
88
89
        assert(prog != nullptr);
        assert(pass != nullptr);
90
91

        create_output_names();
Paul's avatar
Paul committed
92

93
94
95
96
97
        add_miopen_simple_op<miopen_abs>("abs", make_abs);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
        add_generic_op("acos");
        add_generic_op("acosh");
        add_generic_op("add");
        add_generic_op("asin");
        add_generic_op("asinh");
        add_generic_op("atan");
        add_generic_op("atanh");
        add_generic_op("ceil");
        add_generic_op("contiguous");
        add_generic_op("cos");
        add_generic_op("cosh");
        add_generic_op("div");
        add_generic_op("erf");
        add_generic_op("exp");
        add_generic_op("floor");
        add_generic_op("log");
        add_generic_op("max");
        add_generic_op("min");
        add_generic_op("mul");
        add_generic_op("pow");
        add_generic_op("prelu");
        add_generic_op("recip");
        add_generic_op("relu");
        add_generic_op("round");
        add_generic_op("rsqrt");
        add_generic_op("sigmoid");
        add_generic_op("sign");
        add_generic_op("sin");
        add_generic_op("sinh");
        add_generic_op("sqdiff");
        add_generic_op("sqrt");
        add_generic_op("sub");
        add_generic_op("tan");
        add_generic_op("tanh");

        add_extend_op("argmax");
        add_extend_op("argmin");
        add_extend_op("clip");
        add_extend_op("concat");
        add_extend_op("convert");
        add_extend_op("gather");
        add_extend_op("logsoftmax");
        add_extend_op("pad");
        add_extend_op("reduce_max");
        add_extend_op("reduce_mean");
        add_extend_op("reduce_min");
        add_extend_op("reduce_prod");
        add_extend_op("reduce_sum");
        add_extend_op("rnn_var_sl_last_output");
        add_extend_op("rnn_var_sl_shift_output");
        add_extend_op("rnn_var_sl_shift_sequence");
        add_extend_op("softmax");

151
152
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");
Khalique's avatar
Khalique committed
153
        add_lrn_op();
154
        add_convolution_op();
kahmed10's avatar
kahmed10 committed
155
        add_deconvolution_op();
156
        add_quant_convolution_op();
157
158
        add_pooling_op();
        add_batch_norm_inference_op();
Shucai Xiao's avatar
Shucai Xiao committed
159
        add_neg_op();
160
161
    }

162
163
164
165
    void copy_params()
    {
        if(not pass->offload_copy)
            return;
166

167
168
169
170
        for(auto ins : iterator_for(*prog))
        {
            if(ins->name() != "@param")
                continue;
171

172
173
174
175
176
            auto pos = std::next(ins);
            auto a   = insert_allocation(pos, ins->get_shape());
            auto c   = prog->insert_instruction(pos, hip_copy_to_gpu{}, ins, a);
            prog->replace_instruction(ins, c);
        }
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

        // return instruction
        auto ret = std::prev(prog->end());
        if(ret->name() == "@return")
        {
            auto& inputs = ret->inputs();

            // each input of ret need to be copied from gpu to host, and replace
            // output with copy output
            for(auto& in : inputs)
            {
                auto p_output = prog->insert_instruction(ret, hip_copy_from_gpu{}, in);
                instruction::replace_argument(ret, in, p_output);
            }
        }
        // else branch to handle legacy program without the return instruction
        else
        {
            prog->add_instruction(hip_copy_from_gpu{}, ret);
        }
197
198
    }

Paul's avatar
Paul committed
199
200
    void apply()
    {
201
        init();
Paul's avatar
Paul committed
202
203
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
204
            auto s = it->get_shape();
205
            if(apply_map.count(it->name()) > 0)
206
            {
207
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
208
            }
Paul's avatar
Paul committed
209
        }
210

211
        copy_params();
Paul's avatar
Paul committed
212
213
    }

Paul's avatar
Paul committed
214
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
215
    {
216
217
        // Instruction's output is an input of the ret instruction
        if(pass->offload_copy)
Paul's avatar
Paul committed
218
        {
219
            auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
220
221
            return result;
        }
222
223
224
225
226
227
228
229
230
231
232
233

        auto ins_alias = instruction::get_output_alias(ins);
        if(last->name() == "@return" and tag.empty() and prog_output_names.count(ins_alias) > 0)
        {
            return prog->add_parameter(prog_output_names[ins_alias], s);
        }
        else if(ins == last and tag.empty())
        {
            return prog->add_parameter("output", s);
        }

        return prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
234
235
    }

Shucai Xiao's avatar
Shucai Xiao committed
236
    void add_convolution_op()
Paul's avatar
Paul committed
237
    {
238
239
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
240

241
            auto conv = miopen_convolution{op, make_conv(op)};
242
            auto ws   = conv.find(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
Paul's avatar
Paul committed
243

244
245
            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
kahmed10's avatar
kahmed10 committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
    }

    void add_deconvolution_op()
    {
        apply_map.emplace("deconvolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::deconvolution>(ins->get_operator());

            auto conv = miopen_deconvolution{op, make_deconv(op)};
            auto ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));

            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
262

263
264
265
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
Paul's avatar
Paul committed
266
267
    }

268
269
270
271
272
273
274
    template <class Op>
    void add_gemm_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto beta                         = op.beta;
            std::vector<instruction_ref> refs = ins->inputs();
Shucai Xiao's avatar
Shucai Xiao committed
275
            if(refs.size() == 2)
276
277
            {
                auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
278
279
280
281
282
283
284
                beta        = 0;
                refs.push_back(output);
            }
            else
            {
                auto c_alias = instruction::get_output_alias(refs.back());
                if(ins == last or refs.back()->outputs().size() > 1 or c_alias->inputs().empty())
285
                {
Shucai Xiao's avatar
Shucai Xiao committed
286
                    auto output   = insert_allocation(ins, ins->get_shape());
287
288
289
290
                    auto copy_out = prog->insert_instruction(ins, hip_copy{}, refs.back(), output);
                    refs.back()   = copy_out;
                    refs.push_back(copy_out);
                }
Shucai Xiao's avatar
Shucai Xiao committed
291
292
293
294
                else
                {
                    refs.push_back(refs.back());
                }
295
296
297
298
299
300
            }

            return prog->replace_instruction(ins, rocblas_gemm<Op>{Op{op.alpha, beta}}, refs);
        });
    }

301
302
303
304
305
    void add_quant_convolution_op()
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
            auto conv = miopen_quant_convolution{op, make_conv(op)};
306
            auto ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
307

Shucai Xiao's avatar
Shucai Xiao committed
308
            auto args      = ins->inputs();
309
            auto workspace = insert_allocation(ins, ws, "workspace");
Shucai Xiao's avatar
Shucai Xiao committed
310
311
            auto output    = insert_allocation(ins, ins->get_shape());

Shucai Xiao's avatar
Shucai Xiao committed
312
            return prog->replace_instruction(ins, conv, args[0], args[1], workspace, output);
Shucai Xiao's avatar
Shucai Xiao committed
313
314
315
        });
    }

Shucai Xiao's avatar
Shucai Xiao committed
316
    void add_pooling_op()
Paul's avatar
Paul committed
317
    {
318
319
320
321
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());
322

323
324
325
            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
Paul's avatar
Paul committed
326
    }
327

Khalique's avatar
Khalique committed
328
    void add_lrn_op()
Khalique's avatar
Khalique committed
329
    {
Khalique's avatar
Khalique committed
330
        apply_map.emplace("lrn", [=](instruction_ref ins) {
Khalique's avatar
Khalique committed
331
332
333
334
335
336
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
Khalique's avatar
Khalique committed
337
    }
Paul's avatar
Paul committed
338

339
340
341
    void add_generic_op(const std::string& name) { add_generic_op(name, "gpu::" + name); }

    void add_generic_op(const std::string& op_name, const std::string& gpu_name)
Paul's avatar
Paul committed
342
    {
343
        apply_map.emplace(op_name, [=](instruction_ref ins) {
344
345
346
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
347

348
            return prog->replace_instruction(ins, make_op(gpu_name), refs);
349
        });
Paul's avatar
Paul committed
350
    }
Paul's avatar
Paul committed
351

352
353
354
    void add_extend_op(const std::string& name) { add_extend_op(name, "gpu::" + name); }

    void add_extend_op(const std::string& op_name, const std::string& gpu_name)
Khalique's avatar
Khalique committed
355
    {
356
357
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op                         = ins->get_operator();
358
359
360
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
361

362
            return prog->replace_instruction(ins, make_op(gpu_name, op.to_value()), refs);
363
        });
Khalique's avatar
Khalique committed
364
365
    }

Shucai Xiao's avatar
Shucai Xiao committed
366
    template <class T, class Op, class F>
367
    void add_miopen_extend_op(std::string name, F f)
Paul's avatar
Paul committed
368
    {
Shucai Xiao's avatar
Shucai Xiao committed
369
        apply_map.emplace(name, [=](instruction_ref ins) {
370
371
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);
372

373
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
374
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
375
        });
376
    }
377

Shucai Xiao's avatar
Shucai Xiao committed
378
    template <class T, class F>
379
    void add_miopen_simple_op(std::string name, F f)
380
    {
Shucai Xiao's avatar
Shucai Xiao committed
381
382
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
383
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
384
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
385
        });
386
387
    }

Shucai Xiao's avatar
Shucai Xiao committed
388
    void add_batch_norm_inference_op()
389
    {
390
391
392
393
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
Shucai Xiao's avatar
Shucai Xiao committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
            auto input      = ins->inputs()[0];
            auto input_lens = input->get_shape().lens();
            std::vector<int64_t> rsp_lens(input_lens.size(), 1);
            // for per_activation case, also need to reshape input
            if(op.bn_mode == op::batch_norm_inference::per_activation)
            {
                std::copy(input_lens.begin() + 1, input_lens.end(), rsp_lens.begin() + 1);
            }
            else
            {
                rsp_lens[1] = static_cast<int64_t>(old_shape.elements());
            }

            auto reshape_op = op::reshape{rsp_lens};
408
409
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
Shucai Xiao's avatar
Shucai Xiao committed
410
411
412
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
Shucai Xiao's avatar
Shucai Xiao committed
413

414
            return prog->replace_instruction(ins,
Shucai Xiao's avatar
Shucai Xiao committed
415
                                             miopen_batch_norm_inference{op},
Shucai Xiao's avatar
Shucai Xiao committed
416
                                             input,
Shucai Xiao's avatar
Shucai Xiao committed
417
418
419
420
421
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
Shucai Xiao's avatar
Shucai Xiao committed
422

423
        });
424
    }
Shucai Xiao's avatar
Shucai Xiao committed
425
426
427
428
429
430
431
432
433

    // use 0 - input to represent neg
    void add_neg_op()
    {
        apply_map.emplace("neg", [=](instruction_ref ins) {
            auto s = ins->get_shape();
            std::vector<float> zeros(s.elements(), 0.0f);
            auto l0     = prog->add_literal(literal(s, zeros));
            auto output = insert_allocation(ins, s);
434
435
            return prog->replace_instruction(
                ins, make_op("gpu::sub"), l0, ins->inputs().front(), output);
Shucai Xiao's avatar
Shucai Xiao committed
436
437
        });
    }
Paul's avatar
Paul committed
438
439
};

440
void lowering::apply(program& p) const { miopen_apply{&p, this}.apply(); }
Paul's avatar
Paul committed
441
} // namespace gpu
Paul's avatar
Paul committed
442
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
443
} // namespace migraphx