lowering.cpp 12.2 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
3
4
5
6
7
8
9
10
11
12
13
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
14
15
#include <migraphx/gpu/argmax.hpp>
#include <migraphx/gpu/argmin.hpp>
Paul's avatar
Paul committed
16
17
18
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
19
#include <migraphx/gpu/quant_convolution.hpp>
Paul's avatar
Paul committed
20
21
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/gpu/relu.hpp>
Khalique's avatar
Khalique committed
22
23
#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/gpu/abs.hpp>
Paul's avatar
Paul committed
24
#include <migraphx/gpu/leaky_relu.hpp>
Khalique's avatar
Khalique committed
25
#include <migraphx/gpu/elu.hpp>
Paul's avatar
Paul committed
26
#include <migraphx/gpu/softmax.hpp>
27
#include <migraphx/gpu/logsoftmax.hpp>
Paul's avatar
Paul committed
28
#include <migraphx/gpu/add.hpp>
29
#include <migraphx/gpu/sub.hpp>
30
#include <migraphx/gpu/div.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
31
#include <migraphx/gpu/exp.hpp>
32
#include <migraphx/gpu/erf.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
33
#include <migraphx/gpu/log.hpp>
34
#include <migraphx/gpu/sin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
35
#include <migraphx/gpu/sign.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
36
37
#include <migraphx/gpu/cos.hpp>
#include <migraphx/gpu/tan.hpp>
38
39
#include <migraphx/gpu/sinh.hpp>
#include <migraphx/gpu/cosh.hpp>
40
#include <migraphx/gpu/tanh.hpp>
41
42
43
#include <migraphx/gpu/asin.hpp>
#include <migraphx/gpu/acos.hpp>
#include <migraphx/gpu/atan.hpp>
Paul's avatar
Paul committed
44
#include <migraphx/gpu/mul.hpp>
Khalique's avatar
Khalique committed
45
46
#include <migraphx/gpu/max.hpp>
#include <migraphx/gpu/min.hpp>
Paul's avatar
Paul committed
47
48
49
50
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/concat.hpp>
51
#include <migraphx/gpu/pad.hpp>
52
#include <migraphx/gpu/gather.hpp>
Khalique's avatar
Khalique committed
53
#include <migraphx/gpu/lrn.hpp>
54
#include <migraphx/gpu/convert.hpp>
Khalique's avatar
Khalique committed
55
#include <migraphx/gpu/clip.hpp>
Paul's avatar
Paul committed
56
#include <migraphx/gpu/reduce_sum.hpp>
57
#include <migraphx/gpu/round.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
58
59
#include <migraphx/gpu/ceil.hpp>
#include <migraphx/gpu/floor.hpp>
Khalique's avatar
Khalique committed
60
#include <migraphx/gpu/rsqrt.hpp>
61
#include <migraphx/gpu/sqrt.hpp>
62
#include <migraphx/gpu/reduce_mean.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
63
64
#include <migraphx/gpu/reduce_min.hpp>
#include <migraphx/gpu/reduce_max.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
65
#include <migraphx/gpu/pow.hpp>
Khalique's avatar
Khalique committed
66
#include <migraphx/gpu/sqdiff.hpp>
67
#include <migraphx/gpu/int8_conv_pack.hpp>
Paul's avatar
Paul committed
68
#include <utility>
69
#include <functional>
Khalique's avatar
Khalique committed
70
#include <algorithm>
Paul's avatar
Paul committed
71

Paul's avatar
Paul committed
72
namespace migraphx {
Paul's avatar
Paul committed
73
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
74
namespace gpu {
Paul's avatar
Paul committed
75
76
77

struct miopen_apply
{
Paul's avatar
Paul committed
78
    program* prog = nullptr;
Paul's avatar
Paul committed
79
    context ctx{};
Shucai Xiao's avatar
Shucai Xiao committed
80
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
81
    instruction_ref last{};
Paul's avatar
Paul committed
82

Paul's avatar
Paul committed
83
84
85
86
87
88
89
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

90
91
    void init()
    {
Shucai Xiao's avatar
Shucai Xiao committed
92
        this->last = instruction::get_output_alias(std::prev(prog->end()));
Paul's avatar
Paul committed
93

94
95
96
97
98
99
        add_miopen_simple_op<miopen_abs>("abs", make_abs);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

        add_generic_op<hip_add>("add");
100
        add_generic_op<hip_sub>("sub");
101
        add_generic_op<hip_exp>("exp");
Shucai Xiao's avatar
Shucai Xiao committed
102
        add_generic_op<hip_erf>("erf");
103
        add_generic_op<hip_log>("log");
104
105
106
107
108
        add_generic_op<hip_sin>("sin");
        add_generic_op<hip_cos>("cos");
        add_generic_op<hip_tan>("tan");
        add_generic_op<hip_sinh>("sinh");
        add_generic_op<hip_cosh>("cosh");
109
        add_generic_op<hip_tanh>("tanh");
110
111
112
        add_generic_op<hip_asin>("asin");
        add_generic_op<hip_acos>("acos");
        add_generic_op<hip_atan>("atan");
113
        add_generic_op<hip_sqrt>("sqrt");
114
        add_generic_op<hip_mul>("mul");
115
        add_generic_op<hip_div>("div");
Khalique's avatar
Khalique committed
116
117
        add_generic_op<hip_max>("max");
        add_generic_op<hip_min>("min");
Khalique's avatar
Khalique committed
118
        add_generic_op<hip_rsqrt>("rsqrt");
119
        add_generic_op<hip_round>("round");
120
        add_generic_op<hip_pow>("pow");
Khalique's avatar
Khalique committed
121
        add_generic_op<hip_sqdiff>("sqdiff");
122
        add_generic_op<hip_relu>("relu");
Shucai Xiao's avatar
Shucai Xiao committed
123
        add_generic_op<hip_sign>("sign");
124
        add_generic_op<hip_sigmoid>("sigmoid");
Shucai Xiao's avatar
Shucai Xiao committed
125
126
        add_generic_op<hip_ceil>("ceil");
        add_generic_op<hip_floor>("floor");
127
128
129

        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
        add_extend_op<hip_concat, op::concat>("concat");
Khalique's avatar
Khalique committed
130
        add_extend_op<hip_softmax, op::softmax>("softmax");
131
        add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
132
133
        add_extend_op<hip_argmax, op::argmax>("argmax");
        add_extend_op<hip_argmin, op::argmin>("argmin");
Khalique's avatar
Khalique committed
134
        add_extend_op<hip_gather, op::gather>("gather");
135
        add_extend_op<hip_pad, op::pad>("pad");
136
        add_extend_op<hip_convert, op::convert>("convert");
Khalique's avatar
Khalique committed
137
        add_extend_op<hip_clip, op::clip>("clip");
Paul's avatar
Paul committed
138
        add_extend_op<hip_reduce_sum, op::reduce_sum>("reduce_sum");
139
        add_extend_op<hip_reduce_mean, op::reduce_mean>("reduce_mean");
Shucai Xiao's avatar
Shucai Xiao committed
140
141
        add_extend_op<hip_reduce_min, op::reduce_min>("reduce_min");
        add_extend_op<hip_reduce_max, op::reduce_max>("reduce_max");
142
143
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");
144

Khalique's avatar
Khalique committed
145
        add_lrn_op();
146
        add_convolution_op();
147
        add_quant_convolution_op();
148
149
        add_pooling_op();
        add_batch_norm_inference_op();
150
151
    }

Paul's avatar
Paul committed
152
153
    void apply()
    {
154
        init();
Paul's avatar
Paul committed
155
156
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
157
            auto s = it->get_shape();
158
            if(apply_map.count(it->name()) > 0)
159
            {
160
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
161
            }
Paul's avatar
Paul committed
162
163
164
        }
    }

Paul's avatar
Paul committed
165
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
166
    {
Shucai Xiao's avatar
Shucai Xiao committed
167
        if(ins == last and tag.empty())
Paul's avatar
Paul committed
168
169
170
171
172
        {
            return prog->add_parameter("output", s);
        }
        else
        {
173
            auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
174
175
176
177
            return result;
        }
    }

Shucai Xiao's avatar
Shucai Xiao committed
178
    void add_convolution_op()
Paul's avatar
Paul committed
179
    {
180
181
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
182

183
            auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
184
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
Paul's avatar
Paul committed
185

186
187
            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
188

189
190
191
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
Paul's avatar
Paul committed
192
193
    }

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
    template <class Op>
    void add_gemm_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto beta                         = op.beta;
            std::vector<instruction_ref> refs = ins->inputs();
            if((refs.size() == 2) or (refs.size() == 3 and refs.back()->outputs().size() > 1) or
               (ins == last))
            {
                auto output = insert_allocation(ins, ins->get_shape());
                if(refs.size() == 2)
                {
                    beta = 0;
                    refs.push_back(output);
                }
                else
                {
                    auto copy_out = prog->insert_instruction(ins, hip_copy{}, refs.back(), output);
                    refs.back()   = copy_out;
                    refs.push_back(copy_out);
                }
            }
            else
            {
                refs.push_back(refs.back());
            }

            return prog->replace_instruction(ins, rocblas_gemm<Op>{Op{op.alpha, beta}}, refs);
        });
    }

226
227
228
229
230
231
232
    void add_quant_convolution_op()
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
            auto conv = miopen_quant_convolution{op, make_conv(op)};
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));

Shucai Xiao's avatar
Shucai Xiao committed
233
            auto args      = ins->inputs();
234
            auto workspace = insert_allocation(ins, ws, "workspace");
Shucai Xiao's avatar
Shucai Xiao committed
235
236
            auto output    = insert_allocation(ins, ins->get_shape());

Shucai Xiao's avatar
Shucai Xiao committed
237
            return prog->replace_instruction(ins, conv, args[0], args[1], workspace, output);
Shucai Xiao's avatar
Shucai Xiao committed
238
239
240
        });
    }

Shucai Xiao's avatar
Shucai Xiao committed
241
    void add_pooling_op()
Paul's avatar
Paul committed
242
    {
243
244
245
246
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());
247

248
249
250
            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
Paul's avatar
Paul committed
251
    }
252

Khalique's avatar
Khalique committed
253
    void add_lrn_op()
Khalique's avatar
Khalique committed
254
    {
Khalique's avatar
Khalique committed
255
        apply_map.emplace("lrn", [=](instruction_ref ins) {
Khalique's avatar
Khalique committed
256
257
258
259
260
261
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
Khalique's avatar
Khalique committed
262
    }
Paul's avatar
Paul committed
263

Shucai Xiao's avatar
Shucai Xiao committed
264
    template <class T>
265
    void add_generic_op(std::string name)
Paul's avatar
Paul committed
266
    {
267
268
269
270
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
271

272
273
            return prog->replace_instruction(ins, T{}, refs);
        });
Paul's avatar
Paul committed
274
    }
Paul's avatar
Paul committed
275

Shucai Xiao's avatar
Shucai Xiao committed
276
    template <class T, class Op>
277
    void add_extend_op(std::string name)
Khalique's avatar
Khalique committed
278
    {
279
280
281
282
283
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
284

285
286
            return prog->replace_instruction(ins, T{op}, refs);
        });
Khalique's avatar
Khalique committed
287
288
    }

Shucai Xiao's avatar
Shucai Xiao committed
289
    template <class T, class Op, class F>
290
    void add_miopen_extend_op(std::string name, F f)
Paul's avatar
Paul committed
291
    {
Shucai Xiao's avatar
Shucai Xiao committed
292
        apply_map.emplace(name, [=](instruction_ref ins) {
293
294
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);
295

296
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
297
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
298
        });
299
    }
300

Shucai Xiao's avatar
Shucai Xiao committed
301
    template <class T, class F>
302
    void add_miopen_simple_op(std::string name, F f)
303
    {
Shucai Xiao's avatar
Shucai Xiao committed
304
305
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
306
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
307
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
308
        });
309
310
    }

Shucai Xiao's avatar
Shucai Xiao committed
311
    void add_batch_norm_inference_op()
312
    {
313
314
315
316
317
318
319
320
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
            std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
            auto reshape_op = op::reshape{new_shape};
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
Shucai Xiao's avatar
Shucai Xiao committed
321
322
323
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
324
            return prog->replace_instruction(ins,
Shucai Xiao's avatar
Shucai Xiao committed
325
326
327
328
329
330
331
                                             miopen_batch_norm_inference{op},
                                             ins->inputs().at(0),
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
332
        });
333
    }
Paul's avatar
Paul committed
334
335
};

Paul's avatar
Paul committed
336
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
337
} // namespace gpu
Paul's avatar
Paul committed
338
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
339
} // namespace migraphx