lowering.cpp 12.1 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
3
4
5
6
7
8
9
10
11
12
13
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
14
15
#include <migraphx/gpu/argmax.hpp>
#include <migraphx/gpu/argmin.hpp>
Paul's avatar
Paul committed
16
17
18
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
19
#include <migraphx/gpu/quant_convolution.hpp>
Paul's avatar
Paul committed
20
21
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/gpu/relu.hpp>
Khalique's avatar
Khalique committed
22
23
#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/gpu/abs.hpp>
Paul's avatar
Paul committed
24
#include <migraphx/gpu/leaky_relu.hpp>
Khalique's avatar
Khalique committed
25
#include <migraphx/gpu/elu.hpp>
Paul's avatar
Paul committed
26
#include <migraphx/gpu/softmax.hpp>
27
#include <migraphx/gpu/logsoftmax.hpp>
Paul's avatar
Paul committed
28
#include <migraphx/gpu/add.hpp>
29
#include <migraphx/gpu/sub.hpp>
30
#include <migraphx/gpu/div.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
31
#include <migraphx/gpu/exp.hpp>
32
#include <migraphx/gpu/erf.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
33
#include <migraphx/gpu/log.hpp>
34
#include <migraphx/gpu/sin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
35
#include <migraphx/gpu/sign.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
36
37
#include <migraphx/gpu/cos.hpp>
#include <migraphx/gpu/tan.hpp>
38
39
#include <migraphx/gpu/sinh.hpp>
#include <migraphx/gpu/cosh.hpp>
40
#include <migraphx/gpu/tanh.hpp>
41
42
43
#include <migraphx/gpu/asin.hpp>
#include <migraphx/gpu/acos.hpp>
#include <migraphx/gpu/atan.hpp>
Paul's avatar
Paul committed
44
#include <migraphx/gpu/mul.hpp>
Khalique's avatar
Khalique committed
45
46
#include <migraphx/gpu/max.hpp>
#include <migraphx/gpu/min.hpp>
Paul's avatar
Paul committed
47
48
49
50
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/concat.hpp>
51
#include <migraphx/gpu/pad.hpp>
52
#include <migraphx/gpu/gather.hpp>
Khalique's avatar
Khalique committed
53
#include <migraphx/gpu/lrn.hpp>
54
#include <migraphx/gpu/convert.hpp>
Khalique's avatar
Khalique committed
55
#include <migraphx/gpu/clip.hpp>
Paul's avatar
Paul committed
56
#include <migraphx/gpu/reduce_sum.hpp>
57
#include <migraphx/gpu/round.hpp>
Khalique's avatar
Khalique committed
58
#include <migraphx/gpu/rsqrt.hpp>
59
#include <migraphx/gpu/sqrt.hpp>
60
#include <migraphx/gpu/reduce_mean.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
61
62
#include <migraphx/gpu/reduce_min.hpp>
#include <migraphx/gpu/reduce_max.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
63
#include <migraphx/gpu/pow.hpp>
Khalique's avatar
Khalique committed
64
#include <migraphx/gpu/sqdiff.hpp>
65
#include <migraphx/gpu/int8_conv_pack.hpp>
Paul's avatar
Paul committed
66
#include <utility>
67
#include <functional>
Khalique's avatar
Khalique committed
68
#include <algorithm>
Paul's avatar
Paul committed
69

Paul's avatar
Paul committed
70
namespace migraphx {
Paul's avatar
Paul committed
71
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
72
namespace gpu {
Paul's avatar
Paul committed
73
74
75

struct miopen_apply
{
Paul's avatar
Paul committed
76
    program* prog = nullptr;
Paul's avatar
Paul committed
77
    context ctx{};
Shucai Xiao's avatar
Shucai Xiao committed
78
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
79
    instruction_ref last{};
Paul's avatar
Paul committed
80

Paul's avatar
Paul committed
81
82
83
84
85
86
87
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

88
89
    void init()
    {
Shucai Xiao's avatar
Shucai Xiao committed
90
        this->last = instruction::get_output_alias(std::prev(prog->end()));
Paul's avatar
Paul committed
91

92
93
94
95
96
97
        add_miopen_simple_op<miopen_abs>("abs", make_abs);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

        add_generic_op<hip_add>("add");
98
        add_generic_op<hip_sub>("sub");
99
        add_generic_op<hip_exp>("exp");
Shucai Xiao's avatar
Shucai Xiao committed
100
        add_generic_op<hip_erf>("erf");
101
        add_generic_op<hip_log>("log");
102
103
104
105
106
        add_generic_op<hip_sin>("sin");
        add_generic_op<hip_cos>("cos");
        add_generic_op<hip_tan>("tan");
        add_generic_op<hip_sinh>("sinh");
        add_generic_op<hip_cosh>("cosh");
107
        add_generic_op<hip_tanh>("tanh");
108
109
110
        add_generic_op<hip_asin>("asin");
        add_generic_op<hip_acos>("acos");
        add_generic_op<hip_atan>("atan");
111
        add_generic_op<hip_sqrt>("sqrt");
112
        add_generic_op<hip_mul>("mul");
113
        add_generic_op<hip_div>("div");
Khalique's avatar
Khalique committed
114
115
        add_generic_op<hip_max>("max");
        add_generic_op<hip_min>("min");
Khalique's avatar
Khalique committed
116
        add_generic_op<hip_rsqrt>("rsqrt");
117
        add_generic_op<hip_round>("round");
118
        add_generic_op<hip_pow>("pow");
Khalique's avatar
Khalique committed
119
        add_generic_op<hip_sqdiff>("sqdiff");
120
        add_generic_op<hip_relu>("relu");
Shucai Xiao's avatar
Shucai Xiao committed
121
        add_generic_op<hip_sign>("sign");
122
        add_generic_op<hip_sigmoid>("sigmoid");
123
124
125

        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
        add_extend_op<hip_concat, op::concat>("concat");
Khalique's avatar
Khalique committed
126
        add_extend_op<hip_softmax, op::softmax>("softmax");
127
        add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
128
129
        add_extend_op<hip_argmax, op::argmax>("argmax");
        add_extend_op<hip_argmin, op::argmin>("argmin");
Khalique's avatar
Khalique committed
130
        add_extend_op<hip_gather, op::gather>("gather");
131
        add_extend_op<hip_pad, op::pad>("pad");
132
        add_extend_op<hip_convert, op::convert>("convert");
Khalique's avatar
Khalique committed
133
        add_extend_op<hip_clip, op::clip>("clip");
Paul's avatar
Paul committed
134
        add_extend_op<hip_reduce_sum, op::reduce_sum>("reduce_sum");
135
        add_extend_op<hip_reduce_mean, op::reduce_mean>("reduce_mean");
Shucai Xiao's avatar
Shucai Xiao committed
136
137
        add_extend_op<hip_reduce_min, op::reduce_min>("reduce_min");
        add_extend_op<hip_reduce_max, op::reduce_max>("reduce_max");
138
139
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");
140

Khalique's avatar
Khalique committed
141
        add_lrn_op();
142
        add_convolution_op();
143
        add_quant_convolution_op();
144
145
        add_pooling_op();
        add_batch_norm_inference_op();
146
147
    }

Paul's avatar
Paul committed
148
149
    void apply()
    {
150
        init();
Paul's avatar
Paul committed
151
152
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
153
            auto s = it->get_shape();
154
            if(apply_map.count(it->name()) > 0)
155
            {
156
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
157
            }
Paul's avatar
Paul committed
158
159
160
        }
    }

Paul's avatar
Paul committed
161
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
162
    {
Shucai Xiao's avatar
Shucai Xiao committed
163
        if(ins == last and tag.empty())
Paul's avatar
Paul committed
164
165
166
167
168
        {
            return prog->add_parameter("output", s);
        }
        else
        {
169
            auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
170
171
172
173
            return result;
        }
    }

Shucai Xiao's avatar
Shucai Xiao committed
174
    void add_convolution_op()
Paul's avatar
Paul committed
175
    {
176
177
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
178

179
            auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
180
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
Paul's avatar
Paul committed
181

182
183
            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
184

185
186
187
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
Paul's avatar
Paul committed
188
189
    }

190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
    template <class Op>
    void add_gemm_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto beta                         = op.beta;
            std::vector<instruction_ref> refs = ins->inputs();
            if((refs.size() == 2) or (refs.size() == 3 and refs.back()->outputs().size() > 1) or
               (ins == last))
            {
                auto output = insert_allocation(ins, ins->get_shape());
                if(refs.size() == 2)
                {
                    beta = 0;
                    refs.push_back(output);
                }
                else
                {
                    auto copy_out = prog->insert_instruction(ins, hip_copy{}, refs.back(), output);
                    refs.back()   = copy_out;
                    refs.push_back(copy_out);
                }
            }
            else
            {
                refs.push_back(refs.back());
            }

            return prog->replace_instruction(ins, rocblas_gemm<Op>{Op{op.alpha, beta}}, refs);
        });
    }

222
223
224
225
226
227
228
    void add_quant_convolution_op()
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
            auto conv = miopen_quant_convolution{op, make_conv(op)};
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));

Shucai Xiao's avatar
Shucai Xiao committed
229
            auto args      = ins->inputs();
230
            auto workspace = insert_allocation(ins, ws, "workspace");
Shucai Xiao's avatar
Shucai Xiao committed
231
232
            auto output    = insert_allocation(ins, ins->get_shape());

Shucai Xiao's avatar
Shucai Xiao committed
233
            return prog->replace_instruction(ins, conv, args[0], args[1], workspace, output);
Shucai Xiao's avatar
Shucai Xiao committed
234
235
236
        });
    }

Shucai Xiao's avatar
Shucai Xiao committed
237
    void add_pooling_op()
Paul's avatar
Paul committed
238
    {
239
240
241
242
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());
243

244
245
246
            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
Paul's avatar
Paul committed
247
    }
248

Khalique's avatar
Khalique committed
249
    void add_lrn_op()
Khalique's avatar
Khalique committed
250
    {
Khalique's avatar
Khalique committed
251
        apply_map.emplace("lrn", [=](instruction_ref ins) {
Khalique's avatar
Khalique committed
252
253
254
255
256
257
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
Khalique's avatar
Khalique committed
258
    }
Paul's avatar
Paul committed
259

Shucai Xiao's avatar
Shucai Xiao committed
260
    template <class T>
261
    void add_generic_op(std::string name)
Paul's avatar
Paul committed
262
    {
263
264
265
266
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
267

268
269
            return prog->replace_instruction(ins, T{}, refs);
        });
Paul's avatar
Paul committed
270
    }
Paul's avatar
Paul committed
271

Shucai Xiao's avatar
Shucai Xiao committed
272
    template <class T, class Op>
273
    void add_extend_op(std::string name)
Khalique's avatar
Khalique committed
274
    {
275
276
277
278
279
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
280

281
282
            return prog->replace_instruction(ins, T{op}, refs);
        });
Khalique's avatar
Khalique committed
283
284
    }

Shucai Xiao's avatar
Shucai Xiao committed
285
    template <class T, class Op, class F>
286
    void add_miopen_extend_op(std::string name, F f)
Paul's avatar
Paul committed
287
    {
Shucai Xiao's avatar
Shucai Xiao committed
288
        apply_map.emplace(name, [=](instruction_ref ins) {
289
290
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);
291

292
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
293
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
294
        });
295
    }
296

Shucai Xiao's avatar
Shucai Xiao committed
297
    template <class T, class F>
298
    void add_miopen_simple_op(std::string name, F f)
299
    {
Shucai Xiao's avatar
Shucai Xiao committed
300
301
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
302
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
303
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
304
        });
305
306
    }

Shucai Xiao's avatar
Shucai Xiao committed
307
    void add_batch_norm_inference_op()
308
    {
309
310
311
312
313
314
315
316
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
            std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
            auto reshape_op = op::reshape{new_shape};
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
Shucai Xiao's avatar
Shucai Xiao committed
317
318
319
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
320
            return prog->replace_instruction(ins,
Shucai Xiao's avatar
Shucai Xiao committed
321
322
323
324
325
326
327
                                             miopen_batch_norm_inference{op},
                                             ins->inputs().at(0),
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
328
        });
329
    }
Paul's avatar
Paul committed
330
331
};

Paul's avatar
Paul committed
332
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
333
} // namespace gpu
Paul's avatar
Paul committed
334
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
335
} // namespace migraphx