lowering.cpp 11.9 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
3
4
5
6
7
8
9
10
11
12
13
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
14
15
#include <migraphx/gpu/argmax.hpp>
#include <migraphx/gpu/argmin.hpp>
Paul's avatar
Paul committed
16
17
18
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
19
#include <migraphx/gpu/quant_convolution.hpp>
Paul's avatar
Paul committed
20
21
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/gpu/relu.hpp>
Khalique's avatar
Khalique committed
22
23
#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/gpu/abs.hpp>
Paul's avatar
Paul committed
24
#include <migraphx/gpu/leaky_relu.hpp>
Khalique's avatar
Khalique committed
25
#include <migraphx/gpu/elu.hpp>
Paul's avatar
Paul committed
26
#include <migraphx/gpu/softmax.hpp>
27
#include <migraphx/gpu/logsoftmax.hpp>
Paul's avatar
Paul committed
28
#include <migraphx/gpu/add.hpp>
29
#include <migraphx/gpu/sub.hpp>
30
#include <migraphx/gpu/div.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
31
#include <migraphx/gpu/exp.hpp>
32
#include <migraphx/gpu/erf.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
33
#include <migraphx/gpu/log.hpp>
34
#include <migraphx/gpu/sin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
35
#include <migraphx/gpu/sign.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
36
37
#include <migraphx/gpu/cos.hpp>
#include <migraphx/gpu/tan.hpp>
38
39
#include <migraphx/gpu/sinh.hpp>
#include <migraphx/gpu/cosh.hpp>
40
#include <migraphx/gpu/tanh.hpp>
41
42
43
#include <migraphx/gpu/asin.hpp>
#include <migraphx/gpu/acos.hpp>
#include <migraphx/gpu/atan.hpp>
Paul's avatar
Paul committed
44
#include <migraphx/gpu/mul.hpp>
Khalique's avatar
Khalique committed
45
46
#include <migraphx/gpu/max.hpp>
#include <migraphx/gpu/min.hpp>
Paul's avatar
Paul committed
47
48
49
50
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/concat.hpp>
51
#include <migraphx/gpu/pad.hpp>
52
#include <migraphx/gpu/gather.hpp>
Khalique's avatar
Khalique committed
53
#include <migraphx/gpu/lrn.hpp>
54
#include <migraphx/gpu/convert.hpp>
Khalique's avatar
Khalique committed
55
#include <migraphx/gpu/clip.hpp>
Paul's avatar
Paul committed
56
#include <migraphx/gpu/reduce_sum.hpp>
57
#include <migraphx/gpu/round.hpp>
Khalique's avatar
Khalique committed
58
#include <migraphx/gpu/rsqrt.hpp>
59
#include <migraphx/gpu/sqrt.hpp>
60
#include <migraphx/gpu/reduce_mean.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
61
#include <migraphx/gpu/pow.hpp>
Khalique's avatar
Khalique committed
62
#include <migraphx/gpu/sqdiff.hpp>
63
#include <migraphx/gpu/int8_conv_pack.hpp>
Paul's avatar
Paul committed
64
#include <utility>
65
#include <functional>
Khalique's avatar
Khalique committed
66
#include <algorithm>
Paul's avatar
Paul committed
67

Paul's avatar
Paul committed
68
namespace migraphx {
Paul's avatar
Paul committed
69
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
70
namespace gpu {
Paul's avatar
Paul committed
71
72
73

struct miopen_apply
{
Paul's avatar
Paul committed
74
    program* prog = nullptr;
Paul's avatar
Paul committed
75
    context ctx{};
Shucai Xiao's avatar
Shucai Xiao committed
76
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
77
    instruction_ref last{};
Paul's avatar
Paul committed
78

Paul's avatar
Paul committed
79
80
81
82
83
84
85
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

86
87
    void init()
    {
Shucai Xiao's avatar
Shucai Xiao committed
88
        this->last = instruction::get_output_alias(std::prev(prog->end()));
Paul's avatar
Paul committed
89

90
91
92
93
94
95
        add_miopen_simple_op<miopen_abs>("abs", make_abs);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

        add_generic_op<hip_add>("add");
96
        add_generic_op<hip_sub>("sub");
97
        add_generic_op<hip_exp>("exp");
Shucai Xiao's avatar
Shucai Xiao committed
98
        add_generic_op<hip_erf>("erf");
99
        add_generic_op<hip_log>("log");
100
101
102
103
104
        add_generic_op<hip_sin>("sin");
        add_generic_op<hip_cos>("cos");
        add_generic_op<hip_tan>("tan");
        add_generic_op<hip_sinh>("sinh");
        add_generic_op<hip_cosh>("cosh");
105
        add_generic_op<hip_tanh>("tanh");
106
107
108
        add_generic_op<hip_asin>("asin");
        add_generic_op<hip_acos>("acos");
        add_generic_op<hip_atan>("atan");
109
        add_generic_op<hip_sqrt>("sqrt");
110
        add_generic_op<hip_mul>("mul");
111
        add_generic_op<hip_div>("div");
Khalique's avatar
Khalique committed
112
113
        add_generic_op<hip_max>("max");
        add_generic_op<hip_min>("min");
Khalique's avatar
Khalique committed
114
        add_generic_op<hip_rsqrt>("rsqrt");
115
        add_generic_op<hip_round>("round");
116
        add_generic_op<hip_pow>("pow");
Khalique's avatar
Khalique committed
117
        add_generic_op<hip_sqdiff>("sqdiff");
118
        add_generic_op<hip_relu>("relu");
Shucai Xiao's avatar
Shucai Xiao committed
119
        add_generic_op<hip_sign>("sign");
120
        add_generic_op<hip_sigmoid>("sigmoid");
121
122
123

        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
        add_extend_op<hip_concat, op::concat>("concat");
Khalique's avatar
Khalique committed
124
        add_extend_op<hip_softmax, op::softmax>("softmax");
125
        add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
126
127
        add_extend_op<hip_argmax, op::argmax>("argmax");
        add_extend_op<hip_argmin, op::argmin>("argmin");
Khalique's avatar
Khalique committed
128
        add_extend_op<hip_gather, op::gather>("gather");
129
        add_extend_op<hip_pad, op::pad>("pad");
130
        add_extend_op<hip_convert, op::convert>("convert");
Khalique's avatar
Khalique committed
131
        add_extend_op<hip_clip, op::clip>("clip");
Paul's avatar
Paul committed
132
        add_extend_op<hip_reduce_sum, op::reduce_sum>("reduce_sum");
133
        add_extend_op<hip_reduce_mean, op::reduce_mean>("reduce_mean");
134
135
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");
136

Khalique's avatar
Khalique committed
137
        add_lrn_op();
138
        add_convolution_op();
139
        add_quant_convolution_op();
140
141
        add_pooling_op();
        add_batch_norm_inference_op();
142
143
    }

Paul's avatar
Paul committed
144
145
    void apply()
    {
146
        init();
Paul's avatar
Paul committed
147
148
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
149
            auto s = it->get_shape();
150
            if(apply_map.count(it->name()) > 0)
151
            {
152
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
153
            }
Paul's avatar
Paul committed
154
155
156
        }
    }

Paul's avatar
Paul committed
157
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
158
    {
Shucai Xiao's avatar
Shucai Xiao committed
159
        if(ins == last and tag.empty())
Paul's avatar
Paul committed
160
161
162
163
164
        {
            return prog->add_parameter("output", s);
        }
        else
        {
165
            auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
166
167
168
169
            return result;
        }
    }

Shucai Xiao's avatar
Shucai Xiao committed
170
    void add_convolution_op()
Paul's avatar
Paul committed
171
    {
172
173
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
174

175
            auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
176
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
Paul's avatar
Paul committed
177

178
179
            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
180

181
182
183
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
Paul's avatar
Paul committed
184
185
    }

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
    template <class Op>
    void add_gemm_op(std::string name)
    {
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto beta                         = op.beta;
            std::vector<instruction_ref> refs = ins->inputs();
            if((refs.size() == 2) or (refs.size() == 3 and refs.back()->outputs().size() > 1) or
               (ins == last))
            {
                auto output = insert_allocation(ins, ins->get_shape());
                if(refs.size() == 2)
                {
                    beta = 0;
                    refs.push_back(output);
                }
                else
                {
                    auto copy_out = prog->insert_instruction(ins, hip_copy{}, refs.back(), output);
                    refs.back()   = copy_out;
                    refs.push_back(copy_out);
                }
            }
            else
            {
                refs.push_back(refs.back());
            }

            return prog->replace_instruction(ins, rocblas_gemm<Op>{Op{op.alpha, beta}}, refs);
        });
    }

218
219
220
221
222
223
224
    void add_quant_convolution_op()
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
            auto conv = miopen_quant_convolution{op, make_conv(op)};
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));

Shucai Xiao's avatar
Shucai Xiao committed
225
            auto args      = ins->inputs();
226
            auto workspace = insert_allocation(ins, ws, "workspace");
Shucai Xiao's avatar
Shucai Xiao committed
227
228
            auto output    = insert_allocation(ins, ins->get_shape());

Shucai Xiao's avatar
Shucai Xiao committed
229
            return prog->replace_instruction(ins, conv, args[0], args[1], workspace, output);
Shucai Xiao's avatar
Shucai Xiao committed
230
231
232
        });
    }

Shucai Xiao's avatar
Shucai Xiao committed
233
    void add_pooling_op()
Paul's avatar
Paul committed
234
    {
235
236
237
238
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());
239

240
241
242
            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
Paul's avatar
Paul committed
243
    }
244

Khalique's avatar
Khalique committed
245
    void add_lrn_op()
Khalique's avatar
Khalique committed
246
    {
Khalique's avatar
Khalique committed
247
        apply_map.emplace("lrn", [=](instruction_ref ins) {
Khalique's avatar
Khalique committed
248
249
250
251
252
253
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
Khalique's avatar
Khalique committed
254
    }
Paul's avatar
Paul committed
255

Shucai Xiao's avatar
Shucai Xiao committed
256
    template <class T>
257
    void add_generic_op(std::string name)
Paul's avatar
Paul committed
258
    {
259
260
261
262
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
263

264
265
            return prog->replace_instruction(ins, T{}, refs);
        });
Paul's avatar
Paul committed
266
    }
Paul's avatar
Paul committed
267

Shucai Xiao's avatar
Shucai Xiao committed
268
    template <class T, class Op>
269
    void add_extend_op(std::string name)
Khalique's avatar
Khalique committed
270
    {
271
272
273
274
275
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
276

277
278
            return prog->replace_instruction(ins, T{op}, refs);
        });
Khalique's avatar
Khalique committed
279
280
    }

Shucai Xiao's avatar
Shucai Xiao committed
281
    template <class T, class Op, class F>
282
    void add_miopen_extend_op(std::string name, F f)
Paul's avatar
Paul committed
283
    {
Shucai Xiao's avatar
Shucai Xiao committed
284
        apply_map.emplace(name, [=](instruction_ref ins) {
285
286
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);
287

288
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
289
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
290
        });
291
    }
292

Shucai Xiao's avatar
Shucai Xiao committed
293
    template <class T, class F>
294
    void add_miopen_simple_op(std::string name, F f)
295
    {
Shucai Xiao's avatar
Shucai Xiao committed
296
297
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
298
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
299
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
300
        });
301
302
    }

Shucai Xiao's avatar
Shucai Xiao committed
303
    void add_batch_norm_inference_op()
304
    {
305
306
307
308
309
310
311
312
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
            std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
            auto reshape_op = op::reshape{new_shape};
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
Shucai Xiao's avatar
Shucai Xiao committed
313
314
315
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
316
            return prog->replace_instruction(ins,
Shucai Xiao's avatar
Shucai Xiao committed
317
318
319
320
321
322
323
                                             miopen_batch_norm_inference{op},
                                             ins->inputs().at(0),
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
324
        });
325
    }
Paul's avatar
Paul committed
326
327
};

Paul's avatar
Paul committed
328
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
329
} // namespace gpu
Paul's avatar
Paul committed
330
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
331
} // namespace migraphx