lowering.cpp 17.2 KB
Newer Older
Paul's avatar
Paul committed
1

Paul's avatar
Paul committed
2
3
4
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
5
#include <migraphx/op/identity.hpp>
6
#include <migraphx/op/batch_norm_inference.hpp>
Paul's avatar
Paul committed
7
#include <migraphx/op/convolution.hpp>
kahmed10's avatar
kahmed10 committed
8
#include <migraphx/op/deconvolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
9
#include <migraphx/op/quant_convolution.hpp>
Paul's avatar
Paul committed
10
#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
11
#include <migraphx/op/quant_dot.hpp>
Paul's avatar
Paul committed
12
13
14
15
16
17
18
19
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
20
21
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
22
#include <migraphx/op/rnn_var_sl_last_output.hpp>
Paul's avatar
Paul committed
23
24
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
25
#include <migraphx/par_dfor.hpp>
26
#include <migraphx/clamp.hpp>
27
#include <migraphx/cpu/context.hpp>
28
#include <migraphx/register_op.hpp>
29
#include <migraphx/make_op.hpp>
30
#include <migraphx/program.hpp>
31
#include <migraphx/tune_axis.hpp>
32
33
34
35
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>
Paul's avatar
Paul committed
36
#include <unordered_map>
Paul's avatar
Paul committed
37
#include <utility>
kahmed10's avatar
kahmed10 committed
38
#include <iostream>
Paul's avatar
Paul committed
39

Paul's avatar
Paul committed
40
namespace migraphx {
Paul's avatar
Paul committed
41
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
42
43
44
45
46
47
48
49
namespace cpu {

// Return the additive identity of T; the argument exists only so the
// type can be deduced at the call site.
template <typename T>
T zero(const T&)
{
    T identity(0);
    return identity;
}

Khalique's avatar
Khalique committed
50
51
52
53
// Result type of make_signed(): the same-width signed type for integral
// T, and T itself for everything else (floats pass through unchanged).
template <class T>
using make_signed_result_t =
    typename std::conditional_t<std::is_integral<T>{},
                                std::make_signed<T>,
                                std::enable_if<true, T>>::type;

// Convert an integral value to its signed counterpart; non-integral
// values are returned as-is.
template <class T>
make_signed_result_t<T> make_signed(T x)
{
    return x;
}

Scott Thornton's avatar
Scott Thornton committed
58
59
// cpu::im2col - reference implementation of im2col: rearranges the
// input-image patches touched by each kernel position into the columns
// of a matrix, so a convolution can be computed as a matrix multiply.
struct cpu_im2col
{
    op::im2col op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

    // args[0]: input image, args[1]: weights (used only for its shape).
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            // Input is indexed as (batch, channel, row, column); the
            // weights shape supplies channel count and kernel extents.
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

            // Kernel half-extents; kept signed because padding can move
            // the sampling position outside the image.
            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
                    // Gather the kernel_h x kernel_w patch for every
                    // channel; positions outside the image contribute 0.
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        // NOTE(review): only the first image of the batch
                        // is ever read (input(0, ...)) - appears to
                        // assume batch size 1; confirm against op::im2col.
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_im2col)
Scott Thornton's avatar
Scott Thornton committed
120

121
struct cpu_op
Paul's avatar
Paul committed
122
{
123
    operation op = op::identity{};
kahmed10's avatar
kahmed10 committed
124
125
126
127
128
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
129
    std::string name() const { return "cpu::op"; }
Paul's avatar
Paul committed
130
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
131
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
132
    {
Paul's avatar
Paul committed
133
        return op.compute(output_shape, args);
Paul's avatar
Paul committed
134
    }
135
136
137
138
139
140
141
142
143
144
145
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
146
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
Paul's avatar
Paul committed
147
    {
148
149
        os << "cpu::" << x.op;
        return os;
Paul's avatar
Paul committed
150
151
    }
};
152
MIGRAPHX_REGISTER_OP(cpu_op)
Paul's avatar
Paul committed
153

Khalique's avatar
Khalique committed
154
struct cpu_pad
155
{
Khalique's avatar
Khalique committed
156
    op::pad op;
157
158
159
160
161
162
163

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

kahmed10's avatar
kahmed10 committed
164
    std::string name() const { return "cpu::pad"; }
165
166
167
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
Khalique's avatar
Khalique committed
168
        assert(output_shape.standard());
169
        argument result{output_shape};
170
171
172
173
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });
Khalique's avatar
Khalique committed
174
175

        visit_all(result, args[0])([&](auto output, auto input) {
176
            shape_for_each(input.get_shape(), [&](const auto& idx) {
Khalique's avatar
Khalique committed
177
                std::vector<std::size_t> new_idx(idx.size());
Khalique's avatar
Khalique committed
178
179
180
181
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
Khalique's avatar
Khalique committed
182
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
183
            });
Khalique's avatar
Khalique committed
184
185
        });

186
187
188
        return result;
    }
};
189
MIGRAPHX_REGISTER_OP(cpu_pad)
190

Khalique's avatar
Khalique committed
191
192
193
194
195
196
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
Paul's avatar
Paul committed
197
        auto a = op.alpha;
Khalique's avatar
Khalique committed
198
199
200
201
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

Paul's avatar
Paul committed
202
template <typename Op>
203
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
Paul's avatar
Paul committed
204
{
205
    cpu_unary2() = default;
206
207

    template <class T>
208
    cpu_unary2(T pop) : op(Op{std::move(pop)})
209
210
211
    {
    }

Paul's avatar
Paul committed
212
    Op op;
213
214
215
216
217
218

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
Paul's avatar
Paul committed
219
    std::string name() const { return op.name(); }
Shucai Xiao's avatar
Shucai Xiao committed
220
    shape compute_shape(const std::vector<shape>& inputs) const
221
    {
222
        check_shapes{inputs, *this}.has(1);
Shucai Xiao's avatar
Shucai Xiao committed
223
        auto s = inputs.at(0);
224
        return {s.type(), s.lens()};
225
226
    }

Paul's avatar
Paul committed
227
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
228
229
    {
        argument result{output_shape};
230
231
232
        visit_all(result, args[0])([&](auto output, auto input) {
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
Paul's avatar
Paul committed
233
        });
234

Paul's avatar
Paul committed
235
236
237
        return result;
    }
};
238
template struct cpu_unary2<leaky_relu_op>;
Shucai Xiao's avatar
Shucai Xiao committed
239

Shucai Xiao's avatar
Shucai Xiao committed
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
// cpu::rnn_var_sl_last_output - picks the last valid time-step output
// of an RNN when each batch element has its own sequence length.
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    // args[0]: full RNN output tensor; args[1]: per-batch sequence lengths.
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        // Shape used to decompose flat output indices: the input shape
        // with the leading dimension collapsed to a single time step.
        // The indexing below treats idx[0] as the time step, idx[1] as
        // the direction and idx[2] as the batch - i.e. it assumes a
        // (seq, num_directions, batch, hidden) layout; TODO confirm
        // against op::rnn_var_sl_last_output.
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    auto b   = idx[2];
                    // A reverse pass (or the second direction of a
                    // bidirectional run) keeps its last output at step 0.
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        // Forward: last valid step is seq_lens[b] - 1.
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
Shucai Xiao's avatar
Shucai Xiao committed
286

Paul's avatar
Paul committed
287
288
// Driver of the lowering pass: walks a module and replaces generic
// operators with their cpu:: / dnnl:: counterparts, appending an output
// allocation as an extra input where the target implementation expects
// to write into a pre-allocated buffer.
struct cpu_apply
{
    module* modl; // module being rewritten (not owned)
    // op name -> callback producing the replacement instruction
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    // output-aliased instructions -> parameter name for their result buffer
    std::unordered_map<instruction_ref, std::string> prog_output_names{};
    instruction_ref last{}; // output alias of the module's final instruction

    // Assign a stable parameter name to every module output so that
    // insert_allocation() can bind results directly to output buffers.
    void create_output_names()
    {
        this->last = instruction::get_output_alias(std::prev(modl->end()));
        if(this->last->name() == "@return")
        {
            const auto& prog_outputs = last->inputs();
            std::vector<instruction_ref> outputs_alias(prog_outputs.size());

            std::transform(prog_outputs.begin(),
                           prog_outputs.end(),
                           outputs_alias.begin(),
                           [](const auto& i) { return instruction::get_output_alias(i); });

            std::size_t index = 0;
            for(auto ins : outputs_alias)
            {
                prog_output_names[ins] = modl->name() + ":#output_" + std::to_string(index++);
            }
        }
    }

    // Register a 1:1 rewrite of op_name to cpu_name. When allocate is
    // true the replacement also receives a trailing allocation input
    // (via replace()); otherwise it keeps exactly the original inputs.
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

    // Register several op names onto one dnnl primitive, selecting the
    // dnnl algorithm through the "algo" attribute of its value form.
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                // Ops without attributes cannot carry an algo; skip them.
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

    // Build a match finder that replaces a matched subgraph with op,
    // wiring in the instructions bound to bind_inputs plus a fresh
    // output allocation as the last input.
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
                           [&](const auto& s) { return r.instructions.at(s); });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            this->modl->replace_instruction(ins, op, inputs);
        });
    }

    // Populate apply_map with every supported rewrite.
    void init()
    {
        create_output_names();

        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        // These rewrites keep the original inputs (no allocation added).
        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    void apply()
    {
        init();
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

    // pow with a constant scalar exponent becomes dnnl eltwise_pow;
    // left unchanged when the exponent cannot be evaluated.
    instruction_ref apply_pow(instruction_ref ins)
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

    // Prefer dnnl pooling (float type, no ceil mode); otherwise fall
    // back to the cpu reference pooling for max/average modes.
    instruction_ref apply_pooling(instruction_ref ins)
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        std::string mode = v["mode"].to<std::string>();
        if(mode == "max")
            return replace(ins, make_op("cpu::pooling_max", v));
        else if(mode == "average")
            return replace(ins, make_op("cpu::pooling_average", v));
        return ins;
    }

    // Try to evaluate ins to a single scalar of type T, looking through
    // contiguous; returns an empty vector when not a constant scalar.
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

    // Replace ins with op, reusing its inputs plus an output allocation.
    instruction_ref replace(instruction_ref ins, const operation& op)
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs)
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    // Produce the buffer for ins's result: a module output binds to its
    // named output parameter (or "output" for the single-result case);
    // anything else gets a fresh cpu::allocate instruction.
    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
    {
        auto ins_alias = instruction::get_output_alias(ins);
        if(last->name() == "@return" and prog_output_names.count(ins_alias) > 0)
        {
            return modl->add_parameter(prog_output_names[ins_alias], s);
        }
        else if(ins == last)
        {
            return modl->add_parameter("output", s);
        }

        return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
    }
};

Shucai Xiao's avatar
Shucai Xiao committed
507
// Pass entry point: rewrite the module's instructions in place.
void lowering::apply(module& m) const { cpu_apply{&m}.apply(); }
Paul's avatar
Paul committed
508
509

} // namespace cpu
Paul's avatar
Paul committed
510
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
511
} // namespace migraphx