lowering.cpp 16.8 KB
Newer Older
Paul's avatar
Paul committed
1

Paul's avatar
Paul committed
2
3
4
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
5
#include <migraphx/op/identity.hpp>
6
#include <migraphx/op/batch_norm_inference.hpp>
Paul's avatar
Paul committed
7
#include <migraphx/op/convolution.hpp>
kahmed10's avatar
kahmed10 committed
8
#include <migraphx/op/deconvolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
9
#include <migraphx/op/quant_convolution.hpp>
Paul's avatar
Paul committed
10
#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
11
#include <migraphx/op/quant_dot.hpp>
Paul's avatar
Paul committed
12
13
14
15
16
17
18
19
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
20
21
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
22
#include <migraphx/op/rnn_var_sl_last_output.hpp>
Paul's avatar
Paul committed
23
24
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
25
#include <migraphx/par_dfor.hpp>
26
#include <migraphx/clamp.hpp>
27
#include <migraphx/cpu/context.hpp>
28
#include <migraphx/register_op.hpp>
29
#include <migraphx/make_op.hpp>
30
#include <migraphx/program.hpp>
31
#include <migraphx/tune_axis.hpp>
32
33
34
35
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>
Paul's avatar
Paul committed
36
#include <unordered_map>
Paul's avatar
Paul committed
37
#include <utility>
kahmed10's avatar
kahmed10 committed
38
#include <iostream>
Paul's avatar
Paul committed
39

Paul's avatar
Paul committed
40
namespace migraphx {
Paul's avatar
Paul committed
41
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
42
43
44
45
46
47
48
49
namespace cpu {

// Returns the additive identity for the value's type T.
// The argument is used only for type deduction; its value is ignored.
template <typename T>
T zero(const T&)
{
    T result{0};
    return result;
}

Khalique's avatar
Khalique committed
50
51
52
53
// Identity function that maps an integral value to the signed variant of its
// type; non-integral values pass through with their type unchanged.
// NOTE: std::make_signed<T> is only *named* inside std::conditional_t and its
// ::type is accessed after selection, so it is never instantiated for
// non-integral T (where it would be ill-formed).
template <class T>
auto make_signed(T value) ->
    typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::
        type
{
    return value;
}

Scott Thornton's avatar
Scott Thornton committed
58
59
// Reference CPU implementation of im2col: unrolls the convolution input into
// a 2-D matrix where each row corresponds to one output pixel and each column
// to one (channel, kernel_row, kernel_col) tap, so convolution reduces to GEMM.
struct cpu_im2col
{
    op::im2col op;

    // Forward reflection to the wrapped op so serialization sees its fields.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.normalize_compute_shape(inputs);
    }

    // args[0]: input image (NCHW), args[1]: weights (used only for its shape).
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            // Spatial dims come from the input; channel/kernel dims from the weights.
            const int& height   = input_shape.lens()[2];
            const int& width    = input_shape.lens()[3];
            const int& channels = weights_shape.lens()[1];
            const int& kernel_h = weights_shape.lens()[2];
            const int& kernel_w = weights_shape.lens()[3];
            const int& pad_h    = op.padding[0];
            const int& pad_w    = op.padding[1];
            const int& stride_h = op.stride[0];
            const int& stride_w = op.stride[1];

            // Half-kernel offsets used to center each window on its input pixel.
            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const int col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const int col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(int ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(int joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    int ldx = ioutput * col_width + joutput;
                    int p   = 0;
                    // Copy the kernel window for every channel into row ldx;
                    // p enumerates columns in (c, kh, kw) order.
                    dfor(channels, kernel_h, kernel_w)([&](int c, int koffset, int loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        // Out-of-bounds taps read as 0 (implicit zero padding).
                        // NOTE(review): only batch index 0 is read — assumes N == 1.
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
120
MIGRAPHX_REGISTER_OP(cpu_im2col)
Scott Thornton's avatar
Scott Thornton committed
121

122
// Generic CPU wrapper: adapts any reference operation so it runs on the CPU
// target, delegating shape computation and execution to the wrapped op.
struct cpu_op
{
    operation op = op::identity{};
    // Forward reflection to the wrapped op.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
    std::string name() const { return "cpu::op"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    // Context is unused; the wrapped op computes directly on the arguments.
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
    {
        return op.compute(output_shape, args);
    }
    // Serialize as {name, operator} so the inner op can be reconstructed.
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
    {
        os << "cpu::" << x.op;
        return os;
    }
};
153
MIGRAPHX_REGISTER_OP(cpu_op)
Paul's avatar
Paul committed
154

Khalique's avatar
Khalique committed
155
// CPU implementation of pad: fills the output with the (clamped) pad value,
// then copies the input into the region shifted by the leading pads.
struct cpu_pad
{
    op::pad op;

    // Forward reflection to the wrapped op.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::pad"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        assert(output_shape.standard());
        argument result{output_shape};
        // Pre-fill everything with the pad value, clamped to the output type's range.
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });

        // Copy each input element to its padded position: out[idx + pads] = in[idx].
        // NOTE(review): only the leading per-dimension pads are added here —
        // trailing pads are implied by the output shape.
        visit_all(result, args[0])([&](auto output, auto input) {
            shape_for_each(input.get_shape(), [&](const auto& idx) {
                std::vector<int> new_idx(idx.size());
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
            });
        });

        return result;
    }
};
190
MIGRAPHX_REGISTER_OP(cpu_pad)
191

Khalique's avatar
Khalique committed
192
193
194
195
196
197
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
Paul's avatar
Paul committed
198
        auto a = op.alpha;
Khalique's avatar
Khalique committed
199
200
201
202
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

Paul's avatar
Paul committed
203
// Generic elementwise unary CPU kernel: Op supplies the target name() and an
// fcn() functor applied to every element. Self-registers via auto_register_op.
template <typename Op>
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
{
    cpu_unary2() = default;

    // Wrap the framework op (e.g. op::leaky_relu) inside the adapter Op.
    template <class T>
    cpu_unary2(T pop) : op(Op{std::move(pop)})
    {
    }

    Op op;

    // Reflect the inner framework op (op.op), not the adapter itself.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
    std::string name() const { return op.name(); }
    // Output has the same type/lens as the single input (packed standard layout).
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        auto s = inputs.at(0);
        return {s.type(), s.lens()};
    }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            // Linear transform is only valid when the input is densely packed.
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
        });
        return result;
    }
};
239
template struct cpu_unary2<leaky_relu_op>;
Shucai Xiao's avatar
Shucai Xiao committed
240

Shucai Xiao's avatar
Shucai Xiao committed
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
// CPU implementation of rnn_var_sl_last_output: extracts the last valid
// timestep of an RNN output when sequences in the batch have variable lengths.
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    // args[0]: full RNN output; args[1]: per-batch sequence lengths.
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        // Collapse the leading (timestep) dimension to 1 so that the linear
        // output index i maps onto the remaining dims of the input.
        // NOTE(review): dims appear to be {seq, dirs, batch, ...} — idx[0] is
        // the timestep, idx[1] the direction, idx[2] the batch; confirm with
        // the op's shape contract.
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    auto b   = idx[2];
                    // Reverse direction (or the second direction of a
                    // bidirectional run) reads timestep 0; otherwise read the
                    // last valid timestep for this batch entry.
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
286
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
Shucai Xiao's avatar
Shucai Xiao committed
287

Paul's avatar
Paul committed
288
289
// Pass driver: walks a module and rewrites framework-level operators into
// their cpu::/dnnl:: counterparts, appending an output allocation argument to
// each rewritten instruction.
struct cpu_apply
{
    module* modl;
    // Maps an operator name to the rewrite applied to matching instructions.
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    // Names assigned to the output aliases of the module's @return instruction.
    std::unordered_map<instruction_ref, std::string> prog_output_names{};
    instruction_ref last{};

    // Records "<module>:#output_N" for each aliased output of the trailing
    // @return instruction; no-op if the module does not end in @return.
    void create_output_names()
    {
        this->last = instruction::get_output_alias(std::prev(modl->end()));
        if(this->last->name() == "@return")
        {
            const auto& prog_outputs = last->inputs();
            std::vector<instruction_ref> outputs_alias(prog_outputs.size());

            std::transform(prog_outputs.begin(),
                           prog_outputs.end(),
                           outputs_alias.begin(),
                           [](const auto& i) { return instruction::get_output_alias(i); });

            int index = 0;
            for(auto ins : outputs_alias)
            {
                prog_output_names[ins] = modl->name() + ":#output_" + std::to_string(index++);
            }
        }
    }

    // Register a 1:1 rename from op_name to cpu_name; when allocate is true
    // the replacement also receives a freshly inserted output allocation.
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

    // Register several ops that map onto one dnnl primitive distinguished by
    // its "algo" attribute (e.g. add/div/... -> dnnl::binary).
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                // Ops whose serialized value is not an object cannot carry an
                // "algo" attribute; leave them untouched.
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

    // Build a matcher finder that replaces the matched root instruction with
    // `op`, fed by the instructions bound to `bind_inputs` plus an allocation.
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
                           [&](const auto& s) { return r.instructions.at(s); });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            modl->replace_instruction(ins, op, inputs);
        });
    }

    // Populate apply_map with every supported rewrite.
    void init()
    {
        create_output_names();

        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
// With ZenDNN enabled, deconvolution and dot keep their reference lowering.
#ifndef MIGRAPHX_ENABLE_ZENDNN
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
#endif
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        // These rewrites keep the original inputs (no allocation appended).
        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    // Run the pass: fuse patterns, lower pow, then lower everything else.
    void apply()
    {
        init();
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

    // Lower pow to dnnl eltwise_pow when the exponent is a constant scalar;
    // otherwise leave the instruction unchanged.
    instruction_ref apply_pow(instruction_ref ins) const
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

    // Prefer dnnl pooling for float without ceil_mode; otherwise fall back to
    // the cpu max/average kernels. Other modes are left untouched.
    instruction_ref apply_pooling(instruction_ref ins) const
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        std::string mode = v["mode"].to<std::string>();
        if(mode == "max")
            return replace(ins, make_op("cpu::pooling_max", v));
        else if(mode == "average")
            return replace(ins, make_op("cpu::pooling_average", v));
        return ins;
    }

    // Evaluate a single-element constant reachable through contiguous ops;
    // returns empty when the value is not a compile-time scalar.
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

    // Replace ins with op, reusing its inputs plus an output allocation.
    instruction_ref replace(instruction_ref ins, const operation& op) const
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    // Insert a cpu::allocate for shape s directly before ins.
    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
    {
        return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
    }
};

Shucai Xiao's avatar
Shucai Xiao committed
500
// Entry point of the lowering pass: rewrite the module's instructions into
// their cpu/dnnl implementations.
void lowering::apply(module& m) const
{
    cpu_apply applier{&m};
    applier.apply();
}
Paul's avatar
Paul committed
501
502

} // namespace cpu
Paul's avatar
Paul committed
503
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
504
} // namespace migraphx