lowering.cpp 17 KB
Newer Older
Paul's avatar
Paul committed
1

Paul's avatar
Paul committed
2
3
4
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
5
#include <migraphx/op/identity.hpp>
6
#include <migraphx/op/batch_norm_inference.hpp>
Paul's avatar
Paul committed
7
#include <migraphx/op/convolution.hpp>
kahmed10's avatar
kahmed10 committed
8
#include <migraphx/op/deconvolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
9
#include <migraphx/op/quant_convolution.hpp>
Paul's avatar
Paul committed
10
#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
11
#include <migraphx/op/quant_dot.hpp>
Paul's avatar
Paul committed
12
13
14
15
16
17
18
19
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
20
21
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
22
#include <migraphx/op/rnn_var_sl_last_output.hpp>
Paul's avatar
Paul committed
23
24
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
25
#include <migraphx/par_dfor.hpp>
26
#include <migraphx/clamp.hpp>
27
#include <migraphx/cpu/context.hpp>
28
#include <migraphx/register_op.hpp>
29
#include <migraphx/make_op.hpp>
30
#include <migraphx/program.hpp>
31
#include <migraphx/tune_axis.hpp>
32
33
34
35
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>
Paul's avatar
Paul committed
36
#include <unordered_map>
Paul's avatar
Paul committed
37
#include <utility>
kahmed10's avatar
kahmed10 committed
38
#include <iostream>
Paul's avatar
Paul committed
39

Paul's avatar
Paul committed
40
namespace migraphx {
Paul's avatar
Paul committed
41
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
42
43
44
45
46
47
48
49
namespace cpu {

// Returns a value-initialized zero of type T; the argument is unused and
// only serves to deduce the type.
template <typename T>
T zero(const T&)
{
    T result(0);
    return result;
}

Khalique's avatar
Khalique committed
50
51
52
53
// Converts an integral value to its signed counterpart; non-integral
// values (e.g. floating point) are passed through unchanged.  Used so
// mixed signed/unsigned arithmetic can be done safely in signed space.
template <class T>
auto make_signed(T x)
{
    if constexpr(std::is_integral<T>{})
        return static_cast<typename std::make_signed<T>::type>(x);
    else
        return x;
}

Scott Thornton's avatar
Scott Thornton committed
58
59
// CPU implementation of im2col: unfolds convolution input patches into the
// columns of a matrix so a convolution can be computed as a matrix product.
struct cpu_im2col
{
    op::im2col op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.normalize_compute_shape(inputs);
    }

    // args[0] is the input image, args[1] the weights tensor (only its shape
    // is read, to obtain the channel and kernel dimensions).
    // NOTE(review): only batch index 0 of the input is ever read, so this
    // assumes a batch size of 1 — confirm against the op's contract.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

            // Half-kernel offsets (window center), kept in signed arithmetic
            // so positions that fall outside the image can go negative.
            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
                    // Gather the kernel_h x kernel_w patch for every channel
                    // into row ldx; out-of-bounds positions contribute zero
                    // (implicit zero padding).  The idx >= 0 check guards the
                    // signed-to-unsigned comparison against height/width.
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_im2col)
Scott Thornton's avatar
Scott Thornton committed
123

124
struct cpu_op
Paul's avatar
Paul committed
125
{
126
    operation op = op::identity{};
kahmed10's avatar
kahmed10 committed
127
128
129
130
131
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
132
    std::string name() const { return "cpu::op"; }
Paul's avatar
Paul committed
133
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
134
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
135
    {
Paul's avatar
Paul committed
136
        return op.compute(output_shape, args);
Paul's avatar
Paul committed
137
    }
138
139
140
141
142
143
144
145
146
147
148
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
149
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
Paul's avatar
Paul committed
150
    {
151
152
        os << "cpu::" << x.op;
        return os;
Paul's avatar
Paul committed
153
154
    }
};
155
MIGRAPHX_REGISTER_OP(cpu_op)
Paul's avatar
Paul committed
156

Khalique's avatar
Khalique committed
157
struct cpu_pad
158
{
Khalique's avatar
Khalique committed
159
    op::pad op;
160
161
162
163
164
165
166

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

kahmed10's avatar
kahmed10 committed
167
    std::string name() const { return "cpu::pad"; }
168
169
170
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
Khalique's avatar
Khalique committed
171
        assert(output_shape.standard());
172
        argument result{output_shape};
173
174
175
176
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });
Khalique's avatar
Khalique committed
177
178

        visit_all(result, args[0])([&](auto output, auto input) {
179
            shape_for_each(input.get_shape(), [&](const auto& idx) {
Khalique's avatar
Khalique committed
180
                std::vector<std::size_t> new_idx(idx.size());
Khalique's avatar
Khalique committed
181
182
183
184
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
Khalique's avatar
Khalique committed
185
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
186
            });
Khalique's avatar
Khalique committed
187
188
        });

189
190
191
        return result;
    }
};
192
MIGRAPHX_REGISTER_OP(cpu_pad)
193

Khalique's avatar
Khalique committed
194
195
196
197
198
199
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
Paul's avatar
Paul committed
200
        auto a = op.alpha;
Khalique's avatar
Khalique committed
201
202
203
204
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

Paul's avatar
Paul committed
205
template <typename Op>
206
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
Paul's avatar
Paul committed
207
{
208
    cpu_unary2() = default;
209
210

    template <class T>
211
    cpu_unary2(T pop) : op(Op{std::move(pop)})
212
213
214
    {
    }

Paul's avatar
Paul committed
215
    Op op;
216
217
218
219
220
221

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
Paul's avatar
Paul committed
222
    std::string name() const { return op.name(); }
Shucai Xiao's avatar
Shucai Xiao committed
223
    shape compute_shape(const std::vector<shape>& inputs) const
224
    {
225
        check_shapes{inputs, *this}.has(1);
Shucai Xiao's avatar
Shucai Xiao committed
226
        auto s = inputs.at(0);
227
        return {s.type(), s.lens()};
228
229
    }

Paul's avatar
Paul committed
230
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
231
232
    {
        argument result{output_shape};
233
234
235
        visit_all(result, args[0])([&](auto output, auto input) {
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
Paul's avatar
Paul committed
236
        });
237

Paul's avatar
Paul committed
238
239
240
        return result;
    }
};
241
template struct cpu_unary2<leaky_relu_op>;
Shucai Xiao's avatar
Shucai Xiao committed
242

Shucai Xiao's avatar
Shucai Xiao committed
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
// Extracts the final-time-step output of an RNN when each batch element has
// its own sequence length (args[1]).
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    // args[0]: full RNN output sequence; args[1]: per-batch sequence lengths.
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        // Shape used to decompose a flat output index into a multi-index of
        // the input, with the sequence dimension (dim 0) collapsed to 1.
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    // NOTE(review): assumes input layout
                    // (seq_len, num_directions, batch, hidden) — so idx[1] is
                    // the direction and idx[2] the batch index; confirm
                    // against op::rnn_var_sl_last_output's contract.
                    auto b   = idx[2];
                    // Reverse-direction output (whole-op reverse, or the
                    // second direction of a bidirectional run) lives at time
                    // step 0; forward output lives at seq_lens[b] - 1.
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
Shucai Xiao's avatar
Shucai Xiao committed
289

Paul's avatar
Paul committed
290
291
// Driver for the CPU lowering pass.  Builds a table mapping reference op
// names to rewrite functions (apply_map), then walks the module applying
// fusion matchers and per-instruction replacements with cpu::/dnnl:: ops.
struct cpu_apply
{
    module* modl;
    // op name -> function that rewrites a matching instruction in place.
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    // Stable names for module outputs; populated by create_output_names()
    // (no consumer is visible in this file — presumably read elsewhere).
    std::unordered_map<instruction_ref, std::string> prog_output_names{};
    instruction_ref last{};

    // Records a "<module>:#output_<i>" name for the output alias of each
    // module output.  Only applies when the module ends with an explicit
    // @return instruction.
    void create_output_names()
    {
        this->last = instruction::get_output_alias(std::prev(modl->end()));
        if(this->last->name() == "@return")
        {
            const auto& prog_outputs = last->inputs();
            std::vector<instruction_ref> outputs_alias(prog_outputs.size());

            std::transform(prog_outputs.begin(),
                           prog_outputs.end(),
                           outputs_alias.begin(),
                           [](const auto& i) { return instruction::get_output_alias(i); });

            std::size_t index = 0;
            for(auto ins : outputs_alias)
            {
                prog_output_names[ins] = modl->name() + ":#output_" + std::to_string(index++);
            }
        }
    }

    // Registers a 1:1 rewrite from op_name to cpu_name, copying the original
    // operator's attributes.  When allocate is true an output buffer is
    // appended to the inputs (see replace()); otherwise the replacement
    // reuses the original inputs unchanged.
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

    // Registers rewrites from several op names to a single parameterized
    // DNNL op (dnnl_name), selecting the primitive via its "algo" attribute.
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                // Skip ops whose attributes don't serialize to an object;
                // we can't attach "algo" to them.
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

    // Builds a match finder that replaces a fused pattern (matcher) with op,
    // feeding it the instructions bound to bind_inputs plus a freshly
    // allocated output buffer as the last input.
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
                           [&](const auto& s) { return r.instructions.at(s); });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            modl->replace_instruction(ins, op, inputs);
        });
    }

    // Populates apply_map with all supported rewrites.
    void init()
    {
        create_output_names();
        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
// ZenDNN builds provide their own deconvolution/dot lowering.
#ifndef MIGRAPHX_ENABLE_ZENDNN
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
#endif
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        // These ops compute into their own result, so no output allocation
        // is appended (allocate = false).
        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    // Runs the pass: fusions first, then pow, then everything else.
    void apply()
    {
        init();
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

    // Lowers pow to a DNNL eltwise_pow when the exponent is a compile-time
    // scalar; otherwise leaves the instruction untouched.
    instruction_ref apply_pow(instruction_ref ins) const
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

    // Lowers pooling: prefers dnnl::pooling (float only, no ceil_mode),
    // otherwise falls back to the cpu:: max/average kernels.
    instruction_ref apply_pooling(instruction_ref ins) const
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        std::string mode = v["mode"].to<std::string>();
        if(mode == "max")
            return replace(ins, make_op("cpu::pooling_max", v));
        else if(mode == "average")
            return replace(ins, make_op("cpu::pooling_average", v));
        return ins;
    }

    // Tries to constant-evaluate ins to a single scalar of type T, looking
    // through contiguous.  Returns an empty vector when ins is not a
    // compile-time scalar.
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

    // Replaces ins with op, keeping ins's inputs and appending an output
    // allocation.
    instruction_ref replace(instruction_ref ins, const operation& op) const
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    // Inserts a cpu::allocate of shape s immediately before ins.
    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
    {
        return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
    }
};

Shucai Xiao's avatar
Shucai Xiao committed
502
// Entry point of the lowering pass: rewrites the module's instructions to
// cpu::/dnnl:: implementations in place.
void lowering::apply(module& m) const
{
    cpu_apply applier{&m};
    applier.apply();
}
Paul's avatar
Paul committed
503
504

} // namespace cpu
Paul's avatar
Paul committed
505
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
506
} // namespace migraphx