lowering.cpp 15.8 KB
Newer Older
Paul's avatar
Paul committed
1

Paul's avatar
Paul committed
2
3
4
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
5
#include <migraphx/op/identity.hpp>
6
#include <migraphx/op/batch_norm_inference.hpp>
Paul's avatar
Paul committed
7
#include <migraphx/op/convolution.hpp>
kahmed10's avatar
kahmed10 committed
8
#include <migraphx/op/deconvolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
9
#include <migraphx/op/quant_convolution.hpp>
Paul's avatar
Paul committed
10
#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
11
#include <migraphx/op/quant_dot.hpp>
Paul's avatar
Paul committed
12
13
14
15
16
17
18
19
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
20
21
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
22
#include <migraphx/op/rnn_var_sl_last_output.hpp>
Paul's avatar
Paul committed
23
24
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
25
#include <migraphx/par_dfor.hpp>
26
#include <migraphx/clamp.hpp>
27
#include <migraphx/cpu/context.hpp>
28
#include <migraphx/register_op.hpp>
29
#include <migraphx/make_op.hpp>
30
#include <migraphx/program.hpp>
31
#include <migraphx/tune_axis.hpp>
Paul's avatar
Paul committed
32
#include <unordered_map>
Paul's avatar
Paul committed
33
#include <utility>
kahmed10's avatar
kahmed10 committed
34
#include <iostream>
Paul's avatar
Paul committed
35

Paul's avatar
Paul committed
36
namespace migraphx {
Paul's avatar
Paul committed
37
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
38
39
40
41
42
43
44
45
namespace cpu {

// Returns the additive identity of T; the argument exists only so the
// type can be deduced at the call site.
template <class T>
T zero(const T&)
{
    return static_cast<T>(0);
}

Khalique's avatar
Khalique committed
46
47
48
49
// Converts an integral value to its signed counterpart; any other type is
// passed through unchanged. std::conditional_t selects the *trait class*
// (not its ::type) so std::make_signed is never instantiated for
// non-integral types, where it would be ill-formed.
template <class T>
typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::type
make_signed(T value)
{
    return value;
}

Scott Thornton's avatar
Scott Thornton committed
54
55
// Lowered form of op::im2col: unrolls the input patches of a convolution
// into the rows of a matrix so the convolution can be computed as a
// matrix multiply.
struct cpu_im2col
{
    op::im2col op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

    // args[0] is the input image, args[1] the weights (read only for its
    // shape). NOTE(review): only input(0, ...) is ever read, so this
    // appears to assume batch size 1 — confirm against op::im2col.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            // Input lens are indexed as [batch, channel, height, width].
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

            // Kernel center offsets; kept signed because padded input
            // coordinates can go negative.
            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
                    // Copy the channels x kernel_h x kernel_w patch centered
                    // on the current pixel into row ldx; taps that fall
                    // outside the image contribute 0 (implicit zero padding).
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_im2col)
Scott Thornton's avatar
Scott Thornton committed
116

117
struct cpu_op
Paul's avatar
Paul committed
118
{
119
    operation op = op::identity{};
kahmed10's avatar
kahmed10 committed
120
121
122
123
124
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
125
    std::string name() const { return "cpu::op"; }
Paul's avatar
Paul committed
126
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
127
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
128
    {
Paul's avatar
Paul committed
129
        return op.compute(output_shape, args);
Paul's avatar
Paul committed
130
    }
131
132
133
134
135
136
137
138
139
140
141
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
142
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
Paul's avatar
Paul committed
143
    {
144
145
        os << "cpu::" << x.op;
        return os;
Paul's avatar
Paul committed
146
147
    }
};
148
MIGRAPHX_REGISTER_OP(cpu_op)
Paul's avatar
Paul committed
149

Khalique's avatar
Khalique committed
150
struct cpu_pad
151
{
Khalique's avatar
Khalique committed
152
    op::pad op;
153
154
155
156
157
158
159

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

kahmed10's avatar
kahmed10 committed
160
    std::string name() const { return "cpu::pad"; }
161
162
163
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
Khalique's avatar
Khalique committed
164
        assert(output_shape.standard());
165
        argument result{output_shape};
166
167
168
169
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });
Khalique's avatar
Khalique committed
170
171

        visit_all(result, args[0])([&](auto output, auto input) {
172
            shape_for_each(input.get_shape(), [&](const auto& idx) {
Khalique's avatar
Khalique committed
173
                std::vector<std::size_t> new_idx(idx.size());
Khalique's avatar
Khalique committed
174
175
176
177
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
Khalique's avatar
Khalique committed
178
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
179
            });
Khalique's avatar
Khalique committed
180
181
        });

182
183
184
        return result;
    }
};
185
MIGRAPHX_REGISTER_OP(cpu_pad)
186

Khalique's avatar
Khalique committed
187
188
189
190
191
192
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
Paul's avatar
Paul committed
193
        auto a = op.alpha;
Khalique's avatar
Khalique committed
194
195
196
197
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

Paul's avatar
Paul committed
198
template <typename Op>
199
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
Paul's avatar
Paul committed
200
{
201
    cpu_unary2() = default;
202
203

    template <class T>
204
    cpu_unary2(T pop) : op(Op{std::move(pop)})
205
206
207
    {
    }

Paul's avatar
Paul committed
208
    Op op;
209
210
211
212
213
214

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
Paul's avatar
Paul committed
215
    std::string name() const { return op.name(); }
Shucai Xiao's avatar
Shucai Xiao committed
216
    shape compute_shape(const std::vector<shape>& inputs) const
217
    {
218
        check_shapes{inputs, *this}.has(1);
Shucai Xiao's avatar
Shucai Xiao committed
219
        auto s = inputs.at(0);
220
        return {s.type(), s.lens()};
221
222
    }

Paul's avatar
Paul committed
223
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
224
225
    {
        argument result{output_shape};
226
227
228
        visit_all(result, args[0])([&](auto output, auto input) {
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
Paul's avatar
Paul committed
229
        });
230

Paul's avatar
Paul committed
231
232
233
        return result;
    }
};
234
template struct cpu_unary2<leaky_relu_op>;
Shucai Xiao's avatar
Shucai Xiao committed
235

Shucai Xiao's avatar
Shucai Xiao committed
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
// Extracts the last valid output of an RNN when each batch element has its
// own sequence length.
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    // args[0]: full RNN output; args[1]: per-batch sequence lengths.
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        // Shape of a single time step: the input shape with the sequence
        // (first) dimension collapsed to 1; used to map flat output indices
        // back into input coordinates.
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    // NOTE(review): idx[2] is used as the batch index and
                    // idx[1] as the direction index — confirm against the
                    // RNN output layout expected by this op.
                    auto b = idx[2];
                    // For a reversed scan the last emitted output sits at
                    // time step 0; otherwise it is at seq_lens[b] - 1.
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
Shucai Xiao's avatar
Shucai Xiao committed
282

Paul's avatar
Paul committed
283
284
// Driver for the lowering pass: walks a module and rewrites framework
// operators into their cpu::/dnnl:: implementations, inserting output
// allocations (or output parameters) as needed.
struct cpu_apply
{
    module* modl;

    // Maps an operator name to the rewrite applied to instructions of that
    // kind; populated by init().
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    // Name assigned to each program-output alias, used so outputs are
    // allocated as module parameters instead of internal buffers.
    std::unordered_map<instruction_ref, std::string> prog_output_names{};
    // Output alias of the module's final instruction.
    instruction_ref last{};

    // Record the module outputs: when the module ends in @return, the
    // output alias of each returned instruction is named "#output_<index>".
    void create_output_names()
    {
        this->last = instruction::get_output_alias(std::prev(modl->end()));
        if(this->last->name() == "@return")
        {
            const auto& prog_outputs = last->inputs();
            std::vector<instruction_ref> outputs_alias(prog_outputs.size());

            std::transform(prog_outputs.begin(),
                           prog_outputs.end(),
                           outputs_alias.begin(),
                           [](const auto& i) { return instruction::get_output_alias(i); });

            std::size_t index = 0;
            for(auto ins : outputs_alias)
            {
                prog_output_names[ins] = "#output_" + std::to_string(index++);
            }
        }
    }

    // Register a 1:1 lowering from op_name to cpu_name. When allocate is
    // true the replacement receives an explicit output buffer appended to
    // its inputs; otherwise the instruction is replaced in place with the
    // same inputs.
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

    // Register lowerings for a family of operators that all map onto one
    // dnnl operator, distinguished only by its "algo" attribute.
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                // An operator whose value form is not an object cannot carry
                // an "algo" key; leave it untouched.
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

    // Populate apply_map with every supported lowering.
    void init()
    {
        create_output_names();
        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        // These lowerings replace in place and do not take an output
        // allocation argument.
        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    // Run the pass: pow first (so its exponent can be const folded into
    // dnnl eltwise_pow), then pooling and all table-driven lowerings.
    void apply()
    {
        init();
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

    // Lower pow to dnnl eltwise_pow when the exponent is a compile-time
    // scalar; otherwise leave the instruction unchanged.
    instruction_ref apply_pow(instruction_ref ins)
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

    // Prefer dnnl pooling for float inputs without ceil_mode; otherwise
    // fall back to the reference cpu pooling kernels by mode.
    instruction_ref apply_pooling(instruction_ref ins)
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        std::string mode = v["mode"].to<std::string>();
        if(mode == "max")
            return replace(ins, make_op("cpu::pooling_max", v));
        else if(mode == "average")
            return replace(ins, make_op("cpu::pooling_average", v));
        return ins;
    }

    // Evaluate an instruction to a single scalar of type T; returns an
    // empty vector when it is not a compile-time scalar.
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        // Look through contiguous, which does not change the value.
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

    // Replace ins with op, keeping its inputs and appending an output
    // allocation as the final argument.
    instruction_ref replace(instruction_ref ins, const operation& op)
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs)
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    // Create the output buffer for ins: program outputs become module
    // parameters (the caller supplies the memory); anything else gets a
    // cpu::allocate instruction inserted before ins.
    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
    {
        auto ins_alias = instruction::get_output_alias(ins);
        if(last->name() == "@return" and prog_output_names.count(ins_alias) > 0)
        {
            return modl->add_parameter(prog_output_names[ins_alias], s);
        }
        else if(ins == last)
        {
            return modl->add_parameter("output", s);
        }

        return modl->insert_instruction(ins, make_op("cpu::allocate", {{"shape", to_value(s)}}));
    }
};

Shucai Xiao's avatar
Shucai Xiao committed
479
// Entry point of the pass: lower every instruction in the module.
void lowering::apply(module& m) const
{
    cpu_apply applier{&m};
    applier.apply();
}
Paul's avatar
Paul committed
480
481

} // namespace cpu
Paul's avatar
Paul committed
482
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
483
} // namespace migraphx