lowering.cpp 15.9 KB
Newer Older
Paul's avatar
Paul committed
1

Paul's avatar
Paul committed
2
3
4
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
5
#include <migraphx/op/identity.hpp>
6
#include <migraphx/op/batch_norm_inference.hpp>
Paul's avatar
Paul committed
7
#include <migraphx/op/convolution.hpp>
kahmed10's avatar
kahmed10 committed
8
#include <migraphx/op/deconvolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
9
#include <migraphx/op/quant_convolution.hpp>
Paul's avatar
Paul committed
10
#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
11
#include <migraphx/op/quant_dot.hpp>
Paul's avatar
Paul committed
12
13
14
15
16
17
18
19
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
20
21
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
22
#include <migraphx/op/rnn_var_sl_last_output.hpp>
Paul's avatar
Paul committed
23
24
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
25
#include <migraphx/par_dfor.hpp>
26
#include <migraphx/clamp.hpp>
27
#include <migraphx/cpu/context.hpp>
28
#include <migraphx/register_op.hpp>
29
#include <migraphx/make_op.hpp>
30
#include <migraphx/program.hpp>
31
#include <migraphx/tune_axis.hpp>
32
33
34
35
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>
Paul's avatar
Paul committed
36
#include <unordered_map>
Paul's avatar
Paul committed
37
#include <utility>
kahmed10's avatar
kahmed10 committed
38
#include <iostream>
Paul's avatar
Paul committed
39

Paul's avatar
Paul committed
40
namespace migraphx {
Paul's avatar
Paul committed
41
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
42
43
44
45
46
47
48
49
namespace cpu {

// Produce the zero value for the argument's type. The argument itself is
// never read; it exists only so T can be deduced at the call site.
template <typename T>
T zero(const T&)
{
    return static_cast<T>(0);
}

Khalique's avatar
Khalique committed
50
51
52
53
// Convert an integral value to the corresponding signed type; non-integral
// types pass through unchanged. This makes mixed signed/unsigned index
// arithmetic well-defined without affecting floating-point inputs.
template <class T>
auto make_signed(T x) ->
    typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::type
{
    return x;
}

Scott Thornton's avatar
Scott Thornton committed
58
59
// Reference im2col: rewrites image patches into the columns of a 2-D output
// so a convolution can be expressed as a matrix multiply. Each output row
// corresponds to one (ioutput, joutput) window position; each column holds
// one (channel, kernel-row, kernel-col) element of that window.
struct cpu_im2col
{
    op::im2col op;

    // Expose the wrapped operator's fields for serialization/reflection.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.normalize_compute_shape(inputs);
    }

    // args[0]: image tensor, args[1]: weights (used only for its shape).
    // NOTE(review): only batch element 0 is read (input(0, c, ...)), so this
    // appears to assume a batch size of 1 — confirm against callers.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            // Image dims are indexed as NCHW: lens()[2]=H, lens()[3]=W.
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

            // Half-kernel offsets: window positions are tracked by their
            // center, then converted back to corner offsets inside dfor.
            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
                    // Gather the full window for this position; p enumerates
                    // the (c, koffset, loffset) elements in order.
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        // Out-of-bounds positions (i.e. inside the padding
                        // border) contribute zero.
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
122
MIGRAPHX_REGISTER_OP(cpu_im2col)
Scott Thornton's avatar
Scott Thornton committed
123

124
// Generic CPU wrapper: carries an arbitrary reference operator and forwards
// shape computation and execution to it. Used for ops that have a working
// reference implementation but no specialized CPU/DNNL kernel.
struct cpu_op
{
    operation op = op::identity{};
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
    std::string name() const { return "cpu::op"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    // Context is unused; the wrapped reference operator runs directly.
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
    {
        return op.compute(output_shape, args);
    }
    // Serialize as {name, operator-fields} so from_value can reconstruct the
    // wrapped operator via the op registry.
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
    // Print as "cpu::<wrapped-op>" for debugging/IR dumps.
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
    {
        os << "cpu::" << x.op;
        return os;
    }
};
155
MIGRAPHX_REGISTER_OP(cpu_op)
Paul's avatar
Paul committed
156

Khalique's avatar
Khalique committed
157
struct cpu_pad
158
{
Khalique's avatar
Khalique committed
159
    op::pad op;
160
161
162
163
164
165
166

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

kahmed10's avatar
kahmed10 committed
167
    std::string name() const { return "cpu::pad"; }
168
169
170
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
Khalique's avatar
Khalique committed
171
        assert(output_shape.standard());
172
        argument result{output_shape};
173
174
175
176
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });
Khalique's avatar
Khalique committed
177
178

        visit_all(result, args[0])([&](auto output, auto input) {
179
            shape_for_each(input.get_shape(), [&](const auto& idx) {
Khalique's avatar
Khalique committed
180
                std::vector<std::size_t> new_idx(idx.size());
Khalique's avatar
Khalique committed
181
182
183
184
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
Khalique's avatar
Khalique committed
185
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
186
            });
Khalique's avatar
Khalique committed
187
188
        });

189
190
191
        return result;
    }
};
192
MIGRAPHX_REGISTER_OP(cpu_pad)
193

Khalique's avatar
Khalique committed
194
195
196
197
198
199
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
Paul's avatar
Paul committed
200
        auto a = op.alpha;
Khalique's avatar
Khalique committed
201
202
203
204
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

Paul's avatar
Paul committed
205
template <typename Op>
206
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
Paul's avatar
Paul committed
207
{
208
    cpu_unary2() = default;
209
210

    template <class T>
211
    cpu_unary2(T pop) : op(Op{std::move(pop)})
212
213
214
    {
    }

Paul's avatar
Paul committed
215
    Op op;
216
217
218
219
220
221

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
Paul's avatar
Paul committed
222
    std::string name() const { return op.name(); }
Shucai Xiao's avatar
Shucai Xiao committed
223
    shape compute_shape(const std::vector<shape>& inputs) const
224
    {
225
        check_shapes{inputs, *this}.has(1);
226
        const auto& s = inputs.at(0);
227
        return {s.type(), s.lens()};
228
229
    }

Paul's avatar
Paul committed
230
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
231
232
    {
        argument result{output_shape};
233
234
235
        visit_all(result, args[0])([&](auto output, auto input) {
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
Paul's avatar
Paul committed
236
        });
237

Paul's avatar
Paul committed
238
239
240
        return result;
    }
};
241
template struct cpu_unary2<leaky_relu_op>;
Shucai Xiao's avatar
Shucai Xiao committed
242

Shucai Xiao's avatar
Shucai Xiao committed
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
// Extracts the last valid output of an RNN when sequences in the batch have
// different lengths (args[1] holds the per-batch sequence lengths).
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    // args[0]: full RNN output, args[1]: per-batch sequence lengths.
    // NOTE(review): indexing assumes dim 0 is the time step, idx[1] the
    // direction, idx[2] the batch — inferred from usage below; confirm
    // against op::rnn_var_sl_last_output's layout contract.
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        // The output covers a single time step: reuse the input lens with
        // the time dimension collapsed to 1 to map flat indices back to
        // multi-dimensional coordinates.
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    auto b   = idx[2];
                    // For the reverse direction (or the second direction of a
                    // bidirectional run, idx[1] == 1) the last output sits at
                    // time step 0; otherwise it is at seq_lens[b] - 1.
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
288
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
Shucai Xiao's avatar
Shucai Xiao committed
289

Paul's avatar
Paul committed
290
291
// Driver of the lowering pass: walks a module and replaces framework-level
// instructions with their cpu::/dnnl:: implementations, inserting output
// allocations where the target kernel expects one.
struct cpu_apply
{
    // Module being rewritten (not owned).
    module* modl;
    // Maps an operator name to the callback that rewrites one instruction
    // of that kind.
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    // NOTE(review): `last` is never read or written in this struct as shown
    // here — possibly leftover state; confirm before removing.
    instruction_ref last{};

    // Register a 1:1 rename from op_name to cpu_name. When allocate is true
    // the replacement also gains a trailing output-allocation argument;
    // otherwise the original inputs are reused unchanged.
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

    // Register several ops that all lower to the same DNNL primitive,
    // differing only by the "algo" field (e.g. add/mul -> dnnl::binary).
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                // Skip ops whose serialized form is not a key/value object;
                // an "algo" field cannot be attached to those.
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

    // Build a match finder that replaces a matched pattern with `op`, feeding
    // it the instructions bound to `bind_inputs` plus a fresh output
    // allocation.
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
                           [&](const auto& s) { return r.instructions[s]; });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            modl->replace_instruction(ins, op, inputs);
        });
    }

    // Populate apply_map with every supported lowering.
    void init()
    {
        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
        // With ZenDNN these two stay on their default implementations.
#ifndef MIGRAPHX_ENABLE_ZENDNN
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
#endif
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        // These cpu:: kernels take the original inputs as-is (no extra
        // allocation argument).
        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    // Run the pass: fusions first, then pow, then the general table.
    void apply()
    {
        init();
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

    // Lower pow with a constant exponent to dnnl eltwise_pow; leaves the
    // instruction untouched when the exponent is not a readable scalar.
    instruction_ref apply_pow(instruction_ref ins) const
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

    // Lower pooling to dnnl only when supported: float output and no
    // ceil_mode (DNNL pooling here does not implement ceil rounding).
    instruction_ref apply_pooling(instruction_ref ins) const
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        return ins;
    }

    // Try to read a compile-time scalar constant feeding `ins`. Looks
    // through contiguous, requires a one-element (or scalar) shape, and
    // returns an empty vector when the value cannot be evaluated.
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

    // Replace `ins` with `op`, keeping its inputs and appending an output
    // allocation.
    instruction_ref replace(instruction_ref ins, const operation& op) const
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    // Insert an allocate instruction (for the kernel's output buffer) just
    // before `ins`.
    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
    {
        return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
    }
};

Shucai Xiao's avatar
Shucai Xiao committed
474
// Pass entry point: rewrite every supported instruction in the module to its
// cpu::/dnnl:: implementation.
void lowering::apply(module& m) const { cpu_apply{&m}.apply(); }
Paul's avatar
Paul committed
475
476

} // namespace cpu
Paul's avatar
Paul committed
477
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
478
} // namespace migraphx