/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
#include <migraphx/clamp.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>

#include <unordered_map>
#include <utility>
#include <iostream>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {

template <typename T>
T zero(const T&)
{
    return T(0);
}

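// Maps a value to the signed counterpart of its type: integral types are
// converted via std::make_signed, while non-integral types pass through
// unchanged.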
template <class T>
typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::
    type
    make_signed(T x)
{
    return x;
}

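// Reference im2col: unrolls each sliding convolution window of the input into
// a column of the output matrix, so the convolution can then be computed as a
// single matrix multiply. Out-of-bounds (padding) positions are written as 0.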
struct cpu_im2col
{
    op::im2col op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.normalize_compute_shape(inputs);
    }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_im2col)

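// Generic fallback wrapper: runs any reference operation unchanged on the CPU
// target. The wrapped operator is serialized through to_value()/from_value()
// so the instruction can be round-tripped like any other registered op.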
struct cpu_op
{
    operation op = op::identity{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
    std::string name() const { return "cpu::op"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
    {
        return op.compute(output_shape, args);
    }
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
    {
        os << "cpu::" << x.op;
        return os;
    }
};
MIGRAPHX_REGISTER_OP(cpu_op)

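// Pads a tensor by first filling the entire output with the (clamped) pad
// value and then copying each input element to its shifted position.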
struct cpu_pad
{
    op::pad op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::pad"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        assert(output_shape.standard());
        argument result{output_shape};
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });

        visit_all(result, args[0])([&](auto output, auto input) {
            shape_for_each(input.get_shape(), [&](const auto& idx) {
                std::vector<std::size_t> new_idx(idx.size());
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_pad)

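// Pointwise wrapper pattern: a small *_op struct supplies the scalar functor
// via fcn(), and cpu_unary2 applies it element-wise with std::transform.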
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
        auto a = op.alpha;
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

template <typename Op>
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
{
    cpu_unary2() = default;

    template <class T>
    cpu_unary2(T pop) : op(Op{std::move(pop)})
    {
    }

    Op op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
    std::string name() const { return op.name(); }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        const auto& s = inputs.at(0);
        return {s.type(), s.lens()};
    }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
        });

        return result;
    }
};
template struct cpu_unary2<leaky_relu_op>;

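// For RNNs with per-batch sequence lengths, copies the hidden state at the
// last valid time step of each sequence (or step 0 for the reverse direction)
// into the output.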
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    auto b   = idx[2];
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)

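// Lowering pass driver: walks the module and replaces generic operators with
// their cpu::* or dnnl::* implementations, appending an output-buffer
// allocation where the target implementation expects one.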
struct cpu_apply
{
    module* modl;
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    instruction_ref last{};

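    // Registers a 1:1 lowering from op_name to cpu_name, e.g.
    //   extend_op("erf", "cpu::erf");
    // When allocate is true an output allocation is appended to the
    // instruction's inputs; otherwise the original inputs are reused as-is.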
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

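    // Registers several graph ops onto a single DNNL primitive that is
    // parameterized by its "algo" attribute, e.g. "add" becomes dnnl::binary
    // with algo=binary_add.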
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

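    // Builds a match finder that rewrites a matched subgraph into a single
    // fused op; the named matcher captures in bind_inputs become the new
    // instruction's inputs, plus an output allocation.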
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
                           [&](const auto& s) { return r.instructions[s]; });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            modl->replace_instruction(ins, op, inputs);
        });
    }

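    // Populates apply_map with every generic-op -> cpu/dnnl lowering.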
    void init()
    {
        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
#ifndef MIGRAPHX_ENABLE_ZENDNN
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
#endif
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    void apply()
    {
        init();
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

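    // Lowers pow to dnnl::eltwise with algo=eltwise_pow when the exponent is
    // a compile-time scalar; otherwise the instruction is left untouched.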
    instruction_ref apply_pow(instruction_ref ins) const
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

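    // Lowers pooling to dnnl::pooling only when that op is available and the
    // tensor is float without ceil_mode; anything else stays on the reference
    // operator.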
    instruction_ref apply_pooling(instruction_ref ins) const
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        return ins;
    }

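    // Evaluates an instruction to a single scalar at compile time, looking
    // through contiguous; returns an empty vector when the value is not a
    // constant scalar.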
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

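    // Replaces an instruction with op, appending a buffer allocation for the
    // result as the final input (the calling convention for lowered ops).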
    instruction_ref replace(instruction_ref ins, const operation& op) const
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
    {
        return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
    }
};

void lowering::apply(module& m) const { cpu_apply{&m}.apply(); }

} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx