/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <migraphx/cpu/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/op/fmod.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
#include <migraphx/clamp.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>
#include <unordered_map>
#include <utility>
#include <iostream>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {

template <typename T>
T zero(const T&)
{
    return T(0);
}

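// Promote an integral value to its signed counterpart so that subtraction and
// comparison behave as expected; non-integral types pass through unchanged.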
template <class T>
typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::
    type
    make_signed(T x)
{
    return x;
}

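// CPU im2col: unrolls the input patches of a convolution into the columns of
// a matrix so the convolution can be evaluated as a matrix multiply.
// Out-of-bounds (padded) positions are written as zero.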
struct cpu_im2col
{
    op::im2col op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    static std::string name() { return "cpu::im2col"; }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.normalize_compute_shape(inputs);
    }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto input_shape   = args[0].get_shape();
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
            // calculate output sizes
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
            // account for padding for the starting position of the input pixels
            long iinput = kdiv2_h - long(pad_h);
            // loop over output pixels (ioutput, joutput)
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
                long jinput = kdiv2_w - long(pad_w);
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
                }
            }
        });
        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_im2col)

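// Generic fallback: wraps any reference operator and evaluates it on the CPU
// target via the operator's own compute().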
struct cpu_op
{
    operation op = op::identity{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::op"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
    {
        return op.compute(output_shape, args);
    }

    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }

    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
    {
        os << "cpu::" << x.op;
        return os;
    }
};
MIGRAPHX_REGISTER_OP(cpu_op)

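// CPU pad: fill the whole output with the (clamped) pad value, then copy the
// input into the region offset by op.pads.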
struct cpu_pad
{
    op::pad op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::pad"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        assert(output_shape.standard());
        argument result{output_shape};
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });

        visit_all(result, args[0])([&](auto output, auto input) {
            shape_for_each(input.get_shape(), [&](const auto& idx) {
                std::vector<std::size_t> new_idx(idx.size());
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_pad)

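// Elementwise kernels are written as small function objects exposing op and
// fcn(); cpu_unary2 below lifts them into registered operators.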
struct leaky_relu_op
{
    op::leaky_relu op;
    std::string name() const { return "cpu::leaky_relu"; }
    auto fcn() const
    {
        auto a = op.alpha;
        return [a](auto x) { return x > 0 ? x : x * a; };
    }
};

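// Generic unary elementwise wrapper: applies Op::fcn() across the input with
// std::transform and self-registers every instantiation via auto_register_op.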
template <typename Op>
struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
{
    cpu_unary2() = default;

    template <class T>
    cpu_unary2(T pop) : op(Op{std::move(pop)})
    {
    }

    Op op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op.op, f);
    }
    std::string name() const { return op.name(); }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        const auto& s = inputs.at(0);
        return {s.type(), s.lens()};
    }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            assert(input.get_shape().standard());
            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
        });

        return result;
    }
};
template struct cpu_unary2<leaky_relu_op>;

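// Select the last output of an RNN with variable sequence lengths: the
// reverse direction (or the reverse half of a bidirectional output) reads
// time step 0, while the forward direction reads step seq_lens[b] - 1.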
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto out_comp_lens = args[0].get_shape().lens();
        out_comp_lens[0]   = 1;
        shape out_comp_s{output_shape.type(), out_comp_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto idx = out_comp_s.multi(i);
                    auto b   = idx[2];
                    if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
                    {
                        idx[0] = 0;
                    }
                    else
                    {
                        idx[0] = seq_lens[b] - 1;
                    }
                    output[i] = input(idx.begin(), idx.end());
                });
            });
        });

        return result;
    }
};
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)

struct cpu_apply
{
    module* modl;
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    instruction_ref last{};

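    // Register a one-to-one lowering from op_name to cpu_name; when allocate
    // is true the replacement also receives an output buffer as its last input.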
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
                return replace(ins, make_op(cpu_name, op.to_value()));
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
        });
    }

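    // Register a family of lowerings that share one DNNL operator (e.g.
    // dnnl::eltwise) and differ only in the "algo" attribute.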
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

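    // Build a matcher callback that replaces a matched subgraph with a single
    // fused op, using the named captures (plus an output allocation) as inputs.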
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
                           [&](const auto& s) { return r.instructions[s]; });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            modl->replace_instruction(ins, op, inputs);
        });
    }

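    // Build the lowering table. DNNL-backed kernels are registered where they
    // exist; the remaining ops map to plain cpu:: implementations, and ops
    // that manage their own output (allocate = false) skip the allocation.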
    void init()
    {
        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
#ifndef MIGRAPHX_ENABLE_ZENDNN
        extend_op("deconvolution", "dnnl::deconvolution");
        extend_op("dot", "dnnl::dot");
#endif
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
    }

    void apply()
    {
        init();
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
        // Apply these operators first so the inputs can be const folded
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pow")
            {
                apply_pow(it);
            }
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
            {
                apply_pooling(it);
            }
            else if(apply_map.count(it->name()) > 0)
            {
                apply_map.at(it->name())(it);
            }
        }
    }

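    // Lower pow to dnnl::eltwise_pow when the exponent is a constant scalar;
    // otherwise the instruction is left untouched.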
    instruction_ref apply_pow(instruction_ref ins) const
    {
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
    }

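    // Lower pooling to dnnl::pooling only when that kernel is available, the
    // output type is float, and ceil_mode is off; all other cases keep the
    // reference op.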
    instruction_ref apply_pooling(instruction_ref ins) const
    {
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        return ins;
    }

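    // Evaluate an instruction as a compile-time scalar constant, looking
    // through contiguous; returns an empty vector if the value is not a
    // constant scalar.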
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

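    // Replace helpers: swap an instruction for its lowered op and append a
    // freshly inserted allocation so the kernel writes into an explicit buffer.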
    instruction_ref replace(instruction_ref ins, const operation& op) const
    {
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
    {
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
        return modl->replace_instruction(ins, op, inputs);
    }

    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
    {
        return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
    }
};

void lowering::apply(module& m) const { cpu_apply{&m}.apply(); }

} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx