/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Paul's avatar
Paul committed
24

Paul's avatar
Paul committed
25
#include <migraphx/cpu/lowering.hpp>
Artur Wojcik's avatar
Artur Wojcik committed
26
#if !defined(_MSC_VER)
Paul's avatar
Paul committed
27
28
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
29
#include <migraphx/op/identity.hpp>
Paul's avatar
Paul committed
30
#include <migraphx/op/convolution.hpp>
31
#include <migraphx/op/convolution_backwards.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
32
#include <migraphx/op/quant_convolution.hpp>
Paul's avatar
Paul committed
33
#include <migraphx/op/dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
34
#include <migraphx/op/quant_dot.hpp>
Paul's avatar
Paul committed
35
36
37
38
39
40
41
42
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/lrn.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/softmax.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
43
44
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
45
#include <migraphx/op/rnn_var_sl_last_output.hpp>
46
47
#include <migraphx/op/mod.hpp>
#include <migraphx/op/fmod.hpp>
Paul's avatar
Paul committed
48
49
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
Paul's avatar
Paul committed
50
#include <migraphx/par_dfor.hpp>
51
#include <migraphx/clamp.hpp>
52
#include <migraphx/cpu/context.hpp>
53
#include <migraphx/register_op.hpp>
54
#include <migraphx/make_op.hpp>
Artur Wojcik's avatar
Artur Wojcik committed
55
#endif
56
#include <migraphx/program.hpp>
Artur Wojcik's avatar
Artur Wojcik committed
57
#if !defined(_MSC_VER)
58
#include <migraphx/tune_axis.hpp>
59
60
61
62
#include <migraphx/match/layernorm.hpp>
#include <migraphx/match/gelu_erf.hpp>
#include <migraphx/match/gelu_tanh.hpp>
#include <migraphx/matcher.hpp>
Artur Wojcik's avatar
Artur Wojcik committed
63
#endif
Paul's avatar
Paul committed
64
#include <unordered_map>
Paul's avatar
Paul committed
65
#include <utility>
kahmed10's avatar
kahmed10 committed
66
#include <iostream>
Paul's avatar
Paul committed
67

Paul's avatar
Paul committed
68
namespace migraphx {
Paul's avatar
Paul committed
69
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
70
71
namespace cpu {

Artur Wojcik's avatar
Artur Wojcik committed
72
#if !defined(_MSC_VER)
Paul's avatar
Paul committed
73
74
75
76
77
78
// Yield the value 0 converted to the type of the argument.
// The argument only drives template deduction; its value is never read.
template <class T>
T zero(const T& /*unused*/)
{
    return static_cast<T>(0);
}

Khalique's avatar
Khalique committed
79
80
81
82
// Return `x` converted to the signed counterpart of its type.
//
// The return type is picked with std::conditional_t:
//   * integral T     -> std::make_signed<T>::type (e.g. unsigned int -> int);
//                       the value is converted implicitly by the return statement.
//   * any other T    -> std::enable_if<true, T>::type, i.e. T itself, so the
//                       value is passed through unchanged.
template <class T>
typename std::conditional_t<std::is_integral<T>::value,
                            std::make_signed<T>,
                            std::enable_if<true, T>>::type
make_signed(T x)
{
    return x;
}

Scott Thornton's avatar
Scott Thornton committed
87
88
struct cpu_im2col
{
89
    op::im2col op;
Scott Thornton's avatar
Scott Thornton committed
90

91
92
93
94
95
96
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

Scott Thornton's avatar
Scott Thornton committed
97
    static std::string name() { return "cpu::im2col"; }
kahmed10's avatar
kahmed10 committed
98
99
100
101
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.normalize_compute_shape(inputs);
    }
Scott Thornton's avatar
Scott Thornton committed
102

wsttiger's avatar
wsttiger committed
103
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
Scott Thornton's avatar
Scott Thornton committed
104
    {
Scott Thornton's avatar
Scott Thornton committed
105
        argument result{output_shape};
Scott Thornton's avatar
Scott Thornton committed
106
        auto input_shape   = args[0].get_shape();
Scott Thornton's avatar
Scott Thornton committed
107
108
        auto weights_shape = args[1].get_shape();
        visit_all(result, args[0])([&](auto col, auto input) {
Scott Thornton's avatar
Scott Thornton committed
109
110
            const std::size_t& height   = input_shape.lens()[2];
            const std::size_t& width    = input_shape.lens()[3];
Scott Thornton's avatar
Scott Thornton committed
111
112
113
            const std::size_t& channels = weights_shape.lens()[1];
            const std::size_t& kernel_h = weights_shape.lens()[2];
            const std::size_t& kernel_w = weights_shape.lens()[3];
Scott Thornton's avatar
Scott Thornton committed
114
115
            const std::size_t& pad_h    = op.padding[0];
            const std::size_t& pad_w    = op.padding[1];
Scott Thornton's avatar
Scott Thornton committed
116
117
118
            const std::size_t& stride_h = op.stride[0];
            const std::size_t& stride_w = op.stride[1];

Paul's avatar
Paul committed
119
120
            long kdiv2_h = long(kernel_h) / 2;
            long kdiv2_w = long(kernel_w) / 2;
Scott Thornton's avatar
Scott Thornton committed
121
            // calculate output sizes
Scott Thornton's avatar
Scott Thornton committed
122
123
            const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
            const std::size_t col_width  = (width - kernel_w + 2 * pad_w) / stride_w + 1;
wsttiger's avatar
wsttiger committed
124
            // account for padding for the starting position of the input pixels
Paul's avatar
Paul committed
125
            long iinput = kdiv2_h - long(pad_h);
wsttiger's avatar
wsttiger committed
126
            // loop over output pixels (ioutput, joutput)
Scott Thornton's avatar
Scott Thornton committed
127
128
            for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
            {
Paul's avatar
Paul committed
129
                long jinput = kdiv2_w - long(pad_w);
Scott Thornton's avatar
Scott Thornton committed
130
131
132
133
134
                for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
                {
                    // compute linear index for output
                    std::size_t ldx = ioutput * col_width + joutput;
                    std::size_t p   = 0;
wsttiger's avatar
wsttiger committed
135
136
137
                    dfor(channels,
                         kernel_h,
                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
Paul's avatar
Paul committed
138
139
                        auto idx    = iinput + long(koffset) - kdiv2_h;
                        auto jdx    = jinput + long(loffset) - kdiv2_w;
wsttiger's avatar
wsttiger committed
140
141
142
143
144
                        col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                          ? input(0, c, idx, jdx)
                                          : 0;
                        p++;
                    });
Scott Thornton's avatar
Scott Thornton committed
145
146
                }
            }
Scott Thornton's avatar
Scott Thornton committed
147
        });
Scott Thornton's avatar
Scott Thornton committed
148
149
150
        return result;
    }
};
151
MIGRAPHX_REGISTER_OP(cpu_im2col)
Scott Thornton's avatar
Scott Thornton committed
152

153
struct cpu_op
Paul's avatar
Paul committed
154
{
155
    operation op = op::identity{};
kahmed10's avatar
kahmed10 committed
156
157
158
159
160
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }
161
    std::string name() const { return "cpu::op"; }
Paul's avatar
Paul committed
162
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
163
    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
164
    {
Paul's avatar
Paul committed
165
        return op.compute(output_shape, args);
Paul's avatar
Paul committed
166
    }
167
168
169
170
171
172
173
174
175
176
177
    value to_value() const
    {
        value v;
        v["name"]     = op.name();
        v["operator"] = op.to_value();
        return v;
    }
    void from_value(const value& v)
    {
        op = make_op(v.at("name").to<std::string>(), v.at("operator"));
    }
178
    friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
Paul's avatar
Paul committed
179
    {
180
181
        os << "cpu::" << x.op;
        return os;
Paul's avatar
Paul committed
182
183
    }
};
184
MIGRAPHX_REGISTER_OP(cpu_op)
Paul's avatar
Paul committed
185

Khalique's avatar
Khalique committed
186
struct cpu_pad
187
{
Khalique's avatar
Khalique committed
188
    op::pad op;
189
190
191
192
193
194
195

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

kahmed10's avatar
kahmed10 committed
196
    std::string name() const { return "cpu::pad"; }
197
198
199
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
Khalique's avatar
Khalique committed
200
        assert(output_shape.standard());
201
        argument result{output_shape};
202
203
204
205
        result.visit([&](auto output) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
        });
Khalique's avatar
Khalique committed
206
207

        visit_all(result, args[0])([&](auto output, auto input) {
208
            shape_for_each(input.get_shape(), [&](const auto& idx) {
Khalique's avatar
Khalique committed
209
                std::vector<std::size_t> new_idx(idx.size());
Khalique's avatar
Khalique committed
210
211
212
213
                std::transform(
                    idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
                        return i + j;
                    });
Khalique's avatar
Khalique committed
214
                output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
215
            });
Khalique's avatar
Khalique committed
216
217
        });

218
219
220
        return result;
    }
};
221
MIGRAPHX_REGISTER_OP(cpu_pad)
222

Shucai Xiao's avatar
Shucai Xiao committed
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
// CPU implementation of rnn_var_sl_last_output: for every output element, picks
// one entry along dimension 0 of args[0], chosen from the per-batch sequence
// lengths in args[1] and the RNN direction.
struct cpu_rnn_var_sl_last_output
{
    op::rnn_var_sl_last_output op;

    // Reflection is forwarded to the wrapped operator.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::rnn_var_sl_last_output"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(std::move(inputs));
    }

    // args[0]: tensor to select from; args[1]: sequence lengths, indexed by the
    // coordinate at position 2 of the multi-index.
    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};

        // Same dims as args[0] but with dimension 0 collapsed to 1; used to turn
        // a flat output index into a multi-index.
        auto slice_lens = args[0].get_shape().lens();
        slice_lens[0]   = 1;
        shape slice_shape{output_shape.type(), slice_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
            args[1].visit([&](auto seq_lens) {
                par_for(output_shape.elements(), [&](auto i) {
                    auto pos = slice_shape.multi(i);
                    if(op.direction != op::rnn_direction::reverse and pos[1] != 1)
                    {
                        // take the entry at index seq_lens[batch] - 1 along dim 0
                        pos[0] = seq_lens[pos[2]] - 1;
                    }
                    else
                    {
                        // reverse direction, or coordinate 1 equal to 1:
                        // take the entry at index 0 along dim 0
                        pos[0] = 0;
                    }
                    output[i] = input(pos.begin(), pos.end());
                });
            });
        });

        return result;
    }
};
268
// Hand cpu_rnn_var_sl_last_output to the project's operator-registration macro
// (presumably this is what lets make_op() build it by name — confirm in register_op.hpp).
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
Shucai Xiao's avatar
Shucai Xiao committed
269

Paul's avatar
Paul committed
270
271
struct cpu_apply
{
Shucai Xiao's avatar
Shucai Xiao committed
272
    module* modl;
273
274
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
    instruction_ref last{};
Paul's avatar
Paul committed
275

276
    void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
277
278
279
280
    {
        apply_map.emplace(op_name, [=](instruction_ref ins) {
            auto&& op = ins->get_operator();
            if(allocate)
281
                return replace(ins, make_op(cpu_name, op.to_value()));
Shucai Xiao's avatar
Shucai Xiao committed
282
            return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
283
284
285
        });
    }

286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
    void extend_dnnl_algos(const std::string& dnnl_name,
                           const std::vector<std::pair<std::string, std::string>>& algos)
    {
        for(auto&& pp : algos)
        {
            std::string op_name = pp.first;
            std::string algo    = pp.second;
            apply_map.emplace(op_name, [=](instruction_ref ins) {
                auto v = ins->get_operator().to_value();
                if(not v.is_object())
                    return ins;
                v["algo"] = algo;
                auto op   = make_op(dnnl_name, v);
                return replace(ins, op);
            });
        }
    }

304
305
306
307
308
309
310
311
312
    template <class M>
    auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
    {
        return match::make_match_finder(matcher, [=](auto&, const auto& r) {
            auto ins = r.result;
            std::vector<instruction_ref> inputs;
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
313
                           [&](const auto& s) { return r.instructions[s]; });
314
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
315
            modl->replace_instruction(ins, op, inputs);
316
317
318
        });
    }

Paul's avatar
Paul committed
319
320
    void init()
    {
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
        extend_dnnl_algos("dnnl::binary",
                          {
                              {"add", "binary_add"},
                              {"div", "binary_div"},
                              {"max", "binary_max"},
                              {"min", "binary_min"},
                              {"mul", "binary_mul"},
                          });

        extend_dnnl_algos("dnnl::eltwise",
                          {
                              {"abs", "eltwise_abs"},
                              {"elu", "eltwise_elu"},
                              {"exp", "eltwise_exp"},
                              {"log", "eltwise_log"},
                              {"relu", "eltwise_relu"},
                              {"sqrt", "eltwise_sqrt"},
                              {"tanh", "eltwise_tanh"},
                          });

        extend_dnnl_algos("dnnl::reduction",
                          {
                              {"reduce_max", "reduction_max"},
                              {"reduce_mean", "reduction_mean"},
                              {"reduce_min", "reduction_min"},
                              {"reduce_sum", "reduction_sum"},
                          });

        extend_op("concat", "dnnl::concat");
        extend_op("contiguous", "dnnl::reorder");
        extend_op("convolution", "dnnl::convolution");
352
#ifndef MIGRAPHX_ENABLE_ZENDNN
353
        extend_op("convolution_backwards", "dnnl::convolution_backwards");
354
        extend_op("dot", "dnnl::dot");
355
#endif
356
357
358
359
360
361
362
363
364
365
366
        extend_op("erf", "cpu::erf");
        extend_op("gather", "cpu::gather");
        extend_op("logsoftmax", "dnnl::logsoftmax");
        extend_op("lrn", "dnnl::lrn");
        extend_op("softmax", "dnnl::softmax");
        extend_op("sub", "cpu::sub");

        extend_op("im2col", "cpu::im2col", false);
        extend_op("leaky_relu", "cpu::leaky_relu", false);
        extend_op("pad", "cpu::pad", false);
        extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
Paul's avatar
Paul committed
367
368
369
370
371
    }

    void apply()
    {
        init();
372
373
374
375
376
377
378
379
380
        // Apply fusion matchers first
        match::find_matches(*modl,
                            fuse_match(match::gelu_erf(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
                                       {"x"}),
                            fuse_match(match::gelu_tanh(),
                                       make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
                                       {"x"}),
                            fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
381
        // Apply these operators first so the inputs can be const folded
Shucai Xiao's avatar
Shucai Xiao committed
382
        for(auto it : iterator_for(*modl))
Paul's avatar
Paul committed
383
        {
384
            if(it->name() == "pow")
385
            {
386
                apply_pow(it);
387
            }
388
389
390
391
        }
        for(auto it : iterator_for(*modl))
        {
            if(it->name() == "pooling")
Paul's avatar
Paul committed
392
393
394
            {
                apply_pooling(it);
            }
Paul's avatar
Paul committed
395
            else if(apply_map.count(it->name()) > 0)
Paul's avatar
Paul committed
396
            {
Paul's avatar
Paul committed
397
                apply_map.at(it->name())(it);
Paul's avatar
Paul committed
398
399
400
401
            }
        }
    }

402
    instruction_ref apply_pow(instruction_ref ins) const
403
    {
404
405
406
407
408
409
410
        auto beta = read_scalar<float>(ins->inputs()[1]);
        if(beta.empty())
            return ins;
        return replace(ins,
                       make_op("dnnl::eltwise",
                               {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
                       {ins->inputs().front()});
411
412
    }

413
    instruction_ref apply_pooling(instruction_ref ins) const
Paul's avatar
Paul committed
414
    {
415
416
417
418
419
420
        auto&& op = ins->get_operator();
        auto v    = op.to_value();
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
        return ins;
Paul's avatar
Paul committed
421
422
    }

423
424
425
426
427
428
429
430
431
432
433
434
435
    template <class T>
    static std::vector<T> read_scalar(instruction_ref ins)
    {
        if(ins->name() == "contiguous")
            return read_scalar<T>(ins->inputs().front());
        if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
            return {};
        auto r = ins->eval();
        if(r.empty())
            return {};
        return {r.at<T>()};
    }

436
    instruction_ref replace(instruction_ref ins, const operation& op) const
Paul's avatar
Paul committed
437
    {
438
439
440
441
        return replace(ins, op, ins->inputs());
    }

    instruction_ref
442
    replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
443
    {
444
        inputs.push_back(insert_allocation(ins, ins->get_shape()));
Shucai Xiao's avatar
Shucai Xiao committed
445
        return modl->replace_instruction(ins, op, inputs);
Paul's avatar
Paul committed
446
447
    }

448
    instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
Paul's avatar
Paul committed
449
    {
450
        return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}}));
Paul's avatar
Paul committed
451
452
    }
};
Artur Wojcik's avatar
Artur Wojcik committed
453
#endif
Paul's avatar
Paul committed
454

Artur Wojcik's avatar
Artur Wojcik committed
455
456
457
458
459
460
// Entry point of the pass: run cpu_apply over module `m`.
// When _MSC_VER is defined the body is compiled out and the call does nothing.
void lowering::apply(module& m) const
{
#ifndef _MSC_VER
    cpu_apply lowerer{&m};
    lowerer.apply();
#endif
}
Paul's avatar
Paul committed
461
462

} // namespace cpu
Paul's avatar
Paul committed
463
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
464
} // namespace migraphx