cpu_target.cpp 15.9 KB
Newer Older
Paul's avatar
Paul committed
1

Paul's avatar
Paul committed
2
3
4
5
6
#include <migraph/cpu/cpu_target.hpp>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <limits>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <migraph/instruction.hpp>
#include <migraph/dfor.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
#include <migraph/iterator_for.hpp>
Paul's avatar
Paul committed
8

Paul's avatar
Paul committed
9
namespace migraph {
Paul's avatar
Paul committed
10
namespace cpu {
Paul's avatar
Paul committed
11

12
/// Produces the value 0 converted to the argument's type.
/// The argument itself is never read; it only drives template deduction.
template <class Value>
Value zero(const Value& /*type_tag*/)
{
    return Value(0);
}
17

Paul's avatar
Paul committed
18
19
20
21
struct cpu_convolution
{
    convolution op;

Paul's avatar
Paul committed
22
23
    std::string name() const { return "cpu::convolution"; }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
24
    argument compute(context&, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
25
    {
Paul's avatar
Paul committed
26
        argument result{output_shape};
Paul's avatar
Paul committed
27
28
29
        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
            auto in_h = input.get_shape().lens()[2];
            auto in_w = input.get_shape().lens()[3];
Paul's avatar
Paul committed
30

Paul's avatar
Paul committed
31
32
33
            auto wei_c = weights.get_shape().lens()[1];
            auto wei_h = weights.get_shape().lens()[2];
            auto wei_w = weights.get_shape().lens()[3];
Paul's avatar
Paul committed
34

Paul's avatar
Paul committed
35
36
37
38
            dfor(output_shape.lens()[0],
                 output_shape.lens()[1],
                 output_shape.lens()[2],
                 output_shape.lens()[3])(
Paul's avatar
Paul committed
39
40
41
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                    const int start_x = i * op.stride[0] - op.padding[0];
                    const int start_y = j * op.stride[1] - op.padding[1];
Paul's avatar
Paul committed
42

Paul's avatar
Paul committed
43
44
45
46
47
48
49
50
51
52
                    double acc = 0;
                    dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
                        const int in_x = start_x + x;
                        const int in_y = start_y + y;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
                        }
                    });
                    output(o, w, i, j) = acc;
Paul's avatar
Paul committed
53
54
55
56
57
58
                });
        });
        return result;
    }
};

Paul's avatar
Paul committed
59
60
61
/// Reduction policy selecting the largest element of a pooling window.
/// Provides the static interface (name/start/apply/final) that cpu_pooling
/// expects from its Op parameter.
struct max_pool
{
    /// Suffix appended to the pooling operator name.
    static std::string name()
    {
        return "max";
    }

    /// Initial accumulator: the most negative finite double.
    static double start()
    {
        return std::numeric_limits<double>::lowest();
    }

    /// Folds one more element into the running maximum.
    static double apply(double running, double candidate)
    {
        return std::max(running, candidate);
    }

    /// The maximum needs no normalisation; the second argument is ignored.
    static double final(double running, double)
    {
        return running;
    }
};

/// Reduction policy averaging the elements of a pooling window.
/// Provides the static interface (name/start/apply/final) that cpu_pooling
/// expects from its Op parameter.
struct avg_pool
{
    /// Suffix appended to the pooling operator name.
    static std::string name()
    {
        return "average";
    }

    /// Initial accumulator for the running sum.
    static double start()
    {
        return 0.0;
    }

    /// Adds one more element to the running sum.
    static double apply(double total, double element)
    {
        return total + element;
    }

    /// Divides the accumulated sum by the supplied element count.
    static double final(double total, double count)
    {
        return total / count;
    }
};

Paul's avatar
Paul committed
83
template <class Op>
Paul's avatar
Paul committed
84
85
86
87
88
89
struct cpu_pooling
{
    pooling op;

    std::string name() const { return "cpu::pooling_" + Op::name(); }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
90
    argument compute(context&, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
91
92
93
94
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            using type = typename decltype(output)::value_type;
Paul's avatar
Paul committed
95
96
            auto in_h  = input.get_shape().lens()[2];
            auto in_w  = input.get_shape().lens()[3];
Paul's avatar
Paul committed
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

            dfor(output_shape.lens()[0],
                 output_shape.lens()[1],
                 output_shape.lens()[2],
                 output_shape.lens()[3])(
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                    const int start_x0 = i * op.stride[0] - op.padding[0];
                    const int start_y0 = j * op.stride[1] - op.padding[1];

                    const int hend = std::min(start_x0 + op.lengths[0], in_h);
                    const int wend = std::min(start_y0 + op.lengths[1], in_w);

                    const int start_x = std::max(start_x0, 0);
                    const int start_y = std::max(start_y0, 0);

                    const int w_h       = (hend - start_x);
                    const int w_w       = (wend - start_y);
                    const int pool_size = std::max(w_h * w_w, 1);

                    double acc = Op::start();
                    dfor(w_h, w_w)([&](int x, int y) {
                        const int in_x = start_x + x;
                        const int in_y = start_y + y;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc = Op::apply(acc, input(o, w, in_x, in_y));
                        }
                    });
                    output(o, w, i, j) = type(Op::final(acc, pool_size));
                });
        });
        return result;
    }
};

132
133
134
struct cpu_transpose
{
    transpose op;
Paul's avatar
Paul committed
135
136

    std::string name() const { return "cpu::transpose"; }
137
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
138
    argument compute(context&, shape output_shape, std::vector<argument> args) const
139
140
141
142
143
144
145
146
147
    {
        return {output_shape, std::move(args.front().data)};
    }
};

struct cpu_contiguous
{
    contiguous op;
    std::string name() const { return "cpu::contiguous"; }
Paul's avatar
Paul committed
148
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
149
    argument compute(context&, shape output_shape, std::vector<argument> args) const
150
151
152
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
Paul's avatar
Paul committed
153
154
155
            shape_for_each(output.get_shape(), [&](const auto& idx) {
                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
            });
156
        });
157
        return result;
158
    }
159
};
160

161
162
struct cpu_reshape
{
163
    reshape op;
164
    std::string name() const { return "cpu::reshape"; }
165
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
166

Paul's avatar
Paul committed
167
    argument compute(context&, shape output_shape, std::vector<argument> args) const
168
169
170
171
172
    {
        return {output_shape, std::move(args.front().data)};
    }
};

173
174
175
176
struct cpu_gemm
{
    gemm op;
    std::string name() const { return "cpu::gemm"; }
177
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
178

Paul's avatar
Paul committed
179
    argument compute(context&, shape output_shape, std::vector<argument> args) const
180
    {
181
        argument result{output_shape};
Scott Thornton's avatar
Scott Thornton committed
182
183
184
185
186
187
188
189
        visit_all(result, args[0], args[1])([&](auto cmat, auto amat, auto bmat) {
            auto m = amat.get_shape().lens()[0];
            auto n = bmat.get_shape().lens()[1];
            auto k = bmat.get_shape().lens()[0];

            auto a = amat.data();
            auto b = bmat.data();
            auto c = cmat.data();
190
191
192
193
194
195
            for(int ii = 0; ii < m; ii++)
            {
                for(int jj = 0; jj < n; jj++)
                {
                    c[ii * n + jj] = 0;
                }
196
            }
197
198
199
200
201
202
203
204
205
206
207
            for(int ii = 0; ii < m; ii++)
            {
                for(int kk = 0; kk < k; kk++)
                {
                    auto aik  = a[ii * k + kk];
                    auto* bkj = &b[kk * n];
                    auto* cij = &c[ii * n];
                    for(int jj = 0; jj < n; jj++, cij++, bkj++)
                    {
                        *cij += aik * (*bkj);
                    }
208
209
210
                }
            }
        });
211
        return result;
212
213
214
    }
};

215
/// Element functor returning its input unchanged; exposes the name()/fcn()
/// pair that cpu_unary requires of its Op parameter.
struct identity_op
{
    std::string name() const
    {
        return "cpu::identity";
    }

    /// Returns a generic callable mapping a value to itself.
    auto fcn() const
    {
        const auto pass_through = [](auto value) { return value; };
        return pass_through;
    }
};
Paul's avatar
Paul committed
223

224
/// Element functor for the absolute value; exposes the name()/fcn() pair
/// that cpu_unary requires of its Op parameter.
struct abs_op
{
    std::string name() const
    {
        return "cpu::abs";
    }

    /// Returns a generic callable applying std::abs.
    auto fcn() const
    {
        const auto magnitude = [](auto value) { return std::abs(value); };
        return magnitude;
    }
};

233
/// Element functor for the natural exponential; exposes the name()/fcn()
/// pair that cpu_unary requires of its Op parameter.
struct exp_op
{
    std::string name() const
    {
        return "cpu::exp";
    }

    /// Returns a generic callable applying std::exp.
    auto fcn() const
    {
        const auto exponential = [](auto value) { return std::exp(value); };
        return exponential;
    }
};

242
/// Element functor for the sine; exposes the name()/fcn() pair that
/// cpu_unary requires of its Op parameter.
struct sin_op
{
    std::string name() const
    {
        return "cpu::sin";
    }

    /// Returns a generic callable applying std::sin.
    auto fcn() const
    {
        const auto sine = [](auto value) { return std::sin(value); };
        return sine;
    }
};

251
/// Element functor for the cosine; exposes the name()/fcn() pair that
/// cpu_unary requires of its Op parameter.
struct cos_op
{
    std::string name() const
    {
        return "cpu::cos";
    }

    /// Returns a generic callable applying std::cos.
    auto fcn() const
    {
        const auto cosine = [](auto value) { return std::cos(value); };
        return cosine;
    }
};

260
/// Element functor for the tangent; exposes the name()/fcn() pair that
/// cpu_unary requires of its Op parameter.
struct tan_op
{
    std::string name() const
    {
        return "cpu::tan";
    }

    /// Returns a generic callable applying std::tan.
    auto fcn() const
    {
        const auto tangent = [](auto value) { return std::tan(value); };
        return tangent;
    }
};

269
/// Element functor for the arc sine; exposes the name()/fcn() pair that
/// cpu_unary requires of its Op parameter.
struct asin_op
{
    std::string name() const
    {
        return "cpu::asin";
    }

    /// Returns a generic callable applying std::asin.
    auto fcn() const
    {
        const auto arc_sine = [](auto value) { return std::asin(value); };
        return arc_sine;
    }
};

278
/// Element functor for the arc cosine; exposes the name()/fcn() pair that
/// cpu_unary requires of its Op parameter.
struct acos_op
{
    std::string name() const
    {
        return "cpu::acos";
    }

    /// Returns a generic callable applying std::acos.
    auto fcn() const
    {
        const auto arc_cosine = [](auto value) { return std::acos(value); };
        return arc_cosine;
    }
};

287
/// Element functor for the arc tangent; exposes the name()/fcn() pair that
/// cpu_unary requires of its Op parameter.
struct atan_op
{
    std::string name() const
    {
        return "cpu::atan";
    }

    /// Returns a generic callable applying std::atan.
    auto fcn() const
    {
        const auto arc_tangent = [](auto value) { return std::atan(value); };
        return arc_tangent;
    }
};

/// Element functor for the hyperbolic tangent; exposes the name()/fcn() pair
/// that cpu_unary requires of its Op parameter.
struct tanh_op
{
    std::string name() const
    {
        return "cpu::tanh";
    }

    /// Returns a generic callable applying std::tanh.
    auto fcn() const
    {
        const auto hyperbolic_tangent = [](auto value) { return std::tanh(value); };
        return hyperbolic_tangent;
    }
};

/// Element functor for the logistic sigmoid 1 / (1 + e^-x); exposes the
/// name()/fcn() pair that cpu_unary requires of its Op parameter.
struct sigmoid_op
{
    std::string name() const
    {
        return "cpu::sigmoid";
    }

    /// Returns a generic callable evaluating the sigmoid with float literals,
    /// so the result type follows the usual promotion of the input type.
    auto fcn() const
    {
        const auto logistic = [](auto value) {
            const auto decay = std::exp(-value);
            return 1.f / (1.f + decay);
        };
        return logistic;
    }
};

/// Element functor for arithmetic negation; exposes the name()/fcn() pair
/// that cpu_unary requires of its Op parameter.
struct neg_op
{
    std::string name() const
    {
        return "cpu::neg";
    }

    /// Returns a generic callable applying unary minus.
    auto fcn() const
    {
        const auto negate = [](auto value) { return -value; };
        return negate;
    }
};

/// Element functor for the rectified linear unit; exposes the name()/fcn()
/// pair that cpu_unary requires of its Op parameter.
struct relu_op
{
    std::string name() const
    {
        return "cpu::relu";
    }

    /// Returns a generic callable yielding the input when it is strictly
    /// positive and 0 otherwise (any input failing `> 0` maps to 0).
    auto fcn() const
    {
        const auto rectify = [](auto value) { return (value > 0) ? value : 0; };
        return rectify;
    }
};

template <typename Op>
struct cpu_unary
{
335
336
337
    Op op;
    std::string name() const { return op.name(); }
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
Paul's avatar
Paul committed
338
    argument compute(context&, shape output_shape, std::vector<argument> args) const
339
340
341
342
343
344
345
346
347
    {
        argument result{output_shape};
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                std::transform(input.begin(), input.end(), output.begin(), op.fcn());
            });
        });
        return result;
    }
348
349
};

350
struct softmax2d
351
{
352
353
    std::string name() const { return "cpu::softmax2d"; }
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
Paul's avatar
Paul committed
354
    argument compute(context&, shape output_shape, std::vector<argument> args) const
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            using value_type = typename decltype(input)::value_type;
            auto nb          = input.get_shape().lens()[0];
            auto nc          = input.get_shape().lens()[1];
            auto nh          = input.get_shape().lens()[2];
            auto nw          = input.get_shape().lens()[3];
            dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
                value_type cmax = std::numeric_limits<value_type>::lowest();
                for(int c = 0; c < nc; c++)
                {
                    cmax = std::max(cmax, input(b, c, i, j));
                }
                for(int c = 0; c < nc; c++)
                {
                    output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
                }
                value_type sum = value_type(0);
                for(int c = 0; c < nc; c++)
                {
                    sum += output(b, c, i, j);
                }
                for(int c = 0; c < nc; c++)
                {
                    output(b, c, i, j) = output(b, c, i, j) / sum;
                }
            });
        });
        return result;
    }
386
387
388
389
390
};

/// Element functor for addition; exposes the name()/fcn() pair that
/// cpu_binary requires of its Op parameter.
struct add_op
{
    std::string name() const
    {
        return "add";
    }

    /// Returns a generic callable computing lhs + rhs.
    auto fcn() const
    {
        const auto sum = [](auto lhs, auto rhs) { return lhs + rhs; };
        return sum;
    }
};

/// Element functor for subtraction; exposes the name()/fcn() pair that
/// cpu_binary requires of its Op parameter.
struct sub_op
{
    std::string name() const
    {
        return "sub";
    }

    /// Returns a generic callable computing lhs - rhs.
    auto fcn() const
    {
        const auto difference = [](auto lhs, auto rhs) { return lhs - rhs; };
        return difference;
    }
};

/// Element functor for multiplication; exposes the name()/fcn() pair that
/// cpu_binary requires of its Op parameter.
struct mul_op
{
    std::string name() const
    {
        return "mul";
    }

    /// Returns a generic callable computing lhs * rhs.
    auto fcn() const
    {
        const auto product = [](auto lhs, auto rhs) { return lhs * rhs; };
        return product;
    }
};

/// Element functor for division; exposes the name()/fcn() pair that
/// cpu_binary requires of its Op parameter.
struct div_op
{
    std::string name() const
    {
        return "div";
    }

    /// Returns a generic callable computing lhs / rhs with the operand types'
    /// native division (integer operands truncate).
    auto fcn() const
    {
        const auto quotient = [](auto lhs, auto rhs) { return lhs / rhs; };
        return quotient;
    }
};

template <typename Op>
struct cpu_binary
{
427
428
429
    Op op;
    std::string name() const { return op.name(); }
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
Paul's avatar
Paul committed
430
    argument compute(context&, shape output_shape, std::vector<argument> args) const
431
432
433
    {
        argument result{output_shape};
        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
Paul's avatar
Paul committed
434
435
436
437
438
439
440
            if(input1.get_shape().packed() and input2.get_shape().packed())
            {
                std::transform(
                    input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
            }
            else
            {
Paul's avatar
Paul committed
441
                shape_for_each(output.get_shape(), [&](const auto& idx) {
Paul's avatar
Paul committed
442
443
                    output(idx.begin(), idx.end()) =
                        op.fcn()(input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
Paul's avatar
Paul committed
444
445
                });
            }
446
447
448
        });
        return result;
    }
Paul's avatar
Paul committed
449
450
451
452
};

struct cpu_apply
{
Paul's avatar
Paul committed
453
    program* prog;
Paul's avatar
Paul committed
454
    std::unordered_map<std::string, std::function<void(instruction_ref)>> apply_map{};
Paul's avatar
Paul committed
455

Paul's avatar
Paul committed
456
    template <class T>
Paul's avatar
Paul committed
457
    auto simple_op()
Paul's avatar
Paul committed
458
    {
Paul's avatar
Paul committed
459
        return [this](instruction_ref ins) { apply_simple_op<T>(ins); };
Paul's avatar
Paul committed
460
461
    }

Paul's avatar
Paul committed
462
    template <class T, class Op>
Paul's avatar
Paul committed
463
    auto extend_op()
Paul's avatar
Paul committed
464
    {
Paul's avatar
Paul committed
465
        return [this](instruction_ref ins) { apply_extend_op<T, Op>(ins); };
Paul's avatar
Paul committed
466
467
    }

Paul's avatar
Paul committed
468
    void init()
469
    {
Paul's avatar
Paul committed
470
        apply_map["convolution"] = extend_op<cpu_convolution, convolution>();
Paul's avatar
Paul committed
471
472
473
474
475
        apply_map["gemm"]        = extend_op<cpu_gemm, gemm>();
        apply_map["reshape"]     = extend_op<cpu_reshape, reshape>();
        apply_map["contiguous"]  = extend_op<cpu_contiguous, contiguous>();
        apply_map["transpose"]   = extend_op<cpu_transpose, transpose>();

Paul's avatar
Paul committed
476
        apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
Paul's avatar
Paul committed
477
478
479
480
481
482
483
        apply_map["tanh"]     = simple_op<cpu_unary<tanh_op>>();
        apply_map["sigmoid"]  = simple_op<cpu_unary<sigmoid_op>>();
        apply_map["exp"]      = simple_op<cpu_unary<exp_op>>();
        apply_map["neg"]      = simple_op<cpu_unary<neg_op>>();
        apply_map["sin"]      = simple_op<cpu_unary<sin_op>>();
        apply_map["cos"]      = simple_op<cpu_unary<cos_op>>();
        apply_map["tan"]      = simple_op<cpu_unary<tan_op>>();
Paul's avatar
Paul committed
484
        apply_map["add"]      = simple_op<cpu_binary<add_op>>();
Paul's avatar
Paul committed
485
486
487
        apply_map["sub"]      = simple_op<cpu_binary<sub_op>>();
        apply_map["mul"]      = simple_op<cpu_binary<mul_op>>();
        apply_map["div"]      = simple_op<cpu_binary<div_op>>();
Paul's avatar
Paul committed
488
489

        apply_map["softmax"] = simple_op<softmax2d>();
490
    }
Paul's avatar
Paul committed
491
492
493

    void apply()
    {
Paul's avatar
Paul committed
494
        init();
Paul's avatar
Paul committed
495
        for(auto it : iterator_for(*prog))
Paul's avatar
Paul committed
496
        {
Paul's avatar
Paul committed
497
            if(it->op.name() == "activation")
Paul's avatar
Paul committed
498
            {
Paul's avatar
Paul committed
499
500
                apply_activation(it);
            }
Paul's avatar
Paul committed
501
502
503
504
            else if(it->op.name() == "pooling")
            {
                apply_pooling(it);
            }
Paul's avatar
Paul committed
505
            else if(apply_map.count(it->op.name()) > 0)
506
            {
Paul's avatar
Paul committed
507
                apply_map.at(it->op.name())(it);
508
            }
Paul's avatar
Paul committed
509
510
511
        }
    }

Paul's avatar
Paul committed
512
    template <class T>
Paul's avatar
Paul committed
513
    void apply_simple_op(instruction_ref ins)
Paul's avatar
Paul committed
514
    {
Paul's avatar
Paul committed
515
        prog->replace_instruction(ins, T{}, ins->arguments);
Paul's avatar
Paul committed
516
517
    }

Paul's avatar
Paul committed
518
    template <class T, class Op>
Paul's avatar
Paul committed
519
    void apply_extend_op(instruction_ref ins)
520
    {
Paul's avatar
Paul committed
521
522
        auto&& op = any_cast<Op>(ins->op);
        prog->replace_instruction(ins, T{op}, ins->arguments);
523
524
    }

Paul's avatar
Paul committed
525
526
527
528
    void apply_activation(instruction_ref ins)
    {
        auto&& op = any_cast<activation>(ins->op);
        if(op.mode == "relu")
529
            prog->replace_instruction(ins, cpu_unary<relu_op>{}, ins->arguments);
Paul's avatar
Paul committed
530
    }
531

Paul's avatar
Paul committed
532
    void apply_pooling(instruction_ref ins)
533
    {
Paul's avatar
Paul committed
534
535
536
537
538
        auto&& op = any_cast<pooling>(ins->op);
        if(op.mode == "max")
            prog->replace_instruction(ins, cpu_pooling<max_pool>{op}, ins->arguments);
        else if(op.mode == "average")
            prog->replace_instruction(ins, cpu_pooling<avg_pool>{op}, ins->arguments);
539
    }
Paul's avatar
Paul committed
540
541
};

Paul's avatar
Paul committed
542
543
544
545
struct cpu_pass
{
    std::string name() const { return "cpu::pass"; }

Paul's avatar
Paul committed
546
    void apply(program& p) const { cpu_apply{&p}.apply(); }
Paul's avatar
Paul committed
547
548
};

Paul's avatar
Paul committed
549
/// Identifier of this target.
std::string cpu_target::name() const
{
    return "cpu";
}
Paul's avatar
Paul committed
550

Paul's avatar
Paul committed
551
/// Pass pipeline for this target: a single cpu_pass. The context is unused.
std::vector<pass> cpu_target::get_passes(context&) const
{
    return {cpu_pass{}};
}
Paul's avatar
Paul committed
552
553
554

} // namespace cpu

Paul's avatar
Paul committed
555
} // namespace migraph