cpu_target.cpp 20 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
4
5
6

#include <rtg/cpu/cpu_target.hpp>
#include <rtg/instruction.hpp>
#include <rtg/dfor.hpp>
#include <rtg/operators.hpp>

#include <algorithm>

Paul's avatar
Paul committed
7
8
namespace rtg {
namespace cpu {
Paul's avatar
Paul committed
9

10
/// Returns the zero value of type T. The argument only serves to deduce T;
/// its value is never read.
template <typename T>
T zero(const T&)
{
    return static_cast<T>(0);
}
15

Paul's avatar
Paul committed
16
17
18
19
struct cpu_convolution
{
    convolution op;

Paul's avatar
Paul committed
20
21
    std::string name() const { return "cpu::convolution"; }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
22
    argument compute(shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
23
    {
Paul's avatar
Paul committed
24
        argument result{output_shape};
Paul's avatar
Paul committed
25
26
27
        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
            auto in_h = input.get_shape().lens()[2];
            auto in_w = input.get_shape().lens()[3];
Paul's avatar
Paul committed
28

Paul's avatar
Paul committed
29
30
31
            auto wei_c = weights.get_shape().lens()[1];
            auto wei_h = weights.get_shape().lens()[2];
            auto wei_w = weights.get_shape().lens()[3];
Paul's avatar
Paul committed
32

Paul's avatar
Paul committed
33
34
35
36
            dfor(output_shape.lens()[0],
                 output_shape.lens()[1],
                 output_shape.lens()[2],
                 output_shape.lens()[3])(
Paul's avatar
Paul committed
37
38
39
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                    const int start_x = i * op.stride[0] - op.padding[0];
                    const int start_y = j * op.stride[1] - op.padding[1];
Paul's avatar
Paul committed
40

Paul's avatar
Paul committed
41
42
43
44
45
46
47
48
49
50
                    double acc = 0;
                    dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
                        const int in_x = start_x + x;
                        const int in_y = start_y + y;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
                        }
                    });
                    output(o, w, i, j) = acc;
Paul's avatar
Paul committed
51
52
53
54
55
56
                });
        });
        return result;
    }
};

Paul's avatar
Paul committed
57
58
59
/// Reduction policy for max pooling; used as the `Op` parameter of `cpu_pooling`.
struct max_pool
{
    static std::string name() { return "max"; }

    /// Initial accumulator: the lowest finite double, so the first window
    /// element always replaces it.
    static double start() { return std::numeric_limits<double>::lowest(); }

    /// Folds one window element `y` into the accumulator `x` by keeping the
    /// larger of the two.
    static double apply(double x, double y) { return std::max(x, y); }

    /// The running maximum already is the result; the window size is ignored.
    static double final(double x, double) { return x; }
};

/// Reduction policy for average pooling; used as the `Op` parameter of `cpu_pooling`.
struct avg_pool
{
    static std::string name() { return "average"; }

    /// Initial accumulator: an empty sum.
    static double start() { return 0.0; }

    /// Folds one window element `y` into the running sum `x`.
    static double apply(double x, double y) { return x + y; }

    /// Turns the accumulated sum `x` into a mean by dividing by the window size `y`.
    static double final(double x, double y) { return x / y; }
};

Paul's avatar
Paul committed
81
template <class Op>
Paul's avatar
Paul committed
82
83
84
85
86
87
88
89
90
91
92
struct cpu_pooling
{
    pooling op;

    std::string name() const { return "cpu::pooling_" + Op::name(); }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            using type = typename decltype(output)::value_type;
Paul's avatar
Paul committed
93
94
            auto in_h  = input.get_shape().lens()[2];
            auto in_w  = input.get_shape().lens()[3];
Paul's avatar
Paul committed
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

            dfor(output_shape.lens()[0],
                 output_shape.lens()[1],
                 output_shape.lens()[2],
                 output_shape.lens()[3])(
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                    const int start_x0 = i * op.stride[0] - op.padding[0];
                    const int start_y0 = j * op.stride[1] - op.padding[1];

                    const int hend = std::min(start_x0 + op.lengths[0], in_h);
                    const int wend = std::min(start_y0 + op.lengths[1], in_w);

                    const int start_x = std::max(start_x0, 0);
                    const int start_y = std::max(start_y0, 0);

                    const int w_h       = (hend - start_x);
                    const int w_w       = (wend - start_y);
                    const int pool_size = std::max(w_h * w_w, 1);

                    double acc = Op::start();
                    dfor(w_h, w_w)([&](int x, int y) {
                        const int in_x = start_x + x;
                        const int in_y = start_y + y;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc = Op::apply(acc, input(o, w, in_x, in_y));
                        }
                    });
                    output(o, w, i, j) = type(Op::final(acc, pool_size));
                });
        });
        return result;
    }
};

130
131
132
struct cpu_transpose
{
    transpose op;
Paul's avatar
Paul committed
133
134

    std::string name() const { return "cpu::transpose"; }
135
136
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
    argument compute(shape output_shape, std::vector<argument> args) const
137
138
139
140
141
142
143
144
145
    {
        return {output_shape, std::move(args.front().data)};
    }
};

struct cpu_contiguous
{
    contiguous op;
    std::string name() const { return "cpu::contiguous"; }
Paul's avatar
Paul committed
146
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
147
    argument compute(shape output_shape, std::vector<argument> args) const
148
149
150
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
151
            auto input_shape = args[0].get_shape();
Paul's avatar
Paul committed
152
            auto ndim        = output_shape.lens().size();
153
            using value_type = typename decltype(input)::value_type;
Paul's avatar
Paul committed
154
155
156
157
158
            value_type* ptr  = static_cast<value_type*>(output.data());
            if(ndim == 2)
            {
                dfor(input_shape.lens()[0], input_shape.lens()[1])(
                    [&](std::size_t i0, std::size_t i1) { *ptr++ = input(i0, i1); });
159
            }
Paul's avatar
Paul committed
160
161
162
            else if(ndim == 3)
            {
                dfor(input_shape.lens()[0], input_shape.lens()[1], input_shape.lens()[2])(
163
                    [&](std::size_t i0, std::size_t i1, std::size_t i2) {
Paul's avatar
Paul committed
164
                        *ptr++ = input(i0, i1, i2);
165
166
                    });
            }
Paul's avatar
Paul committed
167
168
            else if(ndim == 4)
            {
169
170
171
172
173
                dfor(input_shape.lens()[0],
                     input_shape.lens()[1],
                     input_shape.lens()[2],
                     input_shape.lens()[3])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2, std::size_t i3) {
Paul's avatar
Paul committed
174
                        *ptr++ = input(i0, i1, i2, i3);
175
176
                    });
            }
Paul's avatar
Paul committed
177
178
            else if(ndim == 5)
            {
179
180
181
182
183
                dfor(input_shape.lens()[0],
                     input_shape.lens()[1],
                     input_shape.lens()[2],
                     input_shape.lens()[3],
                     input_shape.lens()[4])(
Paul's avatar
Paul committed
184
185
186
187
188
                    [&](std::size_t i0,
                        std::size_t i1,
                        std::size_t i2,
                        std::size_t i3,
                        std::size_t i4) { *ptr++ = input(i0, i1, i2, i3, i4); });
189
            }
Paul's avatar
Paul committed
190
191
            else if(ndim == 6)
            {
192
193
194
195
196
197
                dfor(input_shape.lens()[0],
                     input_shape.lens()[1],
                     input_shape.lens()[2],
                     input_shape.lens()[3],
                     input_shape.lens()[4],
                     input_shape.lens()[5])(
Paul's avatar
Paul committed
198
199
200
201
202
203
                    [&](std::size_t i0,
                        std::size_t i1,
                        std::size_t i2,
                        std::size_t i3,
                        std::size_t i4,
                        std::size_t i5) { *ptr++ = input(i0, i1, i2, i3, i4, i5); });
204
            }
205
        });
206
        return result;
207
    }
208
};
209

210
211
struct cpu_reshape
{
212
    reshape op;
213
    std::string name() const { return "cpu::reshape"; }
214
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
215

216
    argument compute(shape output_shape, std::vector<argument> args) const
217
218
219
220
221
    {
        return {output_shape, std::move(args.front().data)};
    }
};

222
223
224
225
struct cpu_gemm
{
    gemm op;
    std::string name() const { return "cpu::gemm"; }
226
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
227

228
    argument compute(shape output_shape, std::vector<argument> args) const
229
    {
230
        argument result{output_shape};
Scott Thornton's avatar
Scott Thornton committed
231
232
233
234
235
236
237
238
        visit_all(result, args[0], args[1])([&](auto cmat, auto amat, auto bmat) {
            auto m = amat.get_shape().lens()[0];
            auto n = bmat.get_shape().lens()[1];
            auto k = bmat.get_shape().lens()[0];

            auto a = amat.data();
            auto b = bmat.data();
            auto c = cmat.data();
239
240
241
242
243
244
            for(int ii = 0; ii < m; ii++)
            {
                for(int jj = 0; jj < n; jj++)
                {
                    c[ii * n + jj] = 0;
                }
245
            }
246
247
248
249
250
251
252
253
254
255
256
            for(int ii = 0; ii < m; ii++)
            {
                for(int kk = 0; kk < k; kk++)
                {
                    auto aik  = a[ii * k + kk];
                    auto* bkj = &b[kk * n];
                    auto* cij = &c[ii * n];
                    for(int jj = 0; jj < n; jj++, cij++, bkj++)
                    {
                        *cij += aik * (*bkj);
                    }
257
258
259
                }
            }
        });
260
        return result;
261
262
263
    }
};

264
/// Elementwise functor behind the CPU `identity` operator.
struct identity_op
{
    std::string name() const { return "cpu::identity"; }

    /// Returns a generic callable that hands its argument back unchanged.
    auto fcn() const
    {
        auto pass_through = [](auto value) { return value; };
        return pass_through;
    }
};
Paul's avatar
Paul committed
272

273
/// Elementwise functor computing the absolute value.
struct abs_op
{
    std::string name() const { return "cpu::abs"; }

    /// Returns a generic callable that applies `std::abs` to its argument.
    auto fcn() const
    {
        auto absolute = [](auto value) { return std::abs(value); };
        return absolute;
    }
};

282
/// Elementwise functor computing the natural exponential.
struct exp_op
{
    std::string name() const { return "cpu::exp"; }

    /// Returns a generic callable that applies `std::exp` to its argument.
    auto fcn() const
    {
        auto exponential = [](auto value) { return std::exp(value); };
        return exponential;
    }
};

291
/// Elementwise functor computing the sine.
struct sin_op
{
    std::string name() const { return "cpu::sin"; }

    /// Returns a generic callable that applies `std::sin` to its argument.
    auto fcn() const
    {
        auto sine = [](auto value) { return std::sin(value); };
        return sine;
    }
};

300
/// Elementwise functor computing the cosine.
struct cos_op
{
    std::string name() const { return "cpu::cos"; }

    /// Returns a generic callable that applies `std::cos` to its argument.
    auto fcn() const
    {
        auto cosine = [](auto value) { return std::cos(value); };
        return cosine;
    }
};

309
/// Elementwise functor computing the tangent.
struct tan_op
{
    std::string name() const { return "cpu::tan"; }

    /// Returns a generic callable that applies `std::tan` to its argument.
    auto fcn() const
    {
        auto tangent = [](auto value) { return std::tan(value); };
        return tangent;
    }
};

318
/// Elementwise functor computing the arc sine.
struct asin_op
{
    std::string name() const { return "cpu::asin"; }

    /// Returns a generic callable that applies `std::asin` to its argument.
    auto fcn() const
    {
        auto arc_sine = [](auto value) { return std::asin(value); };
        return arc_sine;
    }
};

327
/// Elementwise functor computing the arc cosine.
struct acos_op
{
    std::string name() const { return "cpu::acos"; }

    /// Returns a generic callable that applies `std::acos` to its argument.
    auto fcn() const
    {
        auto arc_cosine = [](auto value) { return std::acos(value); };
        return arc_cosine;
    }
};

336
/// Elementwise functor computing the arc tangent.
struct atan_op
{
    std::string name() const { return "cpu::atan"; }

    /// Returns a generic callable that applies `std::atan` to its argument.
    auto fcn() const
    {
        auto arc_tangent = [](auto value) { return std::atan(value); };
        return arc_tangent;
    }
};

/// Elementwise functor computing the hyperbolic tangent.
struct tanh_op
{
    std::string name() const { return "cpu::tanh"; }

    /// Returns a generic callable that applies `std::tanh` to its argument.
    auto fcn() const
    {
        auto hyperbolic_tangent = [](auto value) { return std::tanh(value); };
        return hyperbolic_tangent;
    }
};

/// Elementwise functor computing the logistic sigmoid.
struct sigmoid_op
{
    std::string name() const { return "cpu::sigmoid"; }

    /// Returns a generic callable evaluating 1 / (1 + exp(-value)).
    auto fcn() const
    {
        auto logistic = [](auto value) { return 1.f / (1.f + std::exp(-value)); };
        return logistic;
    }
};

/// Elementwise functor computing the arithmetic negation.
struct neg_op
{
    std::string name() const { return "cpu::neg"; }

    /// Returns a generic callable that applies unary minus to its argument.
    auto fcn() const
    {
        auto negate = [](auto value) { return -value; };
        return negate;
    }
};

/// Elementwise functor computing the rectified linear unit.
struct relu_op
{
    std::string name() const { return "cpu::relu"; }

    /// Returns a generic callable that passes values greater than zero through
    /// and maps everything else to 0.
    auto fcn() const
    {
        auto rectify = [](auto value) { return value > 0 ? value : 0; };
        return rectify;
    }
};

/// Adapts a unary functor type `Op` (one providing `name()` and `fcn()`) into
/// an elementwise CPU operator.
template <typename Op>
struct cpu_unary
{
    Op op;
    std::string name() const { return op.name(); }

    /// The output shape equals the shape of the first input.
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }

    /// Applies `op.fcn()` to every element of args[0] and stores the results
    /// in a newly created argument of shape `output_shape`.
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto elementwise = op.fcn();
        // Output and input are visited separately, so their element types are
        // resolved independently of each other.
        result.visit([&](auto dst) {
            args[0].visit([&](auto src) {
                std::transform(src.begin(), src.end(), dst.begin(), elementwise);
            });
        });
        return result;
    }
};

399
struct softmax2d
400
{
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
    std::string name() const { return "cpu::softmax2d"; }
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            using value_type = typename decltype(input)::value_type;
            auto nb          = input.get_shape().lens()[0];
            auto nc          = input.get_shape().lens()[1];
            auto nh          = input.get_shape().lens()[2];
            auto nw          = input.get_shape().lens()[3];
            dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
                value_type cmax = std::numeric_limits<value_type>::lowest();
                for(int c = 0; c < nc; c++)
                {
                    cmax = std::max(cmax, input(b, c, i, j));
                }
                for(int c = 0; c < nc; c++)
                {
                    output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
                }
                value_type sum = value_type(0);
                for(int c = 0; c < nc; c++)
                {
                    sum += output(b, c, i, j);
                }
                for(int c = 0; c < nc; c++)
                {
                    output(b, c, i, j) = output(b, c, i, j) / sum;
                }
            });
        });
        return result;
    }
435
436
};

437
438
439
440
441
442
443
444
445
446
/// CPU implementation of `add` that indexes both inputs with the coordinates
/// of the output, for outputs of rank 0 through 4.
///
/// NOTE(review): the loops themselves do no broadcasting; it presumably relies
/// on how the input views resolve those coordinates - confirm against the
/// tensor view implementation.
struct add_with_broadcast
{
    add op;
    std::string name() const { return "add_with_broadcast"; }

    /// Shape inference is delegated to the wrapped `add` operator.
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }

    /// Adds args[0] and args[1] elementwise into a newly created argument of
    /// shape `output_shape`. Raises through RTG_THROW for ranks above 4.
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        size_t ndims = output_shape.lens().size();
        argument result{output_shape};
        visit_all(result, args[0], args[1])([&](auto output, auto input0, auto input1) {
            // One else-if ladder: the rank-0 case must not fall into the
            // trailing `else`, which would throw after the sum was computed.
            if(ndims == 0)
            {
                output(0) = input0(0) + input1(0);
            }
            else if(ndims == 1)
            {
                for(size_t i = 0; i < output_shape.lens()[0]; i++)
                {
                    output(i) = input0(i) + input1(i);
                }
            }
            else if(ndims == 2)
            {
                dfor(output_shape.lens()[0],
                     output_shape.lens()[1])([&](std::size_t i0, std::size_t i1) {
                    output(i0, i1) = input0(i0, i1) + input1(i0, i1);
                });
            }
            else if(ndims == 3)
            {
                dfor(output_shape.lens()[0], output_shape.lens()[1], output_shape.lens()[2])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2) {
                        output(i0, i1, i2) = input0(i0, i1, i2) + input1(i0, i1, i2);
                    });
            }
            else if(ndims == 4)
            {
                dfor(output_shape.lens()[0],
                     output_shape.lens()[1],
                     output_shape.lens()[2],
                     output_shape.lens()[3])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2, std::size_t i3) {
                        output(i0, i1, i2, i3) = input0(i0, i1, i2, i3) + input1(i0, i1, i2, i3);
                    });
            }
            else
            {
                RTG_THROW("current not support tensors with ndim > 4");
            }
        });
        return result;
    }
};

491
492
493
/// Binary functor computing the sum of two values.
struct add_op
{
    std::string name() const { return "add"; }

    /// Returns a generic callable evaluating `lhs + rhs`.
    auto fcn() const
    {
        auto sum = [](auto lhs, auto rhs) { return lhs + rhs; };
        return sum;
    }
};

/// Binary functor computing the difference of two values.
struct sub_op
{
    std::string name() const { return "sub"; }

    /// Returns a generic callable evaluating `lhs - rhs`.
    auto fcn() const
    {
        auto difference = [](auto lhs, auto rhs) { return lhs - rhs; };
        return difference;
    }
};

/// Binary functor computing the product of two values.
struct mul_op
{
    std::string name() const { return "mul"; }

    /// Returns a generic callable evaluating `lhs * rhs`.
    auto fcn() const
    {
        auto product = [](auto lhs, auto rhs) { return lhs * rhs; };
        return product;
    }
};

/// Binary functor computing the quotient of two values.
struct div_op
{
    std::string name() const { return "div"; }

    /// Returns a generic callable evaluating `lhs / rhs`.
    auto fcn() const
    {
        auto quotient = [](auto lhs, auto rhs) { return lhs / rhs; };
        return quotient;
    }
};

/// Adapts a binary functor type `Op` (one providing `name()` and `fcn()`) into
/// an elementwise CPU operator.
template <typename Op>
struct cpu_binary
{
    Op op;
    std::string name() const { return op.name(); }

    /// The output shape equals the shape of the first input.
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }

    /// Combines args[0] and args[1] element by element with `op.fcn()`,
    /// storing the results in a newly created argument of shape `output_shape`.
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        auto elementwise = op.fcn();
        visit_all(result, args[0], args[1])([&](auto dst, auto lhs, auto rhs) {
            std::transform(lhs.begin(), lhs.end(), rhs.begin(), dst.begin(), elementwise);
        });
        return result;
    }
};

struct cpu_apply
{
Paul's avatar
Paul committed
545
    program* prog;
Paul's avatar
Paul committed
546
    std::unordered_map<std::string, std::function<void(instruction_ref)>> apply_map{};
Paul's avatar
Paul committed
547

Paul's avatar
Paul committed
548
    template <class T>
Paul's avatar
Paul committed
549
    auto simple_op()
Paul's avatar
Paul committed
550
    {
Paul's avatar
Paul committed
551
        return [this](instruction_ref ins) { apply_simple_op<T>(ins); };
Paul's avatar
Paul committed
552
553
    }

Paul's avatar
Paul committed
554
    template <class T, class Op>
Paul's avatar
Paul committed
555
    auto extend_op()
Paul's avatar
Paul committed
556
    {
Paul's avatar
Paul committed
557
        return [this](instruction_ref ins) { apply_extend_op<T, Op>(ins); };
Paul's avatar
Paul committed
558
559
    }

Paul's avatar
Paul committed
560
    void init()
561
    {
Paul's avatar
Paul committed
562
        apply_map["convolution"] = extend_op<cpu_convolution, convolution>();
Paul's avatar
Paul committed
563
564
565
566
567
        apply_map["gemm"]        = extend_op<cpu_gemm, gemm>();
        apply_map["reshape"]     = extend_op<cpu_reshape, reshape>();
        apply_map["contiguous"]  = extend_op<cpu_contiguous, contiguous>();
        apply_map["transpose"]   = extend_op<cpu_transpose, transpose>();

Paul's avatar
Paul committed
568
        apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
Paul's avatar
Paul committed
569
570
571
572
573
574
575
        apply_map["tanh"]     = simple_op<cpu_unary<tanh_op>>();
        apply_map["sigmoid"]  = simple_op<cpu_unary<sigmoid_op>>();
        apply_map["exp"]      = simple_op<cpu_unary<exp_op>>();
        apply_map["neg"]      = simple_op<cpu_unary<neg_op>>();
        apply_map["sin"]      = simple_op<cpu_unary<sin_op>>();
        apply_map["cos"]      = simple_op<cpu_unary<cos_op>>();
        apply_map["tan"]      = simple_op<cpu_unary<tan_op>>();
Paul's avatar
Paul committed
576
577

        apply_map["softmax"] = simple_op<softmax2d>();
578
    }
Paul's avatar
Paul committed
579
580
581

    void apply()
    {
Paul's avatar
Paul committed
582
        init();
Paul's avatar
Paul committed
583
584
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
585
            if(it->op.name() == "activation")
Paul's avatar
Paul committed
586
            {
Paul's avatar
Paul committed
587
588
                apply_activation(it);
            }
Paul's avatar
Paul committed
589
590
591
592
            else if(it->op.name() == "pooling")
            {
                apply_pooling(it);
            }
Paul's avatar
Paul committed
593
            else if(apply_map.count(it->op.name()) > 0)
594
            {
Paul's avatar
Paul committed
595
                apply_map.at(it->op.name())(it);
596
            }
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
            else if(it->op.name() == "add")
            {
                apply_add(it);
            }
            else if(it->op.name() == "sub")
            {
                apply_sub(it);
            }
            else if(it->op.name() == "mul")
            {
                apply_mul(it);
            }
            else if(it->op.name() == "div")
            {
                apply_div(it);
            }
Paul's avatar
Paul committed
613
614
615
        }
    }

Paul's avatar
Paul committed
616
    template <class T>
Paul's avatar
Paul committed
617
    void apply_simple_op(instruction_ref ins)
Paul's avatar
Paul committed
618
    {
Paul's avatar
Paul committed
619
        prog->replace_instruction(ins, T{}, ins->arguments);
Paul's avatar
Paul committed
620
621
    }

Paul's avatar
Paul committed
622
    template <class T, class Op>
Paul's avatar
Paul committed
623
    void apply_extend_op(instruction_ref ins)
624
    {
Paul's avatar
Paul committed
625
626
        auto&& op = any_cast<Op>(ins->op);
        prog->replace_instruction(ins, T{op}, ins->arguments);
627
628
    }

Paul's avatar
Paul committed
629
630
631
632
    void apply_activation(instruction_ref ins)
    {
        auto&& op = any_cast<activation>(ins->op);
        if(op.mode == "relu")
633
            prog->replace_instruction(ins, cpu_unary<relu_op>{}, ins->arguments);
Paul's avatar
Paul committed
634
    }
635

Paul's avatar
Paul committed
636
    void apply_pooling(instruction_ref ins)
637
    {
Paul's avatar
Paul committed
638
639
640
641
642
        auto&& op = any_cast<pooling>(ins->op);
        if(op.mode == "max")
            prog->replace_instruction(ins, cpu_pooling<max_pool>{op}, ins->arguments);
        else if(op.mode == "average")
            prog->replace_instruction(ins, cpu_pooling<avg_pool>{op}, ins->arguments);
643
    }
644
645
646
647

    void apply_add(instruction_ref ins)
    {
        auto&& op = any_cast<add>(ins->op);
Scott Thornton's avatar
Scott Thornton committed
648
        // prog->replace_instruction(ins, cpu_binary<add_op>{}, ins->arguments);
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
        prog->replace_instruction(ins, add_with_broadcast{op}, ins->arguments);
    }

    void apply_sub(instruction_ref ins)
    {
        prog->replace_instruction(ins, cpu_binary<sub_op>{}, ins->arguments);
    }

    void apply_mul(instruction_ref ins)
    {
        prog->replace_instruction(ins, cpu_binary<mul_op>{}, ins->arguments);
    }

    void apply_div(instruction_ref ins)
    {
        prog->replace_instruction(ins, cpu_binary<div_op>{}, ins->arguments);
    }
Paul's avatar
Paul committed
666
667
};

Paul's avatar
Paul committed
668
/// Name of this target.
std::string cpu_target::name() const
{
    return "cpu";
}
Paul's avatar
Paul committed
669

Paul's avatar
Paul committed
670
/// Runs a `cpu_apply` pass over `p`, replacing the operators it recognises
/// with their CPU implementations.
void cpu_target::apply(program& p) const
{
    cpu_apply lowering{&p};
    lowering.apply();
}
Paul's avatar
Paul committed
671
672
673
674

} // namespace cpu

} // namespace rtg