cpu_target.cpp 4.36 KB
Newer Older
Paul's avatar
Paul committed
1
2
3
4
5
6

#include <rtg/cpu/cpu_target.hpp>

#include <algorithm>
#include <cstddef>

#include <rtg/instruction.hpp>
#include <rtg/dfor.hpp>
#include <rtg/operators.hpp>

Paul's avatar
Paul committed
7
8
namespace rtg {
namespace cpu {
Paul's avatar
Paul committed
9
10
11
12
13

struct cpu_convolution
{
    convolution op;

Paul's avatar
Paul committed
14
15
    std::string name() const { return "cpu::convolution"; }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
Paul's avatar
Paul committed
16
    argument compute(shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
17
    {
Paul's avatar
Paul committed
18
        argument result{output_shape};
Paul's avatar
Paul committed
19
20
21
22
23
        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
            auto in_n = input.get_shape().lens()[0];
            auto in_c = input.get_shape().lens()[1];
            auto in_h = input.get_shape().lens()[2];
            auto in_w = input.get_shape().lens()[3];
Paul's avatar
Paul committed
24

Paul's avatar
Paul committed
25
26
27
            auto wei_c = weights.get_shape().lens()[1];
            auto wei_h = weights.get_shape().lens()[2];
            auto wei_w = weights.get_shape().lens()[3];
Paul's avatar
Paul committed
28

Paul's avatar
Paul committed
29
30
31
32
            dfor(in_n, in_c, in_h, in_w)(
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                    const int start_x = i * op.stride[0] - op.padding[0];
                    const int start_y = j * op.stride[1] - op.padding[1];
Paul's avatar
Paul committed
33

Paul's avatar
Paul committed
34
35
36
37
38
39
40
41
42
43
                    double acc = 0;
                    dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
                        const int in_x = start_x + x;
                        const int in_y = start_y + y;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
                        }
                    });
                    output(o, w, i, j) = acc;
Paul's avatar
Paul committed
44
45
46
47
48
49
                });
        });
        return result;
    }
};

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/// CPU implementation of the `gemm` operator: result = args[0] * args[1].
struct cpu_gemm
{
    /// Wrapped operator; only used for shape inference here.
    gemm op;

    std::string name() const { return "cpu::gemm"; }

    /// Shape inference is delegated to the wrapped operator.
    /// Declared const so it matches the other CPU operators in this file.
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }

    /// Multiplies args[0] (M x K) by args[1] (K x N) and returns the M x N product.
    ///
    /// The buffers are addressed as `row * row_length + column` through data(),
    /// so shape strides are not consulted.
    /// NOTE(review): this presumes densely packed row-major inputs - confirm.
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0], args[1])([&](auto cmat, auto amat, auto bmat) {
            const std::size_t m = amat.get_shape().lens()[0];
            const std::size_t n = bmat.get_shape().lens()[1];
            const std::size_t k = bmat.get_shape().lens()[0];

            auto a = amat.data();
            auto b = bmat.data();
            auto c = cmat.data();

            // The accumulation below adds into the output, so clear it first.
            std::fill_n(c, m * n, 0);

            // i-k-j loop order: the innermost loop walks one row of B and one
            // row of C with consecutive addresses.
            for(std::size_t ii = 0; ii < m; ++ii)
            {
                for(std::size_t kk = 0; kk < k; ++kk)
                {
                    const auto aik = a[ii * k + kk];
                    auto* bkj      = &b[kk * n];
                    auto* cij      = &c[ii * n];
                    for(std::size_t jj = 0; jj < n; ++jj, ++cij, ++bkj)
                    {
                        *cij += aik * (*bkj);
                    }
                }
            }
        });
        // The original fell off the end of this non-void function.
        return result;
    }
};

Paul's avatar
Paul committed
89
90
struct relu
{
Paul's avatar
Paul committed
91
92
    std::string name() const { return "cpu::relu"; }
    shape compute_shape(std::vector<shape> inputs) const { return inputs.front(); }
Paul's avatar
Paul committed
93

Paul's avatar
Paul committed
94
    argument compute(shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
95
    {
Paul's avatar
Paul committed
96
        argument result{output_shape};
Paul's avatar
Paul committed
97
98
99
100
101
102
103
104
105
106
107
108
109
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                std::transform(input.begin(), input.end(), output.begin(), [](auto x) {
                    return x > 0 ? x : 0;
                });
            });
        });
        return result;
    }
};

struct cpu_apply
{
Paul's avatar
Paul committed
110
    program* prog;
Paul's avatar
Paul committed
111
112
113

    void apply()
    {
Paul's avatar
Paul committed
114
115
116
117
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            if(it->op.name() == "convolution")
            {
Paul's avatar
Paul committed
118
                apply_convolution(it);
Paul's avatar
Paul committed
119
120
121
            }
            else if(it->op.name() == "activation")
            {
Paul's avatar
Paul committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
                apply_activation(it);
            }
        }
    }

    void apply_convolution(instruction_ref ins)
    {
        auto&& op = any_cast<convolution>(ins->op);
        prog->replace_instruction(ins, cpu_convolution{op}, ins->arguments);
    }

    void apply_activation(instruction_ref ins)
    {
        auto&& op = any_cast<activation>(ins->op);
        if(op.mode == "relu")
            prog->replace_instruction(ins, relu{}, ins->arguments);
    }
};

Paul's avatar
Paul committed
141
/// Returns the identifier of this target, "cpu".
std::string cpu_target::name() const
{
    return "cpu";
}
Paul's avatar
Paul committed
142

Paul's avatar
Paul committed
143
/// Runs the cpu_apply rewrite over `p`, modifying the program in place.
void cpu_target::apply(program& p) const
{
    cpu_apply lowering{&p};
    lowering.apply();
}
Paul's avatar
Paul committed
144
145
146
147

} // namespace cpu

} // namespace rtg