lowering.cpp 10.7 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
17
#include <migraphx/gpu/quant_convolution.hpp>
Paul's avatar
Paul committed
18
19
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/gpu/relu.hpp>
Khalique's avatar
Khalique committed
20
21
#include <migraphx/gpu/sigmoid.hpp>
#include <migraphx/gpu/abs.hpp>
Paul's avatar
Paul committed
22
#include <migraphx/gpu/leaky_relu.hpp>
Khalique's avatar
Khalique committed
23
#include <migraphx/gpu/elu.hpp>
Paul's avatar
Paul committed
24
#include <migraphx/gpu/softmax.hpp>
25
#include <migraphx/gpu/logsoftmax.hpp>
Paul's avatar
Paul committed
26
#include <migraphx/gpu/add.hpp>
27
#include <migraphx/gpu/sub.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
28
29
#include <migraphx/gpu/exp.hpp>
#include <migraphx/gpu/log.hpp>
30
#include <migraphx/gpu/sin.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
31
32
#include <migraphx/gpu/cos.hpp>
#include <migraphx/gpu/tan.hpp>
33
34
#include <migraphx/gpu/sinh.hpp>
#include <migraphx/gpu/cosh.hpp>
35
#include <migraphx/gpu/tanh.hpp>
36
37
38
#include <migraphx/gpu/asin.hpp>
#include <migraphx/gpu/acos.hpp>
#include <migraphx/gpu/atan.hpp>
Paul's avatar
Paul committed
39
#include <migraphx/gpu/mul.hpp>
Khalique's avatar
Khalique committed
40
41
#include <migraphx/gpu/max.hpp>
#include <migraphx/gpu/min.hpp>
Paul's avatar
Paul committed
42
43
44
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/gemm.hpp>
45
#include <migraphx/gpu/quant_gemm.hpp>
Paul's avatar
Paul committed
46
#include <migraphx/gpu/concat.hpp>
47
#include <migraphx/gpu/pad.hpp>
48
#include <migraphx/gpu/gather.hpp>
Khalique's avatar
Khalique committed
49
#include <migraphx/gpu/lrn.hpp>
Paul's avatar
Paul committed
50
#include <utility>
51
#include <functional>
Khalique's avatar
Khalique committed
52
#include <algorithm>
Paul's avatar
Paul committed
53

Paul's avatar
Paul committed
54
namespace migraphx {
Paul's avatar
Paul committed
55
inline namespace MIGRAPHX_INLINE_NS {
Paul's avatar
Paul committed
56
namespace gpu {
Paul's avatar
Paul committed
57
58
59

struct miopen_apply
{
Paul's avatar
Paul committed
60
    program* prog = nullptr;
Paul's avatar
Paul committed
61
    context ctx{};
Shucai Xiao's avatar
Shucai Xiao committed
62
    std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
Shucai Xiao's avatar
Shucai Xiao committed
63
    instruction_ref last{};
Paul's avatar
Paul committed
64

Paul's avatar
Paul committed
65
66
67
68
69
70
71
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

72
73
    void init()
    {
Shucai Xiao's avatar
Shucai Xiao committed
74
        this->last = instruction::get_output_alias(std::prev(prog->end()));
75
76
77
78
79
80
81
82
83
        add_miopen_simple_op<miopen_relu>("relu", make_relu);
        add_miopen_simple_op<miopen_sigmoid>("sigmoid", make_sigmoid);
        add_miopen_simple_op<miopen_abs>("abs", make_abs);
        add_miopen_simple_op<miopen_tanh>("tanh", make_tanh);

        add_miopen_extend_op<miopen_leaky_relu, op::leaky_relu>("leaky_relu", make_leaky_relu);
        add_miopen_extend_op<miopen_elu, op::elu>("elu", make_elu);

        add_generic_op<hip_add>("add");
84
        add_generic_op<hip_sub>("sub");
85
86
        add_generic_op<hip_exp>("exp");
        add_generic_op<hip_log>("log");
87
88
89
90
91
92
93
94
95
        add_generic_op<hip_sin>("sin");
        add_generic_op<hip_cos>("cos");
        add_generic_op<hip_tan>("tan");
        add_generic_op<hip_sinh>("sinh");
        add_generic_op<hip_cosh>("cosh");
        add_generic_op<hip_asin>("asin");
        add_generic_op<hip_acos>("acos");
        add_generic_op<hip_atan>("atan");
        add_generic_op<hip_mul>("mul");
Khalique's avatar
Khalique committed
96
97
        add_generic_op<hip_max>("max");
        add_generic_op<hip_min>("min");
98
99
100
101
102

        add_extend_op<miopen_gemm, op::dot>("dot");
        add_extend_op<miopen_contiguous, op::contiguous>("contiguous");
        add_extend_op<hip_concat, op::concat>("concat");
        add_extend_op<miopen_softmax, op::softmax>("softmax");
103
        add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
Khalique's avatar
Khalique committed
104
        add_extend_op<hip_gather, op::gather>("gather");
105
        add_extend_op<hip_pad, op::pad>("pad");
106

Khalique's avatar
Khalique committed
107
        add_lrn_op();
108
        add_convolution_op();
109
        add_quant_convolution_op();
110
111
        add_pooling_op();
        add_batch_norm_inference_op();
112
        add_quant_gemm_op();
113
114
    }

Paul's avatar
Paul committed
115
116
    void apply()
    {
117
        init();
Paul's avatar
Paul committed
118
119
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
120
            auto s = it->get_shape();
121
            if(apply_map.count(it->name()) > 0)
122
            {
123
                check_shape(s, apply_map.at(it->name())(it));
Paul's avatar
Paul committed
124
            }
Paul's avatar
Paul committed
125
126
127
        }
    }

Paul's avatar
Paul committed
128
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
129
    {
Shucai Xiao's avatar
Shucai Xiao committed
130
        if(ins == last and tag.empty())
Paul's avatar
Paul committed
131
132
133
134
135
        {
            return prog->add_parameter("output", s);
        }
        else
        {
136
            auto result = prog->insert_instruction(ins, hip_allocate{s, std::move(tag)});
Paul's avatar
Paul committed
137
138
139
140
            return result;
        }
    }

Shucai Xiao's avatar
Shucai Xiao committed
141
    void add_convolution_op()
Paul's avatar
Paul committed
142
    {
143
144
        apply_map.emplace("convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
145

146
            auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
147
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
Paul's avatar
Paul committed
148

149
150
            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
151

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
    }

    void add_quant_convolution_op()
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());

            auto conv = miopen_quant_convolution{op, make_conv(op)};
            auto ws   = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));

            auto workspace = insert_allocation(ins, ws, "workspace");
            auto output    = insert_allocation(ins, ins->get_shape());

168
169
170
            return prog->replace_instruction(
                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
        });
Paul's avatar
Paul committed
171
172
    }

Shucai Xiao's avatar
Shucai Xiao committed
173
    void add_pooling_op()
Paul's avatar
Paul committed
174
    {
175
176
177
178
        apply_map.emplace("pooling", [=](instruction_ref ins) {
            auto&& op   = any_cast<op::pooling>(ins->get_operator());
            auto pd     = make_pooling(op);
            auto output = insert_allocation(ins, ins->get_shape());
179

180
181
182
            return prog->replace_instruction(
                ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
        });
Paul's avatar
Paul committed
183
    }
184

Khalique's avatar
Khalique committed
185
    void add_lrn_op()
Khalique's avatar
Khalique committed
186
    {
Khalique's avatar
Khalique committed
187
        apply_map.emplace("lrn", [=](instruction_ref ins) {
Khalique's avatar
Khalique committed
188
189
190
191
192
193
            auto&& op   = any_cast<op::lrn>(ins->get_operator());
            auto ldesc  = make_lrn(op);
            auto output = insert_allocation(ins, ins->get_shape());
            return prog->replace_instruction(
                ins, miopen_lrn{std::move(ldesc)}, ins->inputs().at(0), output);
        });
Khalique's avatar
Khalique committed
194
    }
Paul's avatar
Paul committed
195

Shucai Xiao's avatar
Shucai Xiao committed
196
    template <class T>
197
    void add_generic_op(std::string name)
Paul's avatar
Paul committed
198
    {
199
200
201
202
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
203

204
205
            return prog->replace_instruction(ins, T{}, refs);
        });
Paul's avatar
Paul committed
206
    }
Paul's avatar
Paul committed
207

Shucai Xiao's avatar
Shucai Xiao committed
208
    template <class T, class Op>
209
    void add_extend_op(std::string name)
Khalique's avatar
Khalique committed
210
    {
211
212
213
214
215
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto&& op                         = any_cast<Op>(ins->get_operator());
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);
Paul's avatar
Paul committed
216

217
218
            return prog->replace_instruction(ins, T{op}, refs);
        });
Khalique's avatar
Khalique committed
219
220
    }

Shucai Xiao's avatar
Shucai Xiao committed
221
    template <class T, class Op, class F>
222
    void add_miopen_extend_op(std::string name, F f)
Paul's avatar
Paul committed
223
    {
Shucai Xiao's avatar
Shucai Xiao committed
224
        apply_map.emplace(name, [=](instruction_ref ins) {
225
226
            auto&& op = any_cast<Op>(ins->get_operator());
            auto ad   = f(op.alpha);
227

228
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
229
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
230
        });
231
    }
232

Shucai Xiao's avatar
Shucai Xiao committed
233
    template <class T, class F>
234
    void add_miopen_simple_op(std::string name, F f)
235
    {
Shucai Xiao's avatar
Shucai Xiao committed
236
237
        apply_map.emplace(name, [=](instruction_ref ins) {
            auto ad     = f();
238
            auto output = insert_allocation(ins, ins->get_shape());
Shucai Xiao's avatar
Shucai Xiao committed
239
            return prog->replace_instruction(ins, T{std::move(ad)}, ins->inputs().at(0), output);
240
        });
241
242
    }

Shucai Xiao's avatar
Shucai Xiao committed
243
    void add_batch_norm_inference_op()
244
    {
245
246
247
248
249
250
251
252
        apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
            auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
            auto output     = insert_allocation(ins, ins->get_shape());
            shape old_shape = ins->inputs().at(1)->get_shape();
            std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
            auto reshape_op = op::reshape{new_shape};
            std::vector<instruction_ref> reshapes;
            std::transform(ins->inputs().begin() + 1,
Shucai Xiao's avatar
Shucai Xiao committed
253
254
255
                           ins->inputs().end(),
                           std::back_inserter(reshapes),
                           [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
256
            return prog->replace_instruction(ins,
Shucai Xiao's avatar
Shucai Xiao committed
257
258
259
260
261
262
263
                                             miopen_batch_norm_inference{op},
                                             ins->inputs().at(0),
                                             reshapes[0],
                                             reshapes[1],
                                             reshapes[2],
                                             reshapes[3],
                                             output);
264
        });
265
    }
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

    void add_quant_gemm_op()
    {
        apply_map.emplace("quant_gemm", [=](instruction_ref ins) {
            auto&& op                         = any_cast<op::quant_dot>(ins->get_operator());
            auto output                       = insert_allocation(ins, ins->get_shape());
            std::vector<instruction_ref> refs = ins->inputs();
            refs.push_back(output);

            // Need another two buffers for packed data buffer 
            auto shape_a = refs.at(0)->get_shape();
            if (shape_a.transposed())
            {
                auto pack_a = insert_allocation(ins, shape_a);
                refs.push_back(pack_a);
                std::swap(refs.back(), refs.at(0));
            }

            auto shape_b = refs.at(1)->get_shape();
            if (!shape_b.transposed())
            {
                auto pack_b = insert_allocation(ins, shape_b);
                refs.push_back(pack_b);
                std::swap(refs.back(), refs.at(1));
            }

            return prog->replace_instruction(ins, miopen_quant_gemm{op}, refs);
        });
    }
Paul's avatar
Paul committed
295
296
};

Paul's avatar
Paul committed
297
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
298
} // namespace gpu
Paul's avatar
Paul committed
299
} // namespace MIGRAPHX_INLINE_NS
Paul's avatar
Paul committed
300
} // namespace migraphx