"doc/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "d25493840c375a7cbd6d6e7bbb78b598c76a9f7a"
lowering.cpp 15.8 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
#include <migraph/gpu/lowering.hpp>
Paul's avatar
Paul committed
3
4
5
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
Paul's avatar
Paul committed
6
#include <migraph/generate.hpp>
Paul's avatar
Paul committed
7
#include <migraph/shape_for_each.hpp>
Paul's avatar
Paul committed
8
9
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
Paul's avatar
Paul committed
10
#include <migraph/dfor.hpp>
Paul's avatar
Paul committed
11
#include <migraph/gpu/kernels.hpp>
Paul's avatar
Paul committed
12
#include <migraph/iterator_for.hpp>
Paul's avatar
Paul committed
13
14
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
Paul's avatar
Paul committed
15
#include <utility>
Paul's avatar
Paul committed
16
17

namespace migraph {
Paul's avatar
Paul committed
18
namespace gpu {
Paul's avatar
Paul committed
19

20
21
22
23
24
25
struct miopen_batch_norm_inference
{
    batch_norm_inference op;

    std::string name() const { return "gpu::batch_norm_inference"; }

Paul's avatar
Paul committed
26
    shape compute_shape(const std::vector<shape>& inputs) const
27
28
29
30
31
32
    {
        check_shapes{inputs, *this}.has(6);
        return op.compute_shape(
            {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
    }

Paul's avatar
Paul committed
33
34
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
35
    {
wsttiger's avatar
wsttiger committed
36
37
        auto x_desc  = make_tensor(args[0].get_shape());
        auto y_desc  = make_tensor(output_shape);
38
        auto bn_desc = make_tensor(args[3].get_shape());
39
40
41
42
43
44
45
46
47
48
49

        float alpha = 1.0, beta = 0.0f;

        miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                 miopenBatchNormMode_t(op.bn_mode),
                                                 &alpha,
                                                 &beta,
                                                 x_desc.get(),
                                                 args[0].implicit(),
                                                 y_desc.get(),
                                                 args[5].implicit(),
50
                                                 bn_desc.get(),
51
52
                                                 args[1].implicit(),
                                                 args[2].implicit(),
53
54
                                                 args[3].implicit(),
                                                 args[4].implicit(),
55
                                                 op.epsilon);
56
57
58
59
60

        return args[5];
    }
};

Paul's avatar
Paul committed
61
62
63
struct miopen_convolution
{
    convolution op;
Paul's avatar
Paul committed
64
    shared<convolution_descriptor> cd;
Paul's avatar
Paul committed
65
    miopenConvFwdAlgorithm_t algo{};
Paul's avatar
Paul committed
66

Paul's avatar
Paul committed
67
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
68
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
69
    {
Paul's avatar
Paul committed
70
        check_shapes{inputs, *this}.has(4).standard();
Paul's avatar
Paul committed
71
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
72
    }
Paul's avatar
Paul committed
73
74
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
75
    {
Paul's avatar
Paul committed
76
77
        auto x_desc = make_tensor(args[0].get_shape());
        auto w_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
78
79
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
80
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
81
        miopenConvolutionForward(ctx.handle.get(),
Paul's avatar
Paul committed
82
                                 &alpha,
Paul's avatar
Paul committed
83
                                 x_desc.get(),
Paul's avatar
Paul committed
84
                                 args[0].implicit(),
Paul's avatar
Paul committed
85
                                 w_desc.get(),
Paul's avatar
Paul committed
86
                                 args[1].implicit(),
Paul's avatar
Paul committed
87
                                 cd.get(),
Paul's avatar
Paul committed
88
                                 algo,
Paul's avatar
Paul committed
89
                                 &beta,
Paul's avatar
Paul committed
90
                                 y_desc.get(),
Paul's avatar
Paul committed
91
                                 args[3].implicit(),
Paul's avatar
Paul committed
92
                                 args[2].implicit(),
Paul's avatar
Paul committed
93
94
                                 args[2].get_shape().bytes());
        return args[3];
Paul's avatar
Paul committed
95
    }
Paul's avatar
Paul committed
96

Paul's avatar
Paul committed
97
    shape compile(context& ctx, const shape& output_shape, std::vector<instruction_ref> inputs)
Paul's avatar
Paul committed
98
    {
Paul's avatar
Paul committed
99
        shape workspace_shape{};
Paul's avatar
Paul committed
100
101
102
103
        auto x_desc = make_tensor(inputs[0]->get_shape());
        auto w_desc = make_tensor(inputs[1]->get_shape());
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
104
        std::size_t workspace_size = 0;
Paul's avatar
Paul committed
105
        miopenConvolutionForwardGetWorkSpaceSize(
Paul's avatar
Paul committed
106
            ctx.handle.get(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
Paul's avatar
Paul committed
107
108
        workspace_shape = shape{shape::int8_type, {workspace_size}};

Paul's avatar
Paul committed
109
110
111
        auto x         = to_gpu(generate_argument(inputs[0]->get_shape()));
        auto w         = to_gpu(generate_argument(inputs[1]->get_shape()));
        auto y         = to_gpu(generate_argument(output_shape));
Paul's avatar
Paul committed
112
        auto workspace = allocate_gpu(workspace_shape);
Paul's avatar
Paul committed
113

Paul's avatar
Paul committed
114
        int algo_count = 1;
Paul's avatar
Paul committed
115
116
117
118
119
120
121
122
123
124
125
126
        miopenConvAlgoPerf_t perf;
        miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
                                              x_desc.get(),
                                              x.implicit(),
                                              w_desc.get(),
                                              w.implicit(),
                                              cd.get(),
                                              y_desc.get(),
                                              y.implicit(),
                                              1,
                                              &algo_count,
                                              &perf,
Paul's avatar
Paul committed
127
128
                                              workspace.implicit(),
                                              workspace_size,
Paul's avatar
Paul committed
129
130
                                              false);
        algo = perf.fwd_algo;
Paul's avatar
Paul committed
131
132
        return algo == miopenConvolutionFwdAlgoWinograd ? shape{shape::int8_type, {0}}
                                                        : workspace_shape;
Paul's avatar
Paul committed
133
    }
Paul's avatar
Paul committed
134
135
};

Paul's avatar
Paul committed
136
137
138
139
140
struct miopen_pooling
{
    pooling op;
    shared<pooling_descriptor> pd;

Paul's avatar
Paul committed
141
    std::string name() const { return "gpu::pooling"; }
Paul's avatar
Paul committed
142
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
143
    {
Paul's avatar
Paul committed
144
        check_shapes{inputs, *this}.has(2).standard();
Paul's avatar
Paul committed
145
        return op.compute_shape({inputs.at(0)});
Paul's avatar
Paul committed
146
    }
Paul's avatar
Paul committed
147
148
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
149
    {
Paul's avatar
Paul committed
150
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
151
152
153
154
        auto y_desc = make_tensor(output_shape);

        float alpha = 1, beta = 0;

Paul's avatar
Paul committed
155
        miopenPoolingForward(ctx.handle.get(),
Paul's avatar
Paul committed
156
157
158
                             pd.get(),
                             &alpha,
                             x_desc.get(),
Paul's avatar
Paul committed
159
                             args[0].implicit(),
Paul's avatar
Paul committed
160
161
                             &beta,
                             y_desc.get(),
Paul's avatar
Paul committed
162
                             args[1].implicit(),
Paul's avatar
Paul committed
163
164
165
                             false,
                             nullptr,
                             0);
Paul's avatar
Paul committed
166

Paul's avatar
Paul committed
167
        return args[1];
Paul's avatar
Paul committed
168
169
170
    }
};

Paul's avatar
Paul committed
171
172
struct miopen_add
{
Paul's avatar
Paul committed
173
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
174
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
175
    {
Paul's avatar
Paul committed
176
        check_shapes{inputs, *this}.has(3).not_broadcasted();
Paul's avatar
Paul committed
177
        return inputs.at(0);
Paul's avatar
Paul committed
178
179
    }

Paul's avatar
Paul committed
180
181
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
182
    {
Paul's avatar
Paul committed
183
        if(args[1].get_shape().broadcasted())
Paul's avatar
Paul committed
184
        {
Paul's avatar
Paul committed
185
186
            argument result{output_shape};

Paul's avatar
Paul committed
187
            visit_all(result, from_gpu(args[0]), from_gpu(args[1]))(
Paul's avatar
Paul committed
188
189
                [&](auto output, auto input1, auto input2) {
                    shape_for_each(output.get_shape(), [&](const auto& idx) {
Paul's avatar
Paul committed
190
191
192
                        output(idx.begin(), idx.end()) =
                            input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
                    });
Paul's avatar
Paul committed
193
                });
Paul's avatar
Paul committed
194
            return to_gpu(result);
Paul's avatar
Paul committed
195
196
197
198
        }
        else
        {
            float alpha = 1, beta = 0;
Paul's avatar
Paul committed
199
200
            auto a_desc = make_tensor(args[0].get_shape());
            auto b_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
201
            auto c_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
202
            miopenOpTensor(ctx.handle.get(),
Paul's avatar
Paul committed
203
204
205
                           miopenTensorOpAdd,
                           &alpha,
                           a_desc.get(),
Paul's avatar
Paul committed
206
                           args[0].implicit(),
Paul's avatar
Paul committed
207
208
                           &alpha,
                           b_desc.get(),
Paul's avatar
Paul committed
209
                           args[1].implicit(),
Paul's avatar
Paul committed
210
211
                           &beta,
                           c_desc.get(),
Paul's avatar
Paul committed
212
213
                           args[2].implicit());
            return args[2];
Paul's avatar
Paul committed
214
215
216
217
        }
    }
};

Paul's avatar
Paul committed
218
219
220
struct miopen_gemm
{
    gemm op;
Paul's avatar
Paul committed
221
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
222
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
223
    {
Paul's avatar
Paul committed
224
225
        check_shapes{inputs, *this}.has(3);
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
226
    }
Paul's avatar
Paul committed
227
228
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
229
    {
230
231
        float alpha     = 1.0f;
        float beta      = 0.0f;
Paul's avatar
Paul committed
232
233
        bool transa     = args[0].get_shape().transposed();
        bool transb     = args[1].get_shape().transposed();
234
235
236
        rocblas_int lda = args[0].get_shape().strides()[transa ? 1 : 0];
        rocblas_int ldb = args[1].get_shape().strides()[transb ? 1 : 0];
        rocblas_int ldc = args[2].get_shape().strides()[0];
237
238
239
        rocblas_int m   = output_shape.lens()[0];
        rocblas_int n   = output_shape.lens()[1];
        rocblas_int k   = args[0].get_shape().lens()[1];
240
        rocblas_sgemm(ctx.rbhandle.get(),
241
242
                      transb ? rocblas_operation_transpose : rocblas_operation_none,
                      transa ? rocblas_operation_transpose : rocblas_operation_none,
243
244
245
246
247
248
249
250
251
252
253
254
                      n,
                      m,
                      k,
                      &alpha,
                      args[1].implicit(),
                      ldb,
                      args[0].implicit(),
                      lda,
                      &beta,
                      args[2].implicit(),
                      ldc);
        return args[2];
Paul's avatar
Paul committed
255
256
257
    }
};

258
259
260
struct miopen_contiguous
{
    contiguous op;
Paul's avatar
Paul committed
261
    std::string name() const { return "gpu::contiguous"; }
Paul's avatar
Paul committed
262
    shape compute_shape(const std::vector<shape>& inputs) const
263
264
265
266
    {
        check_shapes{inputs, *this}.has(2);
        return op.compute_shape({inputs.at(0)});
    }
Paul's avatar
Paul committed
267
    argument compute(context&, shape output_shape, const std::vector<argument>& args) const
268
    {
Paul's avatar
Paul committed
269
        hip_contiguous(std::move(output_shape), args.at(0), args.at(1));
270
        return args.at(1);
271
272
273
    }
};

Paul's avatar
Paul committed
274
275
276
struct miopen_relu
{
    shared<activation_descriptor> ad;
Paul's avatar
Paul committed
277
    std::string name() const { return "gpu::relu"; }
Paul's avatar
Paul committed
278
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
279
    {
Paul's avatar
Paul committed
280
        check_shapes{inputs, *this}.has(2).not_broadcasted();
Paul's avatar
Paul committed
281
        return inputs.at(1);
Paul's avatar
Paul committed
282
283
    }

Paul's avatar
Paul committed
284
285
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
286
287
    {
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
288
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
289
        auto y_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
290
        miopenActivationForward(ctx.handle.get(),
Paul's avatar
Paul committed
291
292
293
                                ad.get(),
                                &alpha,
                                x_desc.get(),
Paul's avatar
Paul committed
294
                                args[0].implicit(),
Paul's avatar
Paul committed
295
296
                                &beta,
                                y_desc.get(),
Paul's avatar
Paul committed
297
                                args[1].implicit());
Paul's avatar
Paul committed
298

Paul's avatar
Paul committed
299
        return args[1];
Paul's avatar
Paul committed
300
301
302
    }
};

Paul's avatar
Paul committed
303
304
struct miopen_apply
{
Paul's avatar
Paul committed
305
    program* prog = nullptr;
Paul's avatar
Paul committed
306
    context ctx{};
Paul's avatar
Paul committed
307

Paul's avatar
Paul committed
308
309
310
311
312
313
314
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

Paul's avatar
Paul committed
315
316
    void apply()
    {
Paul's avatar
Paul committed
317
318
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
319
            auto s = it->get_shape();
Paul's avatar
Paul committed
320
321
            if(it->op.name() == "convolution")
            {
Paul's avatar
Paul committed
322
                check_shape(s, apply_convolution(it));
Paul's avatar
Paul committed
323
324
325
            }
            else if(it->op.name() == "activation")
            {
Paul's avatar
Paul committed
326
                check_shape(s, apply_activation(it));
Paul's avatar
Paul committed
327
            }
Paul's avatar
Paul committed
328
329
            else if(it->op.name() == "pooling")
            {
Paul's avatar
Paul committed
330
                check_shape(s, apply_pooling(it));
Paul's avatar
Paul committed
331
            }
Paul's avatar
Paul committed
332
333
            else if(it->op.name() == "add")
            {
Paul's avatar
Paul committed
334
                check_shape(s, apply_add(it));
Paul's avatar
Paul committed
335
            }
Paul's avatar
Paul committed
336
337
            else if(it->op.name() == "gemm")
            {
Paul's avatar
Paul committed
338
                check_shape(s, apply_gemm(it));
Paul's avatar
Paul committed
339
            }
340
341
            else if(it->op.name() == "contiguous")
            {
Paul's avatar
Paul committed
342
                check_shape(s, apply_contiguous(it));
343
            }
344
345
            else if(it->op.name() == "batch_norm_inference")
            {
Paul's avatar
Paul committed
346
                check_shape(s, apply_batch_norm_inference(it));
347
            }
Paul's avatar
Paul committed
348
349
350
        }
    }

Paul's avatar
Paul committed
351
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
352
    {
Paul's avatar
Paul committed
353
        if(ins == --prog->end())
Paul's avatar
Paul committed
354
355
356
357
358
        {
            return prog->add_parameter("output", s);
        }
        else
        {
Paul's avatar
Paul committed
359
            auto is     = prog->add_outline(s);
Paul's avatar
Paul committed
360
            auto result = prog->insert_instruction(ins, hip_allocate{std::move(tag)}, is);
Paul's avatar
Paul committed
361
362
363
364
            return result;
        }
    }

Paul's avatar
Paul committed
365
    instruction_ref apply_convolution(instruction_ref ins)
Paul's avatar
Paul committed
366
    {
Paul's avatar
Paul committed
367
        auto&& op = any_cast<convolution>(ins->op);
Paul's avatar
Paul committed
368

Paul's avatar
Paul committed
369
        auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
370
        auto ws   = conv.compile(ctx, ins->result, ins->arguments);
Paul's avatar
Paul committed
371

372
        auto workspace = insert_allocation(ins, ws, "workspace");
Paul's avatar
Paul committed
373
        auto output    = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
374

Paul's avatar
Paul committed
375
        return prog->replace_instruction(
Paul's avatar
Paul committed
376
            ins, conv, ins->arguments.at(0), ins->arguments.at(1), workspace, output);
Paul's avatar
Paul committed
377
378
    }

Paul's avatar
Paul committed
379
    instruction_ref apply_pooling(instruction_ref ins)
Paul's avatar
Paul committed
380
381
382
383
384
    {
        auto&& op   = any_cast<pooling>(ins->op);
        auto pd     = make_pooling(op);
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
385
        return prog->replace_instruction(
Paul's avatar
Paul committed
386
            ins, miopen_pooling{op, std::move(pd)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
387
388
    }

Paul's avatar
Paul committed
389
    instruction_ref apply_activation(instruction_ref ins)
Paul's avatar
Paul committed
390
391
    {
        auto&& op = any_cast<activation>(ins->op);
Paul's avatar
Paul committed
392
393
        auto ad   = make_relu();
        if(op.mode == "relu")
Paul's avatar
Paul committed
394
395
        {
            auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
396
            return prog->replace_instruction(
Paul's avatar
Paul committed
397
                ins, miopen_relu{std::move(ad)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
398
        }
Paul's avatar
Paul committed
399
        return ins;
Paul's avatar
Paul committed
400
    }
Paul's avatar
Paul committed
401

Paul's avatar
Paul committed
402
    instruction_ref apply_add(instruction_ref ins)
Paul's avatar
Paul committed
403
404
    {
        auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
405
        return prog->replace_instruction(
Paul's avatar
Paul committed
406
            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
407
    }
Paul's avatar
Paul committed
408

Paul's avatar
Paul committed
409
    instruction_ref apply_gemm(instruction_ref ins)
Paul's avatar
Paul committed
410
    {
Paul's avatar
Paul committed
411
        auto&& op   = any_cast<gemm>(ins->op);
Paul's avatar
Paul committed
412
        auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
413
        return prog->replace_instruction(
Paul's avatar
Paul committed
414
            ins, miopen_gemm{op}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
415
    }
416

Paul's avatar
Paul committed
417
    instruction_ref apply_contiguous(instruction_ref ins)
418
419
420
    {
        auto&& op   = any_cast<contiguous>(ins->op);
        auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
421
        return prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
422
    }
423

Paul's avatar
Paul committed
424
    instruction_ref apply_batch_norm_inference(instruction_ref ins)
425
    {
wsttiger's avatar
wsttiger committed
426
427
        auto&& op       = any_cast<batch_norm_inference>(ins->op);
        auto output     = insert_allocation(ins, ins->result);
428
        shape old_shape = ins->arguments.at(1)->get_shape();
wsttiger's avatar
wsttiger committed
429
        std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
Paul's avatar
Paul committed
430
431
        auto reshape_op = reshape{new_shape};
        std::vector<instruction_ref> reshapes;
Paul's avatar
Paul committed
432
433
434
435
        std::transform(ins->arguments.begin() + 1,
                       ins->arguments.end(),
                       std::back_inserter(reshapes),
                       [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
Paul's avatar
Paul committed
436
        return prog->replace_instruction(ins,
Paul's avatar
Paul committed
437
438
439
440
441
442
443
                                         miopen_batch_norm_inference{op},
                                         ins->arguments.at(0),
                                         reshapes[0],
                                         reshapes[1],
                                         reshapes[2],
                                         reshapes[3],
                                         output);
444
    }
Paul's avatar
Paul committed
445
446
};

Paul's avatar
Paul committed
447
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
448

Paul's avatar
Paul committed
449
} // namespace gpu
Paul's avatar
Paul committed
450

Paul's avatar
Paul committed
451
} // namespace migraph