lowering.cpp 14.8 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
#include <migraph/gpu/lowering.hpp>
Paul's avatar
Paul committed
3
4
5
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
Paul's avatar
Paul committed
6
#include <migraph/generate.hpp>
Paul's avatar
Paul committed
7
#include <migraph/shape_for_each.hpp>
Paul's avatar
Paul committed
8
9
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
Paul's avatar
Paul committed
10
#include <migraph/dfor.hpp>
Paul's avatar
Paul committed
11
#include <migraph/gpu/kernels.hpp>
Paul's avatar
Paul committed
12
#include <migraph/iterator_for.hpp>
Paul's avatar
Paul committed
13
14
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
Paul's avatar
Paul committed
15
16

namespace migraph {
Paul's avatar
Paul committed
17
namespace gpu {
Paul's avatar
Paul committed
18

19
20
21
22
23
24
25
26
27
28
29
30
31
struct miopen_batch_norm_inference
{
    batch_norm_inference op;

    std::string name() const { return "gpu::batch_norm_inference"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(6);
        return op.compute_shape(
            {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
    }

32
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
33
    {
wsttiger's avatar
wsttiger committed
34
35
        auto x_desc  = make_tensor(args[0].get_shape());
        auto y_desc  = make_tensor(output_shape);
36
        auto bn_desc = make_tensor(args[3].get_shape());
37
38
39
40
41
42
43
44
45
46
47
48
49
50

        float alpha = 1.0, beta = 0.0f;

        // TODO: adityaatluri
        // create bn-scale-bias-mean-variance descriptor for
        // miopen call
        miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                 miopenBatchNormMode_t(op.bn_mode),
                                                 &alpha,
                                                 &beta,
                                                 x_desc.get(),
                                                 args[0].implicit(),
                                                 y_desc.get(),
                                                 args[5].implicit(),
51
                                                 bn_desc.get(),
52
53
54
55
                                                 args[3].implicit(),
                                                 args[4].implicit(),
                                                 args[1].implicit(),
                                                 args[2].implicit(),
56
                                                 op.epsilon);
57
58
59
60
61

        return args[5];
    }
};

Paul's avatar
Paul committed
62
63
64
struct miopen_convolution
{
    convolution op;
Paul's avatar
Paul committed
65
    shared<convolution_descriptor> cd;
Paul's avatar
Paul committed
66
    miopenConvFwdAlgorithm_t algo{};
Paul's avatar
Paul committed
67

Paul's avatar
Paul committed
68
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
69
70
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
71
        check_shapes{inputs, *this}.has(3).standard();
Paul's avatar
Paul committed
72
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
73
    }
Paul's avatar
Paul committed
74
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
75
    {
Paul's avatar
Paul committed
76
77
        auto x_desc = make_tensor(args[0].get_shape());
        auto w_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
78
79
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
80
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
81
        miopenConvolutionForward(ctx.handle.get(),
Paul's avatar
Paul committed
82
                                 &alpha,
Paul's avatar
Paul committed
83
                                 x_desc.get(),
Paul's avatar
Paul committed
84
                                 args[0].implicit(),
Paul's avatar
Paul committed
85
                                 w_desc.get(),
Paul's avatar
Paul committed
86
                                 args[1].implicit(),
Paul's avatar
Paul committed
87
                                 cd.get(),
Paul's avatar
Paul committed
88
                                 algo,
Paul's avatar
Paul committed
89
                                 &beta,
Paul's avatar
Paul committed
90
                                 y_desc.get(),
Paul's avatar
Paul committed
91
                                 args[2].implicit(),
Paul's avatar
Paul committed
92
93
                                 nullptr,
                                 0);
Paul's avatar
Paul committed
94
        return args[2];
Paul's avatar
Paul committed
95
    }
Paul's avatar
Paul committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124

    void compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
    {
        auto x_desc = make_tensor(inputs[0]->get_shape());
        auto w_desc = make_tensor(inputs[1]->get_shape());
        auto y_desc = make_tensor(output_shape);

        auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
        auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
        auto y = to_gpu(generate_argument(output_shape));

        int algo_count;
        miopenConvAlgoPerf_t perf;
        miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
                                              x_desc.get(),
                                              x.implicit(),
                                              w_desc.get(),
                                              w.implicit(),
                                              cd.get(),
                                              y_desc.get(),
                                              y.implicit(),
                                              1,
                                              &algo_count,
                                              &perf,
                                              nullptr,
                                              0,
                                              false);
        algo = perf.fwd_algo;
    }
Paul's avatar
Paul committed
125
126
};

Paul's avatar
Paul committed
127
128
129
130
131
struct miopen_pooling
{
    pooling op;
    shared<pooling_descriptor> pd;

Paul's avatar
Paul committed
132
    std::string name() const { return "gpu::pooling"; }
Paul's avatar
Paul committed
133
134
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
135
        check_shapes{inputs, *this}.has(2).standard();
Paul's avatar
Paul committed
136
137
        return op.compute_shape({inputs.at(1)});
    }
Paul's avatar
Paul committed
138
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
139
    {
Paul's avatar
Paul committed
140
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
141
142
143
144
        auto y_desc = make_tensor(output_shape);

        float alpha = 1, beta = 0;

Paul's avatar
Paul committed
145
        miopenPoolingForward(ctx.handle.get(),
Paul's avatar
Paul committed
146
147
148
                             pd.get(),
                             &alpha,
                             x_desc.get(),
Paul's avatar
Paul committed
149
                             args[0].implicit(),
Paul's avatar
Paul committed
150
151
                             &beta,
                             y_desc.get(),
Paul's avatar
Paul committed
152
                             args[1].implicit(),
Paul's avatar
Paul committed
153
154
155
                             false,
                             nullptr,
                             0);
Paul's avatar
Paul committed
156

Paul's avatar
Paul committed
157
        return args[1];
Paul's avatar
Paul committed
158
159
160
    }
};

Paul's avatar
Paul committed
161
162
struct miopen_add
{
Paul's avatar
Paul committed
163
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
164
165
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
166
        check_shapes{inputs, *this}.has(3).not_broadcasted();
Paul's avatar
Paul committed
167
        return inputs.at(0);
Paul's avatar
Paul committed
168
169
    }

Paul's avatar
Paul committed
170
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
171
    {
Paul's avatar
Paul committed
172
        if(args[1].get_shape().broadcasted())
Paul's avatar
Paul committed
173
        {
Paul's avatar
Paul committed
174
175
            argument result{output_shape};

Paul's avatar
Paul committed
176
            visit_all(result, from_gpu(args[0]), from_gpu(args[1]))(
Paul's avatar
Paul committed
177
178
                [&](auto output, auto input1, auto input2) {
                    shape_for_each(output.get_shape(), [&](const auto& idx) {
Paul's avatar
Paul committed
179
180
181
                        output(idx.begin(), idx.end()) =
                            input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
                    });
Paul's avatar
Paul committed
182
                });
Paul's avatar
Paul committed
183
            return to_gpu(result);
Paul's avatar
Paul committed
184
185
186
187
        }
        else
        {
            float alpha = 1, beta = 0;
Paul's avatar
Paul committed
188
189
            auto a_desc = make_tensor(args[0].get_shape());
            auto b_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
190
            auto c_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
191
            miopenOpTensor(ctx.handle.get(),
Paul's avatar
Paul committed
192
193
194
                           miopenTensorOpAdd,
                           &alpha,
                           a_desc.get(),
Paul's avatar
Paul committed
195
                           args[0].implicit(),
Paul's avatar
Paul committed
196
197
                           &alpha,
                           b_desc.get(),
Paul's avatar
Paul committed
198
                           args[1].implicit(),
Paul's avatar
Paul committed
199
200
                           &beta,
                           c_desc.get(),
Paul's avatar
Paul committed
201
202
                           args[2].implicit());
            return args[2];
Paul's avatar
Paul committed
203
204
205
206
        }
    }
};

Paul's avatar
Paul committed
207
208
209
struct miopen_gemm
{
    gemm op;
Paul's avatar
Paul committed
210
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
211
212
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
213
214
        check_shapes{inputs, *this}.has(3);
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
215
    }
Paul's avatar
Paul committed
216
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
217
    {
218
219
        float alpha     = 1.0f;
        float beta      = 0.0f;
Paul's avatar
Paul committed
220
221
        bool transa     = args[0].get_shape().transposed();
        bool transb     = args[1].get_shape().transposed();
222
223
224
        rocblas_int lda = args[0].get_shape().strides()[transa ? 1 : 0];
        rocblas_int ldb = args[1].get_shape().strides()[transb ? 1 : 0];
        rocblas_int ldc = args[2].get_shape().strides()[0];
225
226
227
        rocblas_int m   = output_shape.lens()[0];
        rocblas_int n   = output_shape.lens()[1];
        rocblas_int k   = args[0].get_shape().lens()[1];
228
        rocblas_sgemm(ctx.rbhandle.get(),
229
230
                      transb ? rocblas_operation_transpose : rocblas_operation_none,
                      transa ? rocblas_operation_transpose : rocblas_operation_none,
231
232
233
234
235
236
237
238
239
240
241
242
                      n,
                      m,
                      k,
                      &alpha,
                      args[1].implicit(),
                      ldb,
                      args[0].implicit(),
                      lda,
                      &beta,
                      args[2].implicit(),
                      ldc);
        return args[2];
Paul's avatar
Paul committed
243
244
245
    }
};

246
247
248
struct miopen_contiguous
{
    contiguous op;
Paul's avatar
Paul committed
249
    std::string name() const { return "gpu::contiguous"; }
250
251
252
253
254
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(2);
        return op.compute_shape({inputs.at(0)});
    }
255
256
    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
257
258
        hip_contiguous(output_shape, args.at(0), args.at(1));
        return args.at(1);
259
260
261
    }
};

Paul's avatar
Paul committed
262
263
264
struct miopen_relu
{
    shared<activation_descriptor> ad;
Paul's avatar
Paul committed
265
    std::string name() const { return "gpu::relu"; }
Paul's avatar
Paul committed
266
    shape compute_shape(std::vector<shape> inputs) const
Paul's avatar
Paul committed
267
    {
Paul's avatar
Paul committed
268
        check_shapes{inputs, *this}.has(2).not_broadcasted();
Paul's avatar
Paul committed
269
        return inputs.at(1);
Paul's avatar
Paul committed
270
271
    }

Paul's avatar
Paul committed
272
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
273
274
    {
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
275
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
276
        auto y_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
277
        miopenActivationForward(ctx.handle.get(),
Paul's avatar
Paul committed
278
279
280
                                ad.get(),
                                &alpha,
                                x_desc.get(),
Paul's avatar
Paul committed
281
                                args[0].implicit(),
Paul's avatar
Paul committed
282
283
                                &beta,
                                y_desc.get(),
Paul's avatar
Paul committed
284
                                args[1].implicit());
Paul's avatar
Paul committed
285

Paul's avatar
Paul committed
286
        return args[1];
Paul's avatar
Paul committed
287
288
289
    }
};

Paul's avatar
Paul committed
290
291
struct miopen_apply
{
Paul's avatar
Paul committed
292
    program* prog = nullptr;
Paul's avatar
Paul committed
293
    context ctx{};
Paul's avatar
Paul committed
294
295
296

    void apply()
    {
Paul's avatar
Paul committed
297
298
299
300
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            if(it->op.name() == "convolution")
            {
Paul's avatar
Paul committed
301
                apply_convolution(it);
Paul's avatar
Paul committed
302
303
304
            }
            else if(it->op.name() == "activation")
            {
Paul's avatar
Paul committed
305
306
                apply_activation(it);
            }
Paul's avatar
Paul committed
307
308
309
310
            else if(it->op.name() == "pooling")
            {
                apply_pooling(it);
            }
Paul's avatar
Paul committed
311
312
313
314
            else if(it->op.name() == "add")
            {
                apply_add(it);
            }
Paul's avatar
Paul committed
315
316
317
318
            else if(it->op.name() == "gemm")
            {
                apply_gemm(it);
            }
319
320
321
322
            else if(it->op.name() == "contiguous")
            {
                apply_contiguous(it);
            }
323
324
325
326
327
328
            // TODO: adityaatluri
            // tagging to easily find where code changed
            else if(it->op.name() == "batch_norm_inference")
            {
                apply_batch_norm_inference(it);
            }
Paul's avatar
Paul committed
329
330
331
        }
    }

Paul's avatar
Paul committed
332
333
    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
    {
Paul's avatar
Paul committed
334
        if(ins == --prog->end())
Paul's avatar
Paul committed
335
336
337
338
339
        {
            return prog->add_parameter("output", s);
        }
        else
        {
Paul's avatar
Paul committed
340
            auto is     = prog->add_outline(s);
Paul's avatar
Paul committed
341
342
343
344
345
            auto result = prog->insert_instruction(ins, hip_allocate{}, is);
            return result;
        }
    }

Paul's avatar
Paul committed
346
347
    void apply_convolution(instruction_ref ins)
    {
Paul's avatar
Paul committed
348
        auto&& op = any_cast<convolution>(ins->op);
Paul's avatar
Paul committed
349
350
        auto conv = miopen_convolution{op, make_conv(op)};
        conv.compile(ctx, ins->result, ins->arguments);
Paul's avatar
Paul committed
351
352
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
353
        prog->replace_instruction(ins, conv, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
354
355
    }

Paul's avatar
Paul committed
356
357
358
359
360
361
    void apply_pooling(instruction_ref ins)
    {
        auto&& op   = any_cast<pooling>(ins->op);
        auto pd     = make_pooling(op);
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
362
        prog->replace_instruction(
Paul's avatar
Paul committed
363
            ins, miopen_pooling{op, std::move(pd)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
364
365
    }

Paul's avatar
Paul committed
366
    void apply_activation(instruction_ref ins)
Paul's avatar
Paul committed
367
368
    {
        auto&& op = any_cast<activation>(ins->op);
Paul's avatar
Paul committed
369
370
        auto ad   = make_relu();
        if(op.mode == "relu")
Paul's avatar
Paul committed
371
372
        {
            auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
373
            prog->replace_instruction(
Paul's avatar
Paul committed
374
                ins, miopen_relu{std::move(ad)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
375
376
        }
    }
Paul's avatar
Paul committed
377
378
379
380
381

    void apply_add(instruction_ref ins)
    {
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
Paul's avatar
Paul committed
382
            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
383
    }
Paul's avatar
Paul committed
384
385
386

    void apply_gemm(instruction_ref ins)
    {
Paul's avatar
Paul committed
387
        auto&& op   = any_cast<gemm>(ins->op);
Paul's avatar
Paul committed
388
389
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
Paul's avatar
Paul committed
390
            ins, miopen_gemm{op}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
391
    }
392
393
394
395
396
397
398

    void apply_contiguous(instruction_ref ins)
    {
        auto&& op   = any_cast<contiguous>(ins->op);
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
    }
399
400
401
402
403

    // TODO: adityaatluri
    // Not sure how to write this. Review and fix required
    void apply_batch_norm_inference(instruction_ref ins)
    {
wsttiger's avatar
wsttiger committed
404
405
        auto&& op       = any_cast<batch_norm_inference>(ins->op);
        auto output     = insert_allocation(ins, ins->result);
406
        shape old_shape = ins->arguments.at(1)->get_shape();
wsttiger's avatar
wsttiger committed
407
408
409
410
411
412
413
414
415
416
        std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
        auto arg1 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(1));
        auto arg2 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(2));
        auto arg3 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(3));
        auto arg4 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(4));
        prog->replace_instruction(ins,
417
418
                                  miopen_batch_norm_inference{op},
                                  ins->arguments.at(0),
419
420
421
422
                                  arg1,
                                  arg2,
                                  arg3,
                                  arg4,
423
                                  output);
424
    }
Paul's avatar
Paul committed
425
426
};

Paul's avatar
Paul committed
427
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
428

Paul's avatar
Paul committed
429
} // namespace gpu
Paul's avatar
Paul committed
430

Paul's avatar
Paul committed
431
} // namespace migraph