lowering.cpp 14.3 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
#include <migraph/gpu/lowering.hpp>
Paul's avatar
Paul committed
3
4
5
6
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
Paul's avatar
Paul committed
7
8
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
Paul's avatar
Paul committed
9
#include <migraph/dfor.hpp>
Paul's avatar
Paul committed
10
#include <migraph/gpu/kernels.hpp>
Paul's avatar
Paul committed
11
#include <migraph/iterator_for.hpp>
Paul's avatar
Paul committed
12
13
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
Paul's avatar
Paul committed
14
15

namespace migraph {
Paul's avatar
Paul committed
16
namespace gpu {
Paul's avatar
Paul committed
17

18
19
20
21
22
23
24
25
26
27
28
29
30
struct miopen_batch_norm_inference
{
    batch_norm_inference op;

    std::string name() const { return "gpu::batch_norm_inference"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(6);
        return op.compute_shape(
            {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
    }

31
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
32
    {
wsttiger's avatar
wsttiger committed
33
34
        auto x_desc  = make_tensor(args[0].get_shape());
        auto y_desc  = make_tensor(output_shape);
35
        auto bn_desc = make_tensor(args[3].get_shape());
36
37
38
39
40
41
42
43
44
45
46
47
48
49

        float alpha = 1.0, beta = 0.0f;

        // TODO: adityaatluri
        // create bn-scale-bias-mean-variance descriptor for
        // miopen call
        miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                 miopenBatchNormMode_t(op.bn_mode),
                                                 &alpha,
                                                 &beta,
                                                 x_desc.get(),
                                                 args[0].implicit(),
                                                 y_desc.get(),
                                                 args[5].implicit(),
50
                                                 bn_desc.get(),
51
52
53
54
                                                 args[3].implicit(),
                                                 args[4].implicit(),
                                                 args[1].implicit(),
                                                 args[2].implicit(),
55
                                                 op.epsilon);
56
57
58
59
60

        return args[5];
    }
};

Paul's avatar
Paul committed
61
62
63
struct miopen_convolution
{
    convolution op;
Paul's avatar
Paul committed
64
    shared<convolution_descriptor> cd;
Paul's avatar
Paul committed
65

Paul's avatar
Paul committed
66
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
67
68
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
69
        check_shapes{inputs, *this}.has(3).standard();
Paul's avatar
Paul committed
70
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
71
    }
Paul's avatar
Paul committed
72
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
73
    {
Paul's avatar
Paul committed
74
75
        auto x_desc = make_tensor(args[0].get_shape());
        auto w_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
76
77
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
78
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
79
80
        int algo_count;
        miopenConvAlgoPerf_t perf;
Paul's avatar
Paul committed
81
        miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
Paul's avatar
Paul committed
82
                                              x_desc.get(),
Paul's avatar
Paul committed
83
                                              args[0].implicit(),
Paul's avatar
Paul committed
84
                                              w_desc.get(),
Paul's avatar
Paul committed
85
                                              args[1].implicit(),
Paul's avatar
Paul committed
86
                                              cd.get(),
Paul's avatar
Paul committed
87
                                              y_desc.get(),
Paul's avatar
Paul committed
88
                                              args[2].implicit(),
Paul's avatar
Paul committed
89
90
91
                                              1,
                                              &algo_count,
                                              &perf,
Paul's avatar
Paul committed
92
93
                                              nullptr,
                                              0,
Paul's avatar
Paul committed
94
                                              false);
Paul's avatar
Paul committed
95
        miopenConvolutionForward(ctx.handle.get(),
Paul's avatar
Paul committed
96
                                 &alpha,
Paul's avatar
Paul committed
97
                                 x_desc.get(),
Paul's avatar
Paul committed
98
                                 args[0].implicit(),
Paul's avatar
Paul committed
99
                                 w_desc.get(),
Paul's avatar
Paul committed
100
                                 args[1].implicit(),
Paul's avatar
Paul committed
101
102
103
                                 cd.get(),
                                 perf.fwd_algo,
                                 &beta,
Paul's avatar
Paul committed
104
                                 y_desc.get(),
Paul's avatar
Paul committed
105
                                 args[2].implicit(),
Paul's avatar
Paul committed
106
107
                                 nullptr,
                                 0);
Paul's avatar
Paul committed
108
        return args[2];
Paul's avatar
Paul committed
109
110
111
    }
};

Paul's avatar
Paul committed
112
113
114
115
116
struct miopen_pooling
{
    pooling op;
    shared<pooling_descriptor> pd;

Paul's avatar
Paul committed
117
    std::string name() const { return "gpu::pooling"; }
Paul's avatar
Paul committed
118
119
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
120
        check_shapes{inputs, *this}.has(2).standard();
Paul's avatar
Paul committed
121
122
        return op.compute_shape({inputs.at(1)});
    }
Paul's avatar
Paul committed
123
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
124
    {
Paul's avatar
Paul committed
125
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
126
127
128
129
        auto y_desc = make_tensor(output_shape);

        float alpha = 1, beta = 0;

Paul's avatar
Paul committed
130
        miopenPoolingForward(ctx.handle.get(),
Paul's avatar
Paul committed
131
132
133
                             pd.get(),
                             &alpha,
                             x_desc.get(),
Paul's avatar
Paul committed
134
                             args[0].implicit(),
Paul's avatar
Paul committed
135
136
                             &beta,
                             y_desc.get(),
Paul's avatar
Paul committed
137
                             args[1].implicit(),
Paul's avatar
Paul committed
138
139
140
                             false,
                             nullptr,
                             0);
Paul's avatar
Paul committed
141

Paul's avatar
Paul committed
142
        return args[1];
Paul's avatar
Paul committed
143
144
145
    }
};

Paul's avatar
Paul committed
146
147
struct miopen_add
{
Paul's avatar
Paul committed
148
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
149
150
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
151
        check_shapes{inputs, *this}.has(3).not_broadcasted();
Paul's avatar
Paul committed
152
        return inputs.at(0);
Paul's avatar
Paul committed
153
154
    }

Paul's avatar
Paul committed
155
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
156
    {
Paul's avatar
Paul committed
157
        if(args[1].get_shape().broadcasted())
Paul's avatar
Paul committed
158
        {
Paul's avatar
Paul committed
159
160
            argument result{output_shape};

Paul's avatar
Paul committed
161
            visit_all(result, from_gpu(args[0]), from_gpu(args[1]))(
Paul's avatar
Paul committed
162
163
                [&](auto output, auto input1, auto input2) {
                    shape_for_each(output.get_shape(), [&](const auto& idx) {
Paul's avatar
Paul committed
164
165
166
                        output(idx.begin(), idx.end()) =
                            input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
                    });
Paul's avatar
Paul committed
167
                });
Paul's avatar
Paul committed
168
            return to_gpu(result);
Paul's avatar
Paul committed
169
170
171
172
        }
        else
        {
            float alpha = 1, beta = 0;
Paul's avatar
Paul committed
173
174
            auto a_desc = make_tensor(args[0].get_shape());
            auto b_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
175
            auto c_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
176
            miopenOpTensor(ctx.handle.get(),
Paul's avatar
Paul committed
177
178
179
                           miopenTensorOpAdd,
                           &alpha,
                           a_desc.get(),
Paul's avatar
Paul committed
180
                           args[0].implicit(),
Paul's avatar
Paul committed
181
182
                           &alpha,
                           b_desc.get(),
Paul's avatar
Paul committed
183
                           args[1].implicit(),
Paul's avatar
Paul committed
184
185
                           &beta,
                           c_desc.get(),
Paul's avatar
Paul committed
186
187
                           args[2].implicit());
            return args[2];
Paul's avatar
Paul committed
188
189
190
191
        }
    }
};

Paul's avatar
Paul committed
192
193
194
struct miopen_gemm
{
    gemm op;
Paul's avatar
Paul committed
195
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
196
197
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
198
199
        check_shapes{inputs, *this}.has(3);
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
200
    }
Paul's avatar
Paul committed
201
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
202
    {
203
204
        float alpha     = 1.0f;
        float beta      = 0.0f;
Paul's avatar
Paul committed
205
206
        bool transa     = args[0].get_shape().transposed();
        bool transb     = args[1].get_shape().transposed();
207
208
209
        rocblas_int lda = args[0].get_shape().strides()[transa ? 1 : 0];
        rocblas_int ldb = args[1].get_shape().strides()[transb ? 1 : 0];
        rocblas_int ldc = args[2].get_shape().strides()[0];
210
211
212
        rocblas_int m   = output_shape.lens()[0];
        rocblas_int n   = output_shape.lens()[1];
        rocblas_int k   = args[0].get_shape().lens()[1];
213
        rocblas_sgemm(ctx.rbhandle.get(),
214
215
                      transb ? rocblas_operation_transpose : rocblas_operation_none,
                      transa ? rocblas_operation_transpose : rocblas_operation_none,
216
217
218
219
220
221
222
223
224
225
226
227
                      n,
                      m,
                      k,
                      &alpha,
                      args[1].implicit(),
                      ldb,
                      args[0].implicit(),
                      lda,
                      &beta,
                      args[2].implicit(),
                      ldc);
        return args[2];
Paul's avatar
Paul committed
228
229
230
    }
};

231
232
233
struct miopen_contiguous
{
    contiguous op;
Paul's avatar
Paul committed
234
    std::string name() const { return "gpu::contiguous"; }
235
236
237
238
239
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(2);
        return op.compute_shape({inputs.at(0)});
    }
240
241
    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
242
243
        hip_contiguous(output_shape, args.at(0), args.at(1));
        return args.at(1);
244
245
246
    }
};

Paul's avatar
Paul committed
247
248
249
struct miopen_relu
{
    shared<activation_descriptor> ad;
Paul's avatar
Paul committed
250
    std::string name() const { return "gpu::relu"; }
Paul's avatar
Paul committed
251
    shape compute_shape(std::vector<shape> inputs) const
Paul's avatar
Paul committed
252
    {
Paul's avatar
Paul committed
253
        check_shapes{inputs, *this}.has(2).not_broadcasted();
Paul's avatar
Paul committed
254
        return inputs.at(1);
Paul's avatar
Paul committed
255
256
    }

Paul's avatar
Paul committed
257
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
258
259
    {
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
260
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
261
        auto y_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
262
        miopenActivationForward(ctx.handle.get(),
Paul's avatar
Paul committed
263
264
265
                                ad.get(),
                                &alpha,
                                x_desc.get(),
Paul's avatar
Paul committed
266
                                args[0].implicit(),
Paul's avatar
Paul committed
267
268
                                &beta,
                                y_desc.get(),
Paul's avatar
Paul committed
269
                                args[1].implicit());
Paul's avatar
Paul committed
270

Paul's avatar
Paul committed
271
        return args[1];
Paul's avatar
Paul committed
272
273
274
    }
};

Paul's avatar
Paul committed
275
276
struct miopen_apply
{
Paul's avatar
Paul committed
277
    program* prog = nullptr;
Paul's avatar
Paul committed
278
279
280

    void apply()
    {
Paul's avatar
Paul committed
281
282
283
284
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            if(it->op.name() == "convolution")
            {
Paul's avatar
Paul committed
285
                apply_convolution(it);
Paul's avatar
Paul committed
286
287
288
            }
            else if(it->op.name() == "activation")
            {
Paul's avatar
Paul committed
289
290
                apply_activation(it);
            }
Paul's avatar
Paul committed
291
292
293
294
            else if(it->op.name() == "pooling")
            {
                apply_pooling(it);
            }
Paul's avatar
Paul committed
295
296
297
298
            else if(it->op.name() == "add")
            {
                apply_add(it);
            }
Paul's avatar
Paul committed
299
300
301
302
            else if(it->op.name() == "gemm")
            {
                apply_gemm(it);
            }
303
304
305
306
            else if(it->op.name() == "contiguous")
            {
                apply_contiguous(it);
            }
307
308
309
310
311
312
            // TODO: adityaatluri
            // tagging to easily find where code changed
            else if(it->op.name() == "batch_norm_inference")
            {
                apply_batch_norm_inference(it);
            }
Paul's avatar
Paul committed
313
314
315
        }
    }

Paul's avatar
Paul committed
316
317
    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
    {
Paul's avatar
Paul committed
318
        if(ins == --prog->end())
Paul's avatar
Paul committed
319
320
321
322
323
        {
            return prog->add_parameter("output", s);
        }
        else
        {
Paul's avatar
Paul committed
324
            auto is     = prog->add_outline(s);
Paul's avatar
Paul committed
325
326
327
328
329
            auto result = prog->insert_instruction(ins, hip_allocate{}, is);
            return result;
        }
    }

Paul's avatar
Paul committed
330
331
    void apply_convolution(instruction_ref ins)
    {
Paul's avatar
Paul committed
332
333
        auto&& op   = any_cast<convolution>(ins->op);
        auto cd     = make_conv(op);
Paul's avatar
Paul committed
334
335
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
336
337
338
339
340
        prog->replace_instruction(ins,
                                  miopen_convolution{op, std::move(cd)},
                                  ins->arguments.at(0),
                                  ins->arguments.at(1),
                                  output);
Paul's avatar
Paul committed
341
342
    }

Paul's avatar
Paul committed
343
344
345
346
347
348
    void apply_pooling(instruction_ref ins)
    {
        auto&& op   = any_cast<pooling>(ins->op);
        auto pd     = make_pooling(op);
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
349
        prog->replace_instruction(
Paul's avatar
Paul committed
350
            ins, miopen_pooling{op, std::move(pd)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
351
352
    }

Paul's avatar
Paul committed
353
    void apply_activation(instruction_ref ins)
Paul's avatar
Paul committed
354
355
    {
        auto&& op = any_cast<activation>(ins->op);
Paul's avatar
Paul committed
356
357
        auto ad   = make_relu();
        if(op.mode == "relu")
Paul's avatar
Paul committed
358
359
        {
            auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
360
            prog->replace_instruction(
Paul's avatar
Paul committed
361
                ins, miopen_relu{std::move(ad)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
362
363
        }
    }
Paul's avatar
Paul committed
364
365
366
367
368

    void apply_add(instruction_ref ins)
    {
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
Paul's avatar
Paul committed
369
            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
370
    }
Paul's avatar
Paul committed
371
372
373

    void apply_gemm(instruction_ref ins)
    {
Paul's avatar
Paul committed
374
        auto&& op   = any_cast<gemm>(ins->op);
Paul's avatar
Paul committed
375
376
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
Paul's avatar
Paul committed
377
            ins, miopen_gemm{op}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
378
    }
379
380
381
382
383
384
385

    void apply_contiguous(instruction_ref ins)
    {
        auto&& op   = any_cast<contiguous>(ins->op);
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
    }
386
387
388
389
390

    // TODO: adityaatluri
    // Not sure how to write this. Review and fix required
    void apply_batch_norm_inference(instruction_ref ins)
    {
wsttiger's avatar
wsttiger committed
391
392
        auto&& op       = any_cast<batch_norm_inference>(ins->op);
        auto output     = insert_allocation(ins, ins->result);
393
        shape old_shape = ins->arguments.at(1)->get_shape();
wsttiger's avatar
wsttiger committed
394
395
396
397
398
399
400
401
402
403
        std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
        auto arg1 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(1));
        auto arg2 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(2));
        auto arg3 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(3));
        auto arg4 =
            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(4));
        prog->replace_instruction(ins,
404
405
                                  miopen_batch_norm_inference{op},
                                  ins->arguments.at(0),
406
407
408
409
                                  arg1,
                                  arg2,
                                  arg3,
                                  arg4,
410
                                  output);
411
    }
Paul's avatar
Paul committed
412
413
};

Paul's avatar
Paul committed
414
void lowering::apply(program& p) const { miopen_apply{&p}.apply(); }
Paul's avatar
Paul committed
415

Paul's avatar
Paul committed
416
} // namespace gpu
Paul's avatar
Paul committed
417

Paul's avatar
Paul committed
418
} // namespace migraph