lowering.cpp 13.1 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
#include <migraph/gpu/lowering.hpp>
Paul's avatar
Paul committed
3
4
5
6
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
Paul's avatar
Paul committed
7
8
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
Paul's avatar
Paul committed
9
#include <migraph/dfor.hpp>
Paul's avatar
Paul committed
10
#include <migraph/gpu/kernels.hpp>
Paul's avatar
Paul committed
11
#include <migraph/iterator_for.hpp>
Paul's avatar
Paul committed
12
13
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
Paul's avatar
Paul committed
14
15

namespace migraph {
Paul's avatar
Paul committed
16
namespace gpu {
Paul's avatar
Paul committed
17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
struct miopen_batch_norm_inference
{
    batch_norm_inference op;

    std::string name() const { return "gpu::batch_norm_inference"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(6);
        return op.compute_shape(
            {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
    }

    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
        auto x_desc = make_tensor(args[0].get_shape());
        auto y_desc = make_tensor(output_shape);

        float alpha = 1.0, beta = 0.0f;

        // TODO: adityaatluri
        // create bn-scale-bias-mean-variance descriptor for
        // miopen call
        miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                 miopenBatchNormMode_t(op.bn_mode),
                                                 &alpha,
                                                 &beta,
                                                 x_desc.get(),
                                                 args[0].implicit(),
                                                 y_desc.get(),
                                                 args[5].implicit(),
                                                 bn_desc,
                                                 args[3].implicit(),
                                                 args[4].implicit(),
                                                 args[1].implicit(),
                                                 args[2].implicit(),
                                                 op.mode.epsilon);

        return args[5];
    }
};

Paul's avatar
Paul committed
60
61
62
struct miopen_convolution
{
    convolution op;
Paul's avatar
Paul committed
63
    shared<convolution_descriptor> cd;
Paul's avatar
Paul committed
64

Paul's avatar
Paul committed
65
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
66
67
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
68
69
        check_shapes{inputs, *this}.has(3);
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
70
    }
Paul's avatar
Paul committed
71
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
72
    {
Paul's avatar
Paul committed
73
74
        auto x_desc = make_tensor(args[0].get_shape());
        auto w_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
75
76
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
77
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
78
79
        int algo_count;
        miopenConvAlgoPerf_t perf;
Paul's avatar
Paul committed
80
        miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
Paul's avatar
Paul committed
81
                                              x_desc.get(),
Paul's avatar
Paul committed
82
                                              args[0].implicit(),
Paul's avatar
Paul committed
83
                                              w_desc.get(),
Paul's avatar
Paul committed
84
                                              args[1].implicit(),
Paul's avatar
Paul committed
85
                                              cd.get(),
Paul's avatar
Paul committed
86
                                              y_desc.get(),
Paul's avatar
Paul committed
87
                                              args[2].implicit(),
Paul's avatar
Paul committed
88
89
90
                                              1,
                                              &algo_count,
                                              &perf,
Paul's avatar
Paul committed
91
92
                                              nullptr,
                                              0,
Paul's avatar
Paul committed
93
                                              false);
Paul's avatar
Paul committed
94
        miopenConvolutionForward(ctx.handle.get(),
Paul's avatar
Paul committed
95
                                 &alpha,
Paul's avatar
Paul committed
96
                                 x_desc.get(),
Paul's avatar
Paul committed
97
                                 args[0].implicit(),
Paul's avatar
Paul committed
98
                                 w_desc.get(),
Paul's avatar
Paul committed
99
                                 args[1].implicit(),
Paul's avatar
Paul committed
100
101
102
                                 cd.get(),
                                 perf.fwd_algo,
                                 &beta,
Paul's avatar
Paul committed
103
                                 y_desc.get(),
Paul's avatar
Paul committed
104
                                 args[2].implicit(),
Paul's avatar
Paul committed
105
106
                                 nullptr,
                                 0);
Paul's avatar
Paul committed
107
        return args[2];
Paul's avatar
Paul committed
108
109
110
    }
};

Paul's avatar
Paul committed
111
112
113
114
115
struct miopen_pooling
{
    pooling op;
    shared<pooling_descriptor> pd;

Paul's avatar
Paul committed
116
    std::string name() const { return "gpu::pooling"; }
Paul's avatar
Paul committed
117
118
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
119
        check_shapes{inputs, *this}.has(2);
Paul's avatar
Paul committed
120
121
        return op.compute_shape({inputs.at(1)});
    }
Paul's avatar
Paul committed
122
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
123
    {
Paul's avatar
Paul committed
124
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
125
126
127
128
        auto y_desc = make_tensor(output_shape);

        float alpha = 1, beta = 0;

Paul's avatar
Paul committed
129
        miopenPoolingForward(ctx.handle.get(),
Paul's avatar
Paul committed
130
131
132
                             pd.get(),
                             &alpha,
                             x_desc.get(),
Paul's avatar
Paul committed
133
                             args[0].implicit(),
Paul's avatar
Paul committed
134
135
                             &beta,
                             y_desc.get(),
Paul's avatar
Paul committed
136
                             args[1].implicit(),
Paul's avatar
Paul committed
137
138
139
                             false,
                             nullptr,
                             0);
Paul's avatar
Paul committed
140

Paul's avatar
Paul committed
141
        return args[1];
Paul's avatar
Paul committed
142
143
144
    }
};

Paul's avatar
Paul committed
145
146
struct miopen_add
{
Paul's avatar
Paul committed
147
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
148
149
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
150
151
        check_shapes{inputs, *this}.has(3);
        return inputs.at(0);
Paul's avatar
Paul committed
152
153
    }

Paul's avatar
Paul committed
154
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
155
    {
Paul's avatar
Paul committed
156
        if(args[1].get_shape().broadcasted())
Paul's avatar
Paul committed
157
        {
Paul's avatar
Paul committed
158
159
            argument result{output_shape};

Paul's avatar
Paul committed
160
            visit_all(result, from_gpu(args[0]), from_gpu(args[1]))(
Paul's avatar
Paul committed
161
162
                [&](auto output, auto input1, auto input2) {
                    shape_for_each(output.get_shape(), [&](const auto& idx) {
Paul's avatar
Paul committed
163
164
165
                        output(idx.begin(), idx.end()) =
                            input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
                    });
Paul's avatar
Paul committed
166
                });
Paul's avatar
Paul committed
167
            return to_gpu(result);
Paul's avatar
Paul committed
168
169
170
171
        }
        else
        {
            float alpha = 1, beta = 0;
Paul's avatar
Paul committed
172
173
            auto a_desc = make_tensor(args[0].get_shape());
            auto b_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
174
            auto c_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
175
            miopenOpTensor(ctx.handle.get(),
Paul's avatar
Paul committed
176
177
178
                           miopenTensorOpAdd,
                           &alpha,
                           a_desc.get(),
Paul's avatar
Paul committed
179
                           args[0].implicit(),
Paul's avatar
Paul committed
180
181
                           &alpha,
                           b_desc.get(),
Paul's avatar
Paul committed
182
                           args[1].implicit(),
Paul's avatar
Paul committed
183
184
                           &beta,
                           c_desc.get(),
Paul's avatar
Paul committed
185
186
                           args[2].implicit());
            return args[2];
Paul's avatar
Paul committed
187
188
189
190
        }
    }
};

Paul's avatar
Paul committed
191
192
193
struct miopen_gemm
{
    gemm op;
Paul's avatar
Paul committed
194
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
195
196
    shape compute_shape(std::vector<shape> inputs) const
    {
Paul's avatar
Paul committed
197
198
        check_shapes{inputs, *this}.has(3);
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
199
    }
Paul's avatar
Paul committed
200
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
201
    {
202
203
204
205
206
207
208
209
        float alpha     = 1.0f;
        float beta      = 0.0f;
        rocblas_int lda = args[0].get_shape().lens()[1];
        rocblas_int ldb = args[1].get_shape().lens()[1];
        rocblas_int ldc = args[2].get_shape().lens()[1];
        rocblas_int m   = output_shape.lens()[0];
        rocblas_int n   = output_shape.lens()[1];
        rocblas_int k   = args[0].get_shape().lens()[1];
210
        rocblas_sgemm(ctx.rbhandle.get(),
211
212
213
214
215
216
217
218
219
220
221
222
223
224
                      rocblas_operation_none,
                      rocblas_operation_none,
                      n,
                      m,
                      k,
                      &alpha,
                      args[1].implicit(),
                      ldb,
                      args[0].implicit(),
                      lda,
                      &beta,
                      args[2].implicit(),
                      ldc);
        return args[2];
Paul's avatar
Paul committed
225
226
227
    }
};

228
229
230
struct miopen_contiguous
{
    contiguous op;
Paul's avatar
Paul committed
231
    std::string name() const { return "gpu::contiguous"; }
232
233
234
235
236
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(2);
        return op.compute_shape({inputs.at(0)});
    }
237
238
    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
239
240
        hip_contiguous(output_shape, args.at(0), args.at(1));
        return args.at(1);
241
242
243
    }
};

Paul's avatar
Paul committed
244
245
246
struct miopen_relu
{
    shared<activation_descriptor> ad;
Paul's avatar
Paul committed
247
    std::string name() const { return "gpu::relu"; }
Paul's avatar
Paul committed
248
    shape compute_shape(std::vector<shape> inputs) const
Paul's avatar
Paul committed
249
    {
Paul's avatar
Paul committed
250
        check_shapes{inputs, *this}.has(2);
Paul's avatar
Paul committed
251
        return inputs.at(1);
Paul's avatar
Paul committed
252
253
    }

Paul's avatar
Paul committed
254
    argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
Paul's avatar
Paul committed
255
256
    {
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
257
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
258
        auto y_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
259
        miopenActivationForward(ctx.handle.get(),
Paul's avatar
Paul committed
260
261
262
                                ad.get(),
                                &alpha,
                                x_desc.get(),
Paul's avatar
Paul committed
263
                                args[0].implicit(),
Paul's avatar
Paul committed
264
265
                                &beta,
                                y_desc.get(),
Paul's avatar
Paul committed
266
                                args[1].implicit());
Paul's avatar
Paul committed
267

Paul's avatar
Paul committed
268
        return args[1];
Paul's avatar
Paul committed
269
270
271
    }
};

Paul's avatar
Paul committed
272
273
struct miopen_apply
{
Paul's avatar
Paul committed
274
    program* prog = nullptr;
Paul's avatar
Paul committed
275
276
277

    void apply()
    {
Paul's avatar
Paul committed
278
279
280
281
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            if(it->op.name() == "convolution")
            {
Paul's avatar
Paul committed
282
                apply_convolution(it);
Paul's avatar
Paul committed
283
284
285
            }
            else if(it->op.name() == "activation")
            {
Paul's avatar
Paul committed
286
287
                apply_activation(it);
            }
Paul's avatar
Paul committed
288
289
290
291
            else if(it->op.name() == "pooling")
            {
                apply_pooling(it);
            }
Paul's avatar
Paul committed
292
293
294
295
            else if(it->op.name() == "add")
            {
                apply_add(it);
            }
Paul's avatar
Paul committed
296
297
298
299
            else if(it->op.name() == "gemm")
            {
                apply_gemm(it);
            }
300
301
302
303
            else if(it->op.name() == "contiguous")
            {
                apply_contiguous(it);
            }
304
305
306
307
308
309
            // TODO: adityaatluri
            // tagging to easily find where code changed
            else if(it->op.name() == "batch_norm_inference")
            {
                apply_batch_norm_inference(it);
            }
Paul's avatar
Paul committed
310
311
312
        }
    }

Paul's avatar
Paul committed
313
314
    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
    {
Paul's avatar
Paul committed
315
        if(ins == --prog->end())
Paul's avatar
Paul committed
316
317
318
319
320
        {
            return prog->add_parameter("output", s);
        }
        else
        {
Paul's avatar
Paul committed
321
            auto is     = prog->add_outline(s);
Paul's avatar
Paul committed
322
323
324
325
326
            auto result = prog->insert_instruction(ins, hip_allocate{}, is);
            return result;
        }
    }

Paul's avatar
Paul committed
327
328
    void apply_convolution(instruction_ref ins)
    {
Paul's avatar
Paul committed
329
330
        auto&& op   = any_cast<convolution>(ins->op);
        auto cd     = make_conv(op);
Paul's avatar
Paul committed
331
332
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
333
334
335
336
337
        prog->replace_instruction(ins,
                                  miopen_convolution{op, std::move(cd)},
                                  ins->arguments.at(0),
                                  ins->arguments.at(1),
                                  output);
Paul's avatar
Paul committed
338
339
    }

Paul's avatar
Paul committed
340
341
342
343
344
345
    void apply_pooling(instruction_ref ins)
    {
        auto&& op   = any_cast<pooling>(ins->op);
        auto pd     = make_pooling(op);
        auto output = insert_allocation(ins, ins->result);

Paul's avatar
Paul committed
346
        prog->replace_instruction(
Paul's avatar
Paul committed
347
            ins, miopen_pooling{op, std::move(pd)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
348
349
    }

Paul's avatar
Paul committed
350
    void apply_activation(instruction_ref ins)
Paul's avatar
Paul committed
351
352
    {
        auto&& op = any_cast<activation>(ins->op);
Paul's avatar
Paul committed
353
354
        auto ad   = make_relu();
        if(op.mode == "relu")
Paul's avatar
Paul committed
355
356
        {
            auto output = insert_allocation(ins, ins->result);
Paul's avatar
Paul committed
357
            prog->replace_instruction(
Paul's avatar
Paul committed
358
                ins, miopen_relu{std::move(ad)}, ins->arguments.at(0), output);
Paul's avatar
Paul committed
359
360
        }
    }
Paul's avatar
Paul committed
361
362
363
364
365

    void apply_add(instruction_ref ins)
    {
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
Paul's avatar
Paul committed
366
            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
367
    }
Paul's avatar
Paul committed
368
369
370

    void apply_gemm(instruction_ref ins)
    {
Paul's avatar
Paul committed
371
        auto&& op   = any_cast<gemm>(ins->op);
Paul's avatar
Paul committed
372
373
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
Paul's avatar
Paul committed
374
            ins, miopen_gemm{op}, ins->arguments.at(0), ins->arguments.at(1), output);
Paul's avatar
Paul committed
375
    }
376
377
378
379
380
381
382

    void apply_contiguous(instruction_ref ins)
    {
        auto&& op   = any_cast<contiguous>(ins->op);
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
    }
383
384
385
386
387
388
389
390
391
392

    // TODO: adityaatluri
    // Not sure how to write this. Review and fix required
    void apply_batch_norm_inference(instruction_ref ins)
    {
        auto&& op   = any_cast<batch_norm_inference>(ins->op);
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(
            ins, miopen_batch_norm_inference{op}, ins->arguments.at(0), output);
    }
Paul's avatar
Paul committed
393
394
};

Paul's avatar
Paul committed
395
void lowering::apply(program& p) const { miopen_apply{&p}.apply(); }
Paul's avatar
Paul committed
396

Paul's avatar
Paul committed
397
} // namespace gpu
Paul's avatar
Paul committed
398

Paul's avatar
Paul committed
399
} // namespace migraph