lowering.cpp 17.3 KB
Newer Older
1
#include <rocblas.h>
Paul's avatar
Paul committed
2
#include <migraph/gpu/lowering.hpp>
Paul's avatar
Paul committed
3
4
5
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
Paul's avatar
Paul committed
6
#include <migraph/generate.hpp>
Paul's avatar
Paul committed
7
#include <migraph/shape_for_each.hpp>
Paul's avatar
Paul committed
8
9
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
Paul's avatar
Paul committed
10
#include <migraph/dfor.hpp>
11
#include <migraph/gpu/device/contiguous.hpp>
Paul's avatar
Paul committed
12
#include <migraph/gpu/device/add.hpp>
Paul's avatar
Paul committed
13
#include <migraph/iterator_for.hpp>
Paul's avatar
Paul committed
14
15
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
Paul's avatar
Paul committed
16
#include <utility>
Paul's avatar
Paul committed
17
18

namespace migraph {
Paul's avatar
Paul committed
19
namespace gpu {
Paul's avatar
Paul committed
20

21
22
struct miopen_batch_norm_inference
{
wsttiger's avatar
wsttiger committed
23
    op::batch_norm_inference op;
24
25
26

    std::string name() const { return "gpu::batch_norm_inference"; }

Paul's avatar
Paul committed
27
    shape compute_shape(const std::vector<shape>& inputs) const
28
29
30
31
32
33
    {
        check_shapes{inputs, *this}.has(6);
        return op.compute_shape(
            {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
    }

Paul's avatar
Paul committed
34
35
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
36
    {
wsttiger's avatar
wsttiger committed
37
38
        auto x_desc  = make_tensor(args[0].get_shape());
        auto y_desc  = make_tensor(output_shape);
39
        auto bn_desc = make_tensor(args[3].get_shape());
40
41
42
43
44
45
46
47
48
49
50

        float alpha = 1.0, beta = 0.0f;

        miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                 miopenBatchNormMode_t(op.bn_mode),
                                                 &alpha,
                                                 &beta,
                                                 x_desc.get(),
                                                 args[0].implicit(),
                                                 y_desc.get(),
                                                 args[5].implicit(),
51
                                                 bn_desc.get(),
52
53
                                                 args[1].implicit(),
                                                 args[2].implicit(),
Paul's avatar
Paul committed
54
55
                                                 args[3].implicit(),
                                                 args[4].implicit(),
56
                                                 op.epsilon);
57
58
59
60
61

        return args[5];
    }
};

Paul's avatar
Paul committed
62
63
struct miopen_convolution
{
wsttiger's avatar
wsttiger committed
64
    op::convolution op;
Paul's avatar
Paul committed
65
    shared<convolution_descriptor> cd;
Paul's avatar
Paul committed
66
    miopenConvFwdAlgorithm_t algo{};
Paul's avatar
Paul committed
67

Paul's avatar
Paul committed
68
    std::string name() const { return "gpu::convolution"; }
Paul's avatar
Paul committed
69
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
70
    {
Paul's avatar
Paul committed
71
        check_shapes{inputs, *this}.has(4).standard();
Paul's avatar
Paul committed
72
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
73
    }
Paul's avatar
Paul committed
74
75
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
76
    {
Paul's avatar
Paul committed
77
78
        auto x_desc = make_tensor(args[0].get_shape());
        auto w_desc = make_tensor(args[1].get_shape());
Paul's avatar
Paul committed
79
80
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
81
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
82
        miopenConvolutionForward(ctx.handle.get(),
Paul's avatar
Paul committed
83
                                 &alpha,
Paul's avatar
Paul committed
84
                                 x_desc.get(),
Paul's avatar
Paul committed
85
                                 args[0].implicit(),
Paul's avatar
Paul committed
86
                                 w_desc.get(),
Paul's avatar
Paul committed
87
                                 args[1].implicit(),
Paul's avatar
Paul committed
88
                                 cd.get(),
Paul's avatar
Paul committed
89
                                 algo,
Paul's avatar
Paul committed
90
                                 &beta,
Paul's avatar
Paul committed
91
                                 y_desc.get(),
Paul's avatar
Paul committed
92
                                 args[3].implicit(),
Paul's avatar
Paul committed
93
                                 args[2].implicit(),
Paul's avatar
Paul committed
94
95
                                 args[2].get_shape().bytes());
        return args[3];
Paul's avatar
Paul committed
96
    }
Paul's avatar
Paul committed
97

Paul's avatar
Paul committed
98
    shape compile(context& ctx, const shape& output_shape, std::vector<instruction_ref> inputs)
Paul's avatar
Paul committed
99
    {
Paul's avatar
Paul committed
100
        shape workspace_shape{};
Paul's avatar
Paul committed
101
102
103
104
        auto x_desc = make_tensor(inputs[0]->get_shape());
        auto w_desc = make_tensor(inputs[1]->get_shape());
        auto y_desc = make_tensor(output_shape);

Paul's avatar
Paul committed
105
        std::size_t workspace_size = 0;
Paul's avatar
Paul committed
106
        miopenConvolutionForwardGetWorkSpaceSize(
Paul's avatar
Paul committed
107
            ctx.handle.get(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
Paul's avatar
Paul committed
108
109
        workspace_shape = shape{shape::int8_type, {workspace_size}};

mei-ye's avatar
mei-ye committed
110
        auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
mei-ye's avatar
mei-ye committed
111
        gpu_sync();
mei-ye's avatar
mei-ye committed
112
        auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
mei-ye's avatar
mei-ye committed
113
        gpu_sync();
mei-ye's avatar
mei-ye committed
114
        auto y = to_gpu(generate_argument(output_shape));
mei-ye's avatar
mei-ye committed
115
        gpu_sync();
Paul's avatar
Paul committed
116
        auto workspace = allocate_gpu(workspace_shape);
Paul's avatar
Paul committed
117

Paul's avatar
Paul committed
118
        int algo_count = 1;
Paul's avatar
Paul committed
119
120
121
122
123
124
125
126
127
128
129
130
        miopenConvAlgoPerf_t perf;
        miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
                                              x_desc.get(),
                                              x.implicit(),
                                              w_desc.get(),
                                              w.implicit(),
                                              cd.get(),
                                              y_desc.get(),
                                              y.implicit(),
                                              1,
                                              &algo_count,
                                              &perf,
Paul's avatar
Paul committed
131
132
                                              workspace.implicit(),
                                              workspace_size,
Paul's avatar
Paul committed
133
134
                                              false);
        algo = perf.fwd_algo;
135
        return shape{shape::int8_type, {perf.memory}};
Paul's avatar
Paul committed
136
    }
Paul's avatar
Paul committed
137
138
139
140
141
142
143
144
145

    friend std::ostream& operator<<(std::ostream& os, const miopen_convolution& self)
    {
        os << self.name() << "[";
        os << self.op << ", ";
        os << "algo=" << self.algo;
        os << "]";
        return os;
    }
Paul's avatar
Paul committed
146
147
};

Paul's avatar
Paul committed
148
149
struct miopen_pooling
{
wsttiger's avatar
wsttiger committed
150
    op::pooling op;
Paul's avatar
Paul committed
151
152
    shared<pooling_descriptor> pd;

Paul's avatar
Paul committed
153
    std::string name() const { return "gpu::pooling"; }
Paul's avatar
Paul committed
154
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
155
    {
Paul's avatar
Paul committed
156
        check_shapes{inputs, *this}.has(2).standard();
Paul's avatar
Paul committed
157
        return op.compute_shape({inputs.at(0)});
Paul's avatar
Paul committed
158
    }
Paul's avatar
Paul committed
159
160
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
161
    {
Paul's avatar
Paul committed
162
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
163
164
165
166
        auto y_desc = make_tensor(output_shape);

        float alpha = 1, beta = 0;

Paul's avatar
Paul committed
167
        miopenPoolingForward(ctx.handle.get(),
Paul's avatar
Paul committed
168
169
170
                             pd.get(),
                             &alpha,
                             x_desc.get(),
Paul's avatar
Paul committed
171
                             args[0].implicit(),
Paul's avatar
Paul committed
172
173
                             &beta,
                             y_desc.get(),
Paul's avatar
Paul committed
174
                             args[1].implicit(),
Paul's avatar
Paul committed
175
176
177
                             false,
                             nullptr,
                             0);
Paul's avatar
Paul committed
178

Paul's avatar
Paul committed
179
        return args[1];
Paul's avatar
Paul committed
180
181
182
    }
};

Paul's avatar
Paul committed
183
struct hip_add
Paul's avatar
Paul committed
184
{
Paul's avatar
Paul committed
185
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
186
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
187
    {
Paul's avatar
Paul committed
188
        // check_shapes{inputs, *this}.has(3).standard();
Paul's avatar
Paul committed
189
        check_shapes{inputs, *this}.has(3);
Paul's avatar
Paul committed
190
        return inputs.at(0);
Paul's avatar
Paul committed
191
192
    }

Paul's avatar
Paul committed
193
    argument compute(context&, const shape&, const std::vector<argument>& args) const
Paul's avatar
Paul committed
194
    {
Paul's avatar
Paul committed
195
        device::add(args[2], args[0], args[1]);
Paul's avatar
Paul committed
196
        return args[2];
Paul's avatar
Paul committed
197
198
199
200
201
    }
};

struct miopen_add
{
Paul's avatar
Paul committed
202
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
203
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
204
    {
Paul's avatar
Paul committed
205
        check_shapes{inputs, *this}.has(3).not_broadcasted();
Paul's avatar
Paul committed
206
        return inputs.at(0);
Paul's avatar
Paul committed
207
208
    }

Paul's avatar
Paul committed
209
210
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
211
    {
Paul's avatar
Paul committed
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
        float alpha = 1, beta = 0;
        auto a_desc = make_tensor(args[0].get_shape());
        auto b_desc = make_tensor(args[1].get_shape());
        auto c_desc = make_tensor(output_shape);
        miopenOpTensor(ctx.handle.get(),
                       miopenTensorOpAdd,
                       &alpha,
                       a_desc.get(),
                       args[0].implicit(),
                       &alpha,
                       b_desc.get(),
                       args[1].implicit(),
                       &beta,
                       c_desc.get(),
                       args[2].implicit());
        return args[2];
Paul's avatar
Paul committed
228
229
230
    }
};

Paul's avatar
Paul committed
231
232
struct miopen_gemm
{
wsttiger's avatar
wsttiger committed
233
    op::gemm op;
234
    std::string name() const { return "gpu::gemm"; }
Paul's avatar
Paul committed
235
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
236
    {
Paul's avatar
Paul committed
237
238
        check_shapes{inputs, *this}.has(3);
        return op.compute_shape({inputs.at(0), inputs.at(1)});
Paul's avatar
Paul committed
239
    }
Paul's avatar
Paul committed
240
241
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
242
    {
243
244
        float alpha     = 1.0f;
        float beta      = 0.0f;
Paul's avatar
Paul committed
245
246
        bool transa     = args[0].get_shape().transposed();
        bool transb     = args[1].get_shape().transposed();
247
248
249
        rocblas_int lda = args[0].get_shape().strides()[transa ? 1 : 0];
        rocblas_int ldb = args[1].get_shape().strides()[transb ? 1 : 0];
        rocblas_int ldc = args[2].get_shape().strides()[0];
250
251
252
        rocblas_int m   = output_shape.lens()[0];
        rocblas_int n   = output_shape.lens()[1];
        rocblas_int k   = args[0].get_shape().lens()[1];
253
        rocblas_sgemm(ctx.rbhandle.get(),
254
255
                      transb ? rocblas_operation_transpose : rocblas_operation_none,
                      transa ? rocblas_operation_transpose : rocblas_operation_none,
256
257
258
259
260
261
262
263
264
265
266
267
                      n,
                      m,
                      k,
                      &alpha,
                      args[1].implicit(),
                      ldb,
                      args[0].implicit(),
                      lda,
                      &beta,
                      args[2].implicit(),
                      ldc);
        return args[2];
Paul's avatar
Paul committed
268
269
270
    }
};

271
272
struct miopen_contiguous
{
wsttiger's avatar
wsttiger committed
273
    op::contiguous op;
Paul's avatar
Paul committed
274
    std::string name() const { return "gpu::contiguous"; }
Paul's avatar
Paul committed
275
    shape compute_shape(const std::vector<shape>& inputs) const
276
277
278
279
    {
        check_shapes{inputs, *this}.has(2);
        return op.compute_shape({inputs.at(0)});
    }
Paul's avatar
Paul committed
280
    argument compute(context&, shape output_shape, const std::vector<argument>& args) const
281
    {
Paul's avatar
Paul committed
282
283
        assert(output_shape == args[1].get_shape());
        assert(output_shape.standard());
Paul's avatar
Paul committed
284
        (void)output_shape;
285
        device::contiguous(args.at(1), args.at(0));
286
        return args.at(1);
287
288
289
    }
};

Paul's avatar
Paul committed
290
291
292
struct miopen_relu
{
    shared<activation_descriptor> ad;
Paul's avatar
Paul committed
293
    std::string name() const { return "gpu::relu"; }
Paul's avatar
Paul committed
294
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
295
    {
Paul's avatar
Paul committed
296
        check_shapes{inputs, *this}.has(2).not_broadcasted();
Paul's avatar
Paul committed
297
        return inputs.at(1);
Paul's avatar
Paul committed
298
299
    }

Paul's avatar
Paul committed
300
301
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
302
303
    {
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
304
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
305
        auto y_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
306
        miopenActivationForward(ctx.handle.get(),
Paul's avatar
Paul committed
307
308
309
                                ad.get(),
                                &alpha,
                                x_desc.get(),
Paul's avatar
Paul committed
310
                                args[0].implicit(),
Paul's avatar
Paul committed
311
312
                                &beta,
                                y_desc.get(),
Paul's avatar
Paul committed
313
                                args[1].implicit());
Paul's avatar
Paul committed
314

Paul's avatar
Paul committed
315
        return args[1];
Paul's avatar
Paul committed
316
317
318
    }
};

Paul's avatar
Paul committed
319
320
struct miopen_softmax
{
wsttiger's avatar
wsttiger committed
321
    op::softmax op;
Paul's avatar
Paul committed
322
323
324
325
    std::string name() const { return "gpu::softmax"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2).standard();
Paul's avatar
Paul committed
326
        return op.compute_shape({inputs.at(0)});
Paul's avatar
Paul committed
327
328
329
330
331
332
333
334
335
    }

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        float alpha = 1, beta = 0;
        auto x_desc = make_tensor(args[0].get_shape());
        auto y_desc = make_tensor(output_shape);
        miopenSoftmaxForward(ctx.handle.get(),
Paul's avatar
Paul committed
336
337
338
339
340
341
                             &alpha,
                             x_desc.get(),
                             args[0].implicit(),
                             &beta,
                             y_desc.get(),
                             args[1].implicit());
Paul's avatar
Paul committed
342
343
344
345
346

        return args[1];
    }
};

Paul's avatar
Paul committed
347
348
struct miopen_apply
{
Paul's avatar
Paul committed
349
    program* prog = nullptr;
Paul's avatar
Paul committed
350
    context ctx{};
Paul's avatar
Paul committed
351

Paul's avatar
Paul committed
352
353
354
355
356
357
358
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

Paul's avatar
Paul committed
359
360
    void apply()
    {
Paul's avatar
Paul committed
361
362
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
363
            auto s = it->get_shape();
Paul's avatar
Paul committed
364
            if(it->name() == "convolution")
Paul's avatar
Paul committed
365
            {
Paul's avatar
Paul committed
366
                check_shape(s, apply_convolution(it));
Paul's avatar
Paul committed
367
            }
Paul's avatar
Paul committed
368
            else if(it->name() == "activation")
Paul's avatar
Paul committed
369
            {
Paul's avatar
Paul committed
370
                check_shape(s, apply_activation(it));
Paul's avatar
Paul committed
371
            }
Paul's avatar
Paul committed
372
            else if(it->name() == "pooling")
Paul's avatar
Paul committed
373
            {
Paul's avatar
Paul committed
374
                check_shape(s, apply_pooling(it));
Paul's avatar
Paul committed
375
            }
Paul's avatar
Paul committed
376
            else if(it->name() == "add")
Paul's avatar
Paul committed
377
            {
Paul's avatar
Paul committed
378
                check_shape(s, apply_add(it));
Paul's avatar
Paul committed
379
            }
Paul's avatar
Paul committed
380
            else if(it->name() == "gemm")
Paul's avatar
Paul committed
381
            {
Paul's avatar
Paul committed
382
                check_shape(s, apply_gemm(it));
Paul's avatar
Paul committed
383
            }
Paul's avatar
Paul committed
384
            else if(it->name() == "contiguous")
385
            {
Paul's avatar
Paul committed
386
                check_shape(s, apply_contiguous(it));
387
            }
Paul's avatar
Paul committed
388
            else if(it->name() == "batch_norm_inference")
389
            {
Paul's avatar
Paul committed
390
                check_shape(s, apply_batch_norm_inference(it));
391
            }
Paul's avatar
Paul committed
392
393
394
395
            else if(it->name() == "softmax")
            {
                check_shape(s, apply_softmax(it));
            }
Paul's avatar
Paul committed
396
397
398
        }
    }

Paul's avatar
Paul committed
399
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
400
    {
Paul's avatar
Paul committed
401
        if(ins == --prog->end() and tag.empty())
Paul's avatar
Paul committed
402
403
404
405
406
        {
            return prog->add_parameter("output", s);
        }
        else
        {
Paul's avatar
Paul committed
407
            auto is     = prog->add_outline(s);
Paul's avatar
Paul committed
408
            auto result = prog->insert_instruction(ins, hip_allocate{std::move(tag)}, is);
Paul's avatar
Paul committed
409
410
411
412
            return result;
        }
    }

Paul's avatar
Paul committed
413
    instruction_ref apply_convolution(instruction_ref ins)
Paul's avatar
Paul committed
414
    {
wsttiger's avatar
wsttiger committed
415
        auto&& op = any_cast<op::convolution>(ins->get_operator());
Paul's avatar
Paul committed
416

Paul's avatar
Paul committed
417
        auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
418
        auto ws   = conv.compile(ctx, ins->get_shape(), ins->inputs());
Paul's avatar
Paul committed
419

420
        auto workspace = insert_allocation(ins, ws, "workspace");
Paul's avatar
Paul committed
421
        auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
422

Paul's avatar
Paul committed
423
        return prog->replace_instruction(
Paul's avatar
Paul committed
424
            ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
Paul's avatar
Paul committed
425
426
    }

Paul's avatar
Paul committed
427
    instruction_ref apply_pooling(instruction_ref ins)
Paul's avatar
Paul committed
428
    {
wsttiger's avatar
wsttiger committed
429
        auto&& op   = any_cast<op::pooling>(ins->get_operator());
Paul's avatar
Paul committed
430
        auto pd     = make_pooling(op);
Paul's avatar
Paul committed
431
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
432

Paul's avatar
Paul committed
433
        return prog->replace_instruction(
Paul's avatar
Paul committed
434
            ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
Paul's avatar
Paul committed
435
436
    }

Paul's avatar
Paul committed
437
    instruction_ref apply_activation(instruction_ref ins)
Paul's avatar
Paul committed
438
    {
wsttiger's avatar
wsttiger committed
439
        auto&& op = any_cast<op::activation>(ins->get_operator());
Paul's avatar
Paul committed
440
441
        auto ad   = make_relu();
        if(op.mode == "relu")
Paul's avatar
Paul committed
442
        {
Paul's avatar
Paul committed
443
            auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
444
            return prog->replace_instruction(
Paul's avatar
Paul committed
445
                ins, miopen_relu{std::move(ad)}, ins->inputs().at(0), output);
Paul's avatar
Paul committed
446
        }
Paul's avatar
Paul committed
447
        return ins;
Paul's avatar
Paul committed
448
    }
Paul's avatar
Paul committed
449

Paul's avatar
Paul committed
450
451
    instruction_ref apply_softmax(instruction_ref ins)
    {
wsttiger's avatar
wsttiger committed
452
        auto&& op   = any_cast<op::softmax>(ins->get_operator());
Paul's avatar
Paul committed
453
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
454
        return prog->replace_instruction(ins, miopen_softmax{op}, ins->inputs().at(0), output);
Paul's avatar
Paul committed
455
456
    }

Paul's avatar
Paul committed
457
    instruction_ref apply_add(instruction_ref ins)
Paul's avatar
Paul committed
458
    {
Paul's avatar
Paul committed
459
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
460
        return prog->replace_instruction(
Paul's avatar
Paul committed
461
            ins, hip_add{}, ins->inputs().at(0), ins->inputs().at(1), output);
Paul's avatar
Paul committed
462
    }
Paul's avatar
Paul committed
463

Paul's avatar
Paul committed
464
    instruction_ref apply_gemm(instruction_ref ins)
Paul's avatar
Paul committed
465
    {
wsttiger's avatar
wsttiger committed
466
        auto&& op   = any_cast<op::gemm>(ins->get_operator());
Paul's avatar
Paul committed
467
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
468
        return prog->replace_instruction(
Paul's avatar
Paul committed
469
            ins, miopen_gemm{op}, ins->inputs().at(0), ins->inputs().at(1), output);
Paul's avatar
Paul committed
470
    }
471

Paul's avatar
Paul committed
472
    instruction_ref apply_contiguous(instruction_ref ins)
473
    {
wsttiger's avatar
wsttiger committed
474
        auto&& op   = any_cast<op::contiguous>(ins->get_operator());
Paul's avatar
Paul committed
475
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
476
        return prog->replace_instruction(ins, miopen_contiguous{op}, ins->inputs().at(0), output);
477
    }
478

Paul's avatar
Paul committed
479
    instruction_ref apply_batch_norm_inference(instruction_ref ins)
480
    {
wsttiger's avatar
wsttiger committed
481
        auto&& op       = any_cast<op::batch_norm_inference>(ins->get_operator());
Paul's avatar
Paul committed
482
        auto output     = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
483
        shape old_shape = ins->inputs().at(1)->get_shape();
wsttiger's avatar
wsttiger committed
484
        std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
wsttiger's avatar
wsttiger committed
485
        auto reshape_op = op::reshape{new_shape};
Paul's avatar
Paul committed
486
        std::vector<instruction_ref> reshapes;
Paul's avatar
Paul committed
487
488
        std::transform(ins->inputs().begin() + 1,
                       ins->inputs().end(),
Paul's avatar
Paul committed
489
490
                       std::back_inserter(reshapes),
                       [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
Paul's avatar
Paul committed
491
        return prog->replace_instruction(ins,
Paul's avatar
Paul committed
492
                                         miopen_batch_norm_inference{op},
Paul's avatar
Paul committed
493
                                         ins->inputs().at(0),
Paul's avatar
Paul committed
494
495
496
497
498
                                         reshapes[0],
                                         reshapes[1],
                                         reshapes[2],
                                         reshapes[3],
                                         output);
499
    }
Paul's avatar
Paul committed
500
501
};

Paul's avatar
Paul committed
502
void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
Paul's avatar
Paul committed
503
} // namespace gpu
Paul's avatar
Paul committed
504
} // namespace migraph