#include <rocblas.h>
#include <migraph/gpu/lowering.hpp>

#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/generate.hpp>
#include <migraph/shape_for_each.hpp>

#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/dfor.hpp>
#include <migraph/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/add.hpp>
#include <migraph/iterator_for.hpp>

#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>

#include <migraph/gpu/convolution.hpp>
#include <migraph/gpu/pooling.hpp>
#include <migraph/gpu/gemm.hpp>
#include <utility>

namespace migraph {
namespace gpu {
struct miopen_batch_norm_inference
{
    batch_norm_inference op;

    std::string name() const { return "gpu::batch_norm_inference"; }

Paul's avatar
Paul committed
30
    shape compute_shape(const std::vector<shape>& inputs) const
31
32
33
34
35
36
    {
        check_shapes{inputs, *this}.has(6);
        return op.compute_shape(
            {inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
    }

Paul's avatar
Paul committed
37
38
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
39
    {
wsttiger's avatar
wsttiger committed
40
41
        auto x_desc  = make_tensor(args[0].get_shape());
        auto y_desc  = make_tensor(output_shape);
42
        auto bn_desc = make_tensor(args[3].get_shape());
43
44
45
46
47
48
49
50
51
52
53

        float alpha = 1.0, beta = 0.0f;

        miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                 miopenBatchNormMode_t(op.bn_mode),
                                                 &alpha,
                                                 &beta,
                                                 x_desc.get(),
                                                 args[0].implicit(),
                                                 y_desc.get(),
                                                 args[5].implicit(),
54
                                                 bn_desc.get(),
55
56
                                                 args[1].implicit(),
                                                 args[2].implicit(),
Paul's avatar
Paul committed
57
58
                                                 args[3].implicit(),
                                                 args[4].implicit(),
59
                                                 op.epsilon);
60
61
62
63
64

        return args[5];
    }
};

struct hip_add
Paul's avatar
Paul committed
66
{
Paul's avatar
Paul committed
67
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
68
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
69
    {
Paul's avatar
Paul committed
70
        // check_shapes{inputs, *this}.has(3).standard();
Paul's avatar
Paul committed
71
        check_shapes{inputs, *this}.has(3);
Paul's avatar
Paul committed
72
        return inputs.at(0);
Paul's avatar
Paul committed
73
74
    }

Paul's avatar
Paul committed
75
    argument compute(context&, const shape&, const std::vector<argument>& args) const
Paul's avatar
Paul committed
76
    {
Paul's avatar
Paul committed
77
        device::add(args[2], args[0], args[1]);
Paul's avatar
Paul committed
78
        return args[2];
Paul's avatar
Paul committed
79
80
81
82
83
    }
};

struct miopen_add
{
Paul's avatar
Paul committed
84
    std::string name() const { return "gpu::add"; }
Paul's avatar
Paul committed
85
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
86
    {
Paul's avatar
Paul committed
87
        check_shapes{inputs, *this}.has(3).not_broadcasted();
Paul's avatar
Paul committed
88
        return inputs.at(0);
Paul's avatar
Paul committed
89
90
    }

Paul's avatar
Paul committed
91
92
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
93
    {
Paul's avatar
Paul committed
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
        float alpha = 1, beta = 0;
        auto a_desc = make_tensor(args[0].get_shape());
        auto b_desc = make_tensor(args[1].get_shape());
        auto c_desc = make_tensor(output_shape);
        miopenOpTensor(ctx.handle.get(),
                       miopenTensorOpAdd,
                       &alpha,
                       a_desc.get(),
                       args[0].implicit(),
                       &alpha,
                       b_desc.get(),
                       args[1].implicit(),
                       &beta,
                       c_desc.get(),
                       args[2].implicit());
        return args[2];
Paul's avatar
Paul committed
110
111
112
    }
};

struct miopen_contiguous
{
    contiguous op;
Paul's avatar
Paul committed
116
    std::string name() const { return "gpu::contiguous"; }
Paul's avatar
Paul committed
117
    shape compute_shape(const std::vector<shape>& inputs) const
118
119
120
121
    {
        check_shapes{inputs, *this}.has(2);
        return op.compute_shape({inputs.at(0)});
    }
Paul's avatar
Paul committed
122
    argument compute(context&, shape output_shape, const std::vector<argument>& args) const
123
    {
Paul's avatar
Paul committed
124
125
        assert(output_shape == args[1].get_shape());
        assert(output_shape.standard());
Paul's avatar
Paul committed
126
        (void)output_shape;
127
        device::contiguous(args.at(1), args.at(0));
128
        return args.at(1);
129
130
131
    }
};

struct miopen_relu
{
    shared<activation_descriptor> ad;
Paul's avatar
Paul committed
135
    std::string name() const { return "gpu::relu"; }
Paul's avatar
Paul committed
136
    shape compute_shape(const std::vector<shape>& inputs) const
Paul's avatar
Paul committed
137
    {
Paul's avatar
Paul committed
138
        check_shapes{inputs, *this}.has(2).not_broadcasted();
Paul's avatar
Paul committed
139
        return inputs.at(1);
Paul's avatar
Paul committed
140
141
    }

Paul's avatar
Paul committed
142
143
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
Paul's avatar
Paul committed
144
145
    {
        float alpha = 1, beta = 0;
Paul's avatar
Paul committed
146
        auto x_desc = make_tensor(args[0].get_shape());
Paul's avatar
Paul committed
147
        auto y_desc = make_tensor(output_shape);
Paul's avatar
Paul committed
148
        miopenActivationForward(ctx.handle.get(),
Paul's avatar
Paul committed
149
150
151
                                ad.get(),
                                &alpha,
                                x_desc.get(),
Paul's avatar
Paul committed
152
                                args[0].implicit(),
Paul's avatar
Paul committed
153
154
                                &beta,
                                y_desc.get(),
Paul's avatar
Paul committed
155
                                args[1].implicit());
Paul's avatar
Paul committed
156

Paul's avatar
Paul committed
157
        return args[1];
Paul's avatar
Paul committed
158
159
160
    }
};

// Softmax lowered onto MIOpen. The second input is the preallocated GPU
// output buffer, which is returned as the result.
struct miopen_softmax
{
    softmax op;

    std::string name() const { return "gpu::softmax"; }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2).standard();
        return op.compute_shape({inputs.at(0)});
    }

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        // MIOpen computes: out = alpha*softmax(in) + beta*out.
        float alpha = 1;
        float beta  = 0;

        auto in_desc  = make_tensor(args[0].get_shape());
        auto out_desc = make_tensor(output_shape);

        miopenSoftmaxForward(ctx.handle.get(),
                             &alpha,
                             in_desc.get(),
                             args[0].implicit(),
                             &beta,
                             out_desc.get(),
                             args[1].implicit());

        return args[1];
    }
};

struct miopen_apply
{
Paul's avatar
Paul committed
191
    program* prog = nullptr;
Paul's avatar
Paul committed
192
    context ctx{};
Paul's avatar
Paul committed
193

Paul's avatar
Paul committed
194
195
196
197
198
199
200
    void check_shape(shape x, instruction_ref i)
    {
        assert(x == i->get_shape());
        (void)x;
        (void)i;
    }

Paul's avatar
Paul committed
201
202
    void apply()
    {
Paul's avatar
Paul committed
203
204
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
Paul's avatar
Paul committed
205
            auto s = it->get_shape();
Paul's avatar
Paul committed
206
            if(it->name() == "convolution")
Paul's avatar
Paul committed
207
            {
Paul's avatar
Paul committed
208
                check_shape(s, apply_convolution(it));
Paul's avatar
Paul committed
209
            }
Paul's avatar
Paul committed
210
            else if(it->name() == "activation")
Paul's avatar
Paul committed
211
            {
Paul's avatar
Paul committed
212
                check_shape(s, apply_activation(it));
Paul's avatar
Paul committed
213
            }
Paul's avatar
Paul committed
214
            else if(it->name() == "pooling")
Paul's avatar
Paul committed
215
            {
Paul's avatar
Paul committed
216
                check_shape(s, apply_pooling(it));
Paul's avatar
Paul committed
217
            }
Paul's avatar
Paul committed
218
            else if(it->name() == "add")
Paul's avatar
Paul committed
219
            {
Paul's avatar
Paul committed
220
                check_shape(s, apply_add(it));
Paul's avatar
Paul committed
221
            }
Paul's avatar
Paul committed
222
            else if(it->name() == "gemm")
Paul's avatar
Paul committed
223
            {
Paul's avatar
Paul committed
224
                check_shape(s, apply_gemm(it));
Paul's avatar
Paul committed
225
            }
Paul's avatar
Paul committed
226
            else if(it->name() == "contiguous")
227
            {
Paul's avatar
Paul committed
228
                check_shape(s, apply_contiguous(it));
229
            }
Paul's avatar
Paul committed
230
            else if(it->name() == "batch_norm_inference")
231
            {
Paul's avatar
Paul committed
232
                check_shape(s, apply_batch_norm_inference(it));
233
            }
Paul's avatar
Paul committed
234
235
236
237
            else if(it->name() == "softmax")
            {
                check_shape(s, apply_softmax(it));
            }
Paul's avatar
Paul committed
238
239
240
        }
    }

Paul's avatar
Paul committed
241
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
Paul's avatar
Paul committed
242
    {
Paul's avatar
Paul committed
243
        if(ins == --prog->end() and tag.empty())
Paul's avatar
Paul committed
244
245
246
247
248
        {
            return prog->add_parameter("output", s);
        }
        else
        {
Paul's avatar
Paul committed
249
            auto is     = prog->add_outline(s);
Paul's avatar
Paul committed
250
            auto result = prog->insert_instruction(ins, hip_allocate{std::move(tag)}, is);
Paul's avatar
Paul committed
251
252
253
254
            return result;
        }
    }

Paul's avatar
Paul committed
255
    instruction_ref apply_convolution(instruction_ref ins)
Paul's avatar
Paul committed
256
    {
257
        auto&& op = any_cast<convolution>(ins->get_operator());
Paul's avatar
Paul committed
258

Paul's avatar
Paul committed
259
        auto conv = miopen_convolution{op, make_conv(op)};
Paul's avatar
Paul committed
260
        auto ws   = conv.compile(ctx, ins->get_shape(), ins->inputs());
Paul's avatar
Paul committed
261

262
        auto workspace = insert_allocation(ins, ws, "workspace");
Paul's avatar
Paul committed
263
        auto output    = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
264

Paul's avatar
Paul committed
265
        return prog->replace_instruction(
Paul's avatar
Paul committed
266
            ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
Paul's avatar
Paul committed
267
268
    }

Paul's avatar
Paul committed
269
    instruction_ref apply_pooling(instruction_ref ins)
Paul's avatar
Paul committed
270
    {
271
        auto&& op   = any_cast<pooling>(ins->get_operator());
Paul's avatar
Paul committed
272
        auto pd     = make_pooling(op);
Paul's avatar
Paul committed
273
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
274

Paul's avatar
Paul committed
275
        return prog->replace_instruction(
Paul's avatar
Paul committed
276
            ins, miopen_pooling{op, std::move(pd)}, ins->inputs().at(0), output);
Paul's avatar
Paul committed
277
278
    }

Paul's avatar
Paul committed
279
    instruction_ref apply_activation(instruction_ref ins)
Paul's avatar
Paul committed
280
    {
281
        auto&& op = any_cast<activation>(ins->get_operator());
Paul's avatar
Paul committed
282
283
        auto ad   = make_relu();
        if(op.mode == "relu")
Paul's avatar
Paul committed
284
        {
Paul's avatar
Paul committed
285
            auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
286
            return prog->replace_instruction(
Paul's avatar
Paul committed
287
                ins, miopen_relu{std::move(ad)}, ins->inputs().at(0), output);
Paul's avatar
Paul committed
288
        }
Paul's avatar
Paul committed
289
        return ins;
Paul's avatar
Paul committed
290
    }
Paul's avatar
Paul committed
291

Paul's avatar
Paul committed
292
293
    instruction_ref apply_softmax(instruction_ref ins)
    {
Paul's avatar
Paul committed
294
        auto&& op   = any_cast<softmax>(ins->get_operator());
Paul's avatar
Paul committed
295
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
296
        return prog->replace_instruction(ins, miopen_softmax{op}, ins->inputs().at(0), output);
Paul's avatar
Paul committed
297
298
    }

Paul's avatar
Paul committed
299
    instruction_ref apply_add(instruction_ref ins)
Paul's avatar
Paul committed
300
    {
Paul's avatar
Paul committed
301
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
302
        return prog->replace_instruction(
Paul's avatar
Paul committed
303
            ins, hip_add{}, ins->inputs().at(0), ins->inputs().at(1), output);
Paul's avatar
Paul committed
304
    }
Paul's avatar
Paul committed
305

Paul's avatar
Paul committed
306
    instruction_ref apply_gemm(instruction_ref ins)
Paul's avatar
Paul committed
307
    {
308
        auto&& op   = any_cast<gemm>(ins->get_operator());
Paul's avatar
Paul committed
309
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
310
        return prog->replace_instruction(
Paul's avatar
Paul committed
311
            ins, miopen_gemm{op}, ins->inputs().at(0), ins->inputs().at(1), output);
Paul's avatar
Paul committed
312
    }
313

Paul's avatar
Paul committed
314
    instruction_ref apply_contiguous(instruction_ref ins)
315
    {
316
        auto&& op   = any_cast<contiguous>(ins->get_operator());
Paul's avatar
Paul committed
317
        auto output = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
318
        return prog->replace_instruction(ins, miopen_contiguous{op}, ins->inputs().at(0), output);
319
    }
320

Paul's avatar
Paul committed
321
    instruction_ref apply_batch_norm_inference(instruction_ref ins)
322
    {
323
        auto&& op       = any_cast<batch_norm_inference>(ins->get_operator());
Paul's avatar
Paul committed
324
        auto output     = insert_allocation(ins, ins->get_shape());
Paul's avatar
Paul committed
325
        shape old_shape = ins->inputs().at(1)->get_shape();
wsttiger's avatar
wsttiger committed
326
        std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
Paul's avatar
Paul committed
327
328
        auto reshape_op = reshape{new_shape};
        std::vector<instruction_ref> reshapes;
Paul's avatar
Paul committed
329
330
        std::transform(ins->inputs().begin() + 1,
                       ins->inputs().end(),
Paul's avatar
Paul committed
331
332
                       std::back_inserter(reshapes),
                       [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
Paul's avatar
Paul committed
333
        return prog->replace_instruction(ins,
Paul's avatar
Paul committed
334
                                         miopen_batch_norm_inference{op},
Paul's avatar
Paul committed
335
                                         ins->inputs().at(0),
Paul's avatar
Paul committed
336
337
338
339
340
                                         reshapes[0],
                                         reshapes[1],
                                         reshapes[2],
                                         reshapes[3],
                                         output);
341
    }
Paul's avatar
Paul committed
342
343
};

// Pass entry point: rewrite every supported instruction in `p` into its
// GPU counterpart using this pass's compilation context.
void lowering::apply(program& p) const
{
    miopen_apply applier{&p, ctx};
    applier.apply();
}
} // namespace gpu
} // namespace migraph