"host/online_compile/include/hipoc_program.hpp" did not exist on "1685048a6725e531b577510295d2d62664c15962"
quantization.cpp 16.4 KB
Newer Older
Shucai Xiao's avatar
Shucai Xiao committed
1
#include <migraphx/quantization.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

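// Insert a convert instruction after `ins` to change its output to `type`
// (optionally applying a scale and shift), reusing a previously inserted
// convert if one is already recorded in map_ins.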
instruction_ref insert_quant_ins(program& prog,
                                 instruction_ref& ins,
                                 shape::type_t type,
                                 std::unordered_map<instruction_ref, instruction_ref>& map_ins,
                                 float scale = 1.0f,
                                 float shift = 0.0f)
{
    if(map_ins.count(ins) > 0)
    {
        return map_ins[ins];
    }

    if(ins->name() == "undefined")
    {
        return ins;
    }

    assert(ins->get_shape().type() == shape::float_type ||
           ins->get_shape().type() == shape::double_type ||
           ins->get_shape().type() == shape::int32_type);
    instruction_ref quant_ins{};
    quant_ins    = prog.insert_instruction(std::next(ins), op::convert{type, scale, shift}, ins);
    map_ins[ins] = quant_ins;

    return quant_ins;
}

// This function converts any instructions specified in the input list
// from double or float to float16 by inserting a convert operator.
// The conversion could overflow, but that is very rare in
// deep learning, so we simply truncate the input to get the fp16 value.
void quantize(program& prog, const std::vector<std::string>& ins_names)
{
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
    for(auto ins : iterator_for(prog))
    {
        // "all" indicates that every instruction should be converted
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();
        // Process all inputs; if an input is fp32 or fp64, convert it
        // to fp16 by adding a convert operator.
        auto inputs = ins->inputs();
        std::vector<instruction_ref> converted_inputs;
        for(auto input : inputs)
        {
            auto s = input->get_shape();
            if(s.type() == shape::float_type || s.type() == shape::double_type)
            {
                // if the input is already a convert operator, use its
                // input as the current input
                instruction_ref input_fp16{};
                if(input->name() == "convert")
                {
                    input_fp16 = input->inputs().front();
                }
                else
                {
                    input_fp16 = insert_quant_ins(prog, input, shape::half_type, map_fp16);
                }
                converted_inputs.push_back(input_fp16);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no input was changed; go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

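        // If the operator computed on the converted inputs produces a
        // different output type than the original, convert the result back
        // to the original type before replacing the instruction.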
        auto op        = ins->get_operator();
        auto ins_shape = compute_shape(op, converted_inputs);
        if(ins_shape.type() != orig_type)
        {
            // check for the dead-code case (no outputs) to avoid an assert
            bool output_empty = ins->outputs().empty();
            auto ins_orig_type =
                prog.insert_instruction(std::next(ins), op::convert{orig_type}, ins);
            if(!output_empty)
            {
                prog.replace_instruction(ins, ins_orig_type);
            }
        }

        prog.replace_instruction(ins, op, converted_inputs);
    }
}

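// Overload that quantizes every instruction in the program to fp16.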
void quantize(program& prog) { quantize(prog, {"all"}); }

// int8 quantization is different from fp16 since int8 can only hold values
// in the range -128 ~ 127. To convert a float or double to int8, we need a
// scale and a shift, so the conversion is v_int8 = fp * scale + shift.
// To simplify the changes, we consider the shift to be 0.0f for now.
void quantize_int8(program& prog, const std::vector<std::string>& ins_names)
{
    // For now, we only support the int8 quantization of gemm and convolution
    std::vector<std::string> op_names = {"dot", "convolution"};
    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
           return (std::find(op_names.begin(), op_names.end(), name) != op_names.end());
       }))
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
    }

    // temporary values used just for testing
    std::vector<std::pair<float, float>> int8_param{{127.0f, 0.0f}, {127.0f, 0.0f}, {128.0f, 0.0f}};

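    // map from an original input instruction to its inserted convert instruction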
    std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
    for(auto ins : iterator_for(prog))
    {
        if(not contains(ins_names, ins->name()))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();

        // For the dot operator, there could be 2 or 3 input arguments;
        // if the 3rd argument is present, convert it to int32.
        std::vector<instruction_ref> converted_inputs;

        // Process all inputs; if an input is fp32 or fp64, convert it
        // to int8 by adding a convert operator, then replace the
        // operator with the corresponding int8 version.
        auto inputs             = ins->inputs();
        std::size_t param_index = 0;
        for(auto input : inputs)
        {
            // In general, the target_type is int8, but for the dot
            // operation, if it has 3 inputs, then the last one should
            // be converted to int32_type
            shape::type_t quant_type = shape::int8_type;
            auto param               = int8_param[param_index++];
            if(ins->name() == "dot" and inputs.size() == 3 and input == inputs.back())
            {
                quant_type = shape::int32_type;
            }

            auto s = input->get_shape();
            if((s.type() == shape::float_type || s.type() == shape::double_type ||
                s.type() == shape::int32_type) &&
               s.type() != quant_type)
            {
                // if the input is already a convert operator, use its
                // input as the current input
                instruction_ref quant_input{};
                if(input->name() == "convert")
                {
                    auto tmp_ins = input->inputs().front();
                    if(tmp_ins->get_shape().type() == quant_type)
                    {
                        quant_input = input->inputs().front();
                    }
                    else
                    {
                        quant_input = insert_quant_ins(
                            prog, input, quant_type, map_quant_ins, param.first, param.second);
                    }
                }
                else
                {
                    quant_input = insert_quant_ins(
                        prog, input, quant_type, map_quant_ins, param.first, param.second);
                }
                converted_inputs.push_back(quant_input);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no input was changed; go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        // When converting from other types to int8_type, parameters are
        // used as a scale and a shift (0.0f), which generates results
        // different from the original ones. To adjust the output to be
        // "correct" (approximately equal), we need additional calculation
        // for the adjustment.
        if(ins->name() == "dot")
        {
            auto dot_op     = any_cast<op::dot>(ins->get_operator());
            float new_alpha = dot_op.alpha / (int8_param[0].first * int8_param[1].first);
            float new_beta  = dot_op.beta;
            // We need an additional check on the new alpha value. If
            // abs(new_alpha) >= 50 (a temporary threshold), we can convert
            // it to an integer to use as the alpha of the quant_dot.
            float threshold = 50.0f;
            if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
            {
                int32_t quant_alpha = static_cast<int32_t>(new_alpha);
                int32_t quant_beta  = static_cast<int32_t>(new_beta);
                shape quant_shape   = compute_shape(op::quant_dot{1, 0}, converted_inputs);
                if(quant_shape.type() == orig_type)
                {
                    prog.replace_instruction(
                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                }
                else
                {
                    auto quant_dot = prog.insert_instruction(
                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                    prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
                }
            }
            // Only alpha can be quantized; quantizing beta would cause a
            // large error, so we have to do the multiplication and
            // addition manually.
            else if(fabs(new_alpha) >= threshold)
            {
                int32_t quant_alpha = static_cast<int32_t>(new_alpha);
                int32_t quant_beta  = 0;
                if(orig_type == shape::int32_type)
                {
                    if(inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        prog.replace_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                    }
                    // if there are 3 inputs, we need to consider the third argument
                    else
                    {
                        auto q_dot = prog.insert_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                        std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
                        auto l_beta = prog.add_literal(literal{orig_type, vec_beta});
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
                    }
                }
                else
                {
                    if(inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        auto q_dot = prog.insert_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                        prog.replace_instruction(ins, op::convert{orig_type}, q_dot);
                    }
                    // if there are 3 inputs, we need to consider the third argument
                    else
                    {
                        auto q_dot = prog.insert_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                        auto oq_dot = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
                        std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
                        auto l_beta = prog.add_literal(literal{oq_dot->get_shape(), vec_beta});
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
                    }
                }
            }
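            // new_alpha is too small to be quantized to an integer, so run
            // quant_dot with alpha = 1 and apply the scaling (and beta term)
            // with explicit mul/add operations afterwards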
            else
            {
                auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
                std::vector<float> vec_alpha(q_dot->get_shape().elements(), new_alpha);
                if(orig_type == shape::int32_type)
                {
                    auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
                    if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        prog.replace_instruction(ins, op::mul{}, l_alpha, q_dot);
                    }
                    // case of 3 arguments
                    else
                    {
                        std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
                        auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
                        auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, q_dot);
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
                    }
                }
                else
                {
Shucai Xiao's avatar
Shucai Xiao committed
309
                    auto oq_dot  = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
                    auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
                    if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        prog.replace_instruction(ins, op::mul{}, l_alpha, oq_dot);
                    }
                    // case of 3 arguments
                    else
                    {
                        std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
                        auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
                        auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, oq_dot);
322
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
                    }
                }
            }
        }
        else if(ins->name() == "convolution")
        {
            // Current MIOpen convolution does not support alpha and beta,
            // so we need a separate multiply to adjust the output
            auto conv_op       = any_cast<op::convolution>(ins->get_operator());
            auto padding       = conv_op.padding;
            auto stride        = conv_op.stride;
            auto dilation      = conv_op.dilation;
            auto padding_mode  = conv_op.padding_mode;
            auto group         = conv_op.group;
            auto adjust_factor = 1.0 / (int8_param[0].first * int8_param[1].first);

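            // compute the int8 convolution output shape and build a literal
            // holding the factor used to scale the result back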
            shape quant_shape =
                compute_shape(op::quant_convolution{padding, stride, dilation, padding_mode, group},
                              converted_inputs);
            std::vector<float> vec_factor(quant_shape.elements(), adjust_factor);
            auto fl = prog.add_literal(literal{{orig_type, quant_shape.lens()}, vec_factor});
            if(quant_shape.type() == orig_type)
            {
                if(adjust_factor == 1.0f)
                {
                    prog.replace_instruction(
                        ins,
                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
                        converted_inputs);
                }
                else
                {
                    auto quant_conv = prog.insert_instruction(
                        ins,
                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
                        converted_inputs);
                    prog.replace_instruction(ins, op::mul{}, quant_conv, fl);
                }
            }
            else
            {
                auto quant_conv = prog.insert_instruction(
                    ins,
                    op::quant_convolution{padding, stride, dilation, padding_mode, group},
                    converted_inputs);
                if(adjust_factor == 1.0f)
                {
                    prog.replace_instruction(ins, op::convert{orig_type}, quant_conv);
                }
                else
                {
                    auto oq_conv = prog.insert_instruction(ins, op::convert{orig_type}, quant_conv);
                    prog.replace_instruction(ins, op::mul{}, oq_conv, fl);
                }
            }
        }
        else
        {
            MIGRAPHX_THROW("INT8_QUANTIZE: does not support operator" + ins->name());
        }
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx