#include <migraphx/quantization.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

instruction_ref insert_quant_ins(program& prog,
                                 instruction_ref& ins,
                                 shape::type_t type,
                                 std::unordered_map<instruction_ref, instruction_ref>& map_ins,
                                 float scale = 1.0f,
                                 float shift = 0.0f)
{
    if(map_ins.count(ins) > 0)
    {
        return map_ins[ins];
    }

    if(ins->name() == "undefined")
    {
        return ins;
    }

    assert(ins->get_shape().type() == shape::float_type ||
           ins->get_shape().type() == shape::double_type ||
           ins->get_shape().type() == shape::int32_type);
    instruction_ref quant_ins{};
    quant_ins    = prog.insert_instruction(std::next(ins), op::convert{type, scale, shift}, ins);
    map_ins[ins] = quant_ins;

    return quant_ins;
}

// This function is to convert any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
// For the conversion, there could be cases of overflowing, but it
// is very rare in the area of deep learning, so we just truncate
// the input to get the fp16.
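// Illustrative usage (not part of the original source): calling
//     quantize(prog, {"dot", "add"});
// inserts a convert to half_type in front of every dot and add instruction
// whose inputs are fp32/fp64, and converts the result back to the original
// type afterwards.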
void quantize(program& prog, const std::vector<std::string>& ins_names)
{
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
    for(auto ins : iterator_for(prog))
    {
        // "all" indicates that every instruction is converted
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();
        // process all inputs; if an input is fp32 or fp64, convert it
        // to fp16 by adding a convert operator.
        auto inputs = ins->inputs();
        std::vector<instruction_ref> converted_inputs;
        for(auto input : inputs)
        {
            auto s = input->get_shape();
            if(s.type() == shape::float_type || s.type() == shape::double_type)
            {
                // if the input is a convert operator, use its input
                // as the current input
                instruction_ref input_fp16{};
                if(input->name() == "convert")
                {
                    input_fp16 = input->inputs().front();
                }
                else
                {
                    input_fp16 = insert_quant_ins(prog, input, shape::half_type, map_fp16);
                }
                converted_inputs.push_back(input_fp16);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change to the inputs; go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        auto op        = ins->get_operator();
        auto ins_shape = compute_shape(op, converted_inputs);
        if(ins_shape.type() != orig_type)
        {
            // check the dead code case to avoid assert
            bool output_empty = ins->outputs().empty();
            auto ins_orig_type =
                prog.insert_instruction(std::next(ins), op::convert{orig_type}, ins);
            if(!output_empty)
            {
                prog.replace_instruction(ins, ins_orig_type);
            }
        }

        prog.replace_instruction(ins, op, converted_inputs);
    }
}

void quantize(program& prog) { quantize(prog, {"all"}); }

// int8 quantization is different from fp16 since int8 can only represent
// values in the range -128 ~ 127. To convert a float or double to int8, we
// need a scale and a shift; the conversion is v_int8 = fp * scale + shift.
// To simplify the changes, we consider shift to be 0.0f for now.
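// Illustrative example (the scale value is assumed, not taken from a
// calibration run): with scale = 64.0f and shift = 0.0f, an fp32 value of
// 1.5f maps to 1.5f * 64.0f = 96 in int8, while any value whose scaled
// result falls outside [-128, 127] cannot be represented exactly.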
void quantize_int8(program& prog,
                   const std::vector<std::string>& ins_names,
                   std::vector<std::pair<float, float>>& int8_quant_params)
{
    // // For debugging
    // auto print_gemm_res = [&](std::size_t ins_index, std::vector<migraphx::argument> args) {
    //     // scale and shift are needed only for the int8 type, and we do not
    //     // consider shift, so set shift to 0
    //     std::vector<float> vec_val;
    //     args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
    //     std::cout << "quant_gemm = " << std::endl;
    //     for (size_t i = 0; i < 20; i++)
    //     {
    //         std::cout << vec_val[i] << "\t";
    //     }
    //     std::cout << std::endl;
    // };

    // // For debugging
    // auto print_conv_res = [&](std::size_t ins_index, std::vector<migraphx::argument> args) {
    //     // scale and shift are needed only for the int8 type, and we do not
    //     // consider shift, so set shift to 0
    //     std::vector<float> vec_val;
    //     args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
    //     std::cout << "quant_conv = " << std::endl;
    //     for (size_t i = 0; i < 20; i++)
    //     {
    //         std::cout << vec_val[i] << "\t";
    //     }
    //     std::cout << std::endl;
    // };

    // For now, we only support the int8 quantization of gemm and convolution
    std::vector<std::string> op_names = {"dot", "convolution"};
    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
           return (std::find(op_names.begin(), op_names.end(), name) != op_names.end());
       }))
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
    }

    std::size_t quant_param_index = 0;
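    // Note (descriptive comment, not from the original source): int8_quant_params
    // is expected to supply one (scale, shift) pair per input of every matching
    // instruction, consumed in the order the instructions appear in the program.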
    std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
    for(auto ins : iterator_for(prog))
    {
        if(not contains(ins_names, ins->name()))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();

        // for the dot operator, there could be 2 or 3 input arguments
        // if the 3rd argument is available, convert it to an int32.
        std::vector<instruction_ref> converted_inputs;

        // process all inputs; if an input is fp32 or fp64, convert it
        // to int8 by adding a convert operator, and replace
        // the operator with the corresponding int8 version
        auto inputs = ins->inputs();
        std::vector<std::pair<float, float>> ins_quant_params;
        for(auto input : inputs)
        {
            // In general, the target_type is int8, but for the dot
            // operation, if it has 3 inputs, then the last one should
            // be converted to int32_type
            shape::type_t quant_type = shape::int8_type;
            auto param               = int8_quant_params[quant_param_index++];
            ins_quant_params.push_back(param);
            if(ins->name() == "dot" and inputs.size() == 3 and input == inputs.back())
            {
                quant_type = shape::int32_type;
            }

            auto s = input->get_shape();
            if((s.type() == shape::float_type || s.type() == shape::double_type ||
                s.type() == shape::int32_type) &&
               s.type() != quant_type)
            {
                // if the input is a convert operator, use its input
                // as the current input
                instruction_ref quant_input{};
                if(input->name() == "convert")
                {
                    auto tmp_ins = input->inputs().front();
                    if(tmp_ins->get_shape().type() == quant_type)
                    {
                        quant_input = input->inputs().front();
                    }
                    else
                    {
                        quant_input = insert_quant_ins(
                            prog, input, quant_type, map_quant_ins, param.first, param.second);
                    }
                }
                else
                {
                    quant_input = insert_quant_ins(
                        prog, input, quant_type, map_quant_ins, param.first, param.second);
                }
                converted_inputs.push_back(quant_input);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change to the inputs; go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        // When converting from other types to int8_type, there are parameters
        // used as scale and shift (0.0f), which will generate results different
        // from the original ones. To adjust the output to be "correct"
        // (approximately equal), we need an additional calculation for the
        // adjustment
        if(ins->name() == "dot")
        {
            auto dot_op = any_cast<op::dot>(ins->get_operator());
            float new_alpha =
                dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
            float new_beta = dot_op.beta;
            // We need an additional check on the quant_alpha value. If
            // abs(quant_alpha) >= 50 (a temporary value chosen here), we can
            // convert it to an integer and use it as the new_alpha in quant_dot
            float threshold = 50.0f;
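            // Illustrative example (scale values assumed): with dot_op.alpha = 1.0f
            // and per-input scales of 0.1f, new_alpha = 1.0f / (0.1f * 0.1f) = 100.0f,
            // which is above the threshold and is truncated to the integer 100.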
            if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
            {
                int32_t quant_alpha = static_cast<int32_t>(new_alpha);
                int32_t quant_beta  = static_cast<int32_t>(new_beta);
                shape quant_shape   = compute_shape(op::quant_dot{1, 0}, converted_inputs);
                if(quant_shape.type() == orig_type)
                {
                    prog.replace_instruction(
                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                }
                else
                {
                    auto quant_dot = prog.insert_instruction(
                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                    prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
                }
            }
            // only alpha can be quantized; quantizing beta would cause a
            // large error, so we have to do the multiplication and
            // addition manually
            else if(fabs(new_alpha) >= threshold)
            {
                int32_t quant_alpha = static_cast<int32_t>(new_alpha);
                int32_t quant_beta  = 0;
                if(orig_type == shape::int32_type)
                {
                    if(inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        prog.replace_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                    }
                    // if there are 3 inputs, we need to consider the third argument
                    else
                    {
                        auto q_dot = prog.insert_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                        std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
                        auto l_beta = prog.add_literal(literal{orig_type, vec_beta});
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
                    }
                }
                else
                {
                    if(inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        auto q_dot = prog.insert_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                        prog.replace_instruction(ins, op::convert{orig_type}, q_dot);
                    }
                    // if there are 3 inputs, we need to consider the third argument
                    else
                    {
                        auto q_dot = prog.insert_instruction(
                            ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                        auto oq_dot = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
                        std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
                        auto l_beta = prog.add_literal(literal{oq_dot->get_shape(), vec_beta});
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
                    }
                }
            }
            else
            {
                auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
                std::vector<float> vec_alpha(q_dot->get_shape().elements(), new_alpha);
                if(orig_type == shape::int32_type)
                {
                    auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
                    if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        prog.replace_instruction(ins, op::mul{}, l_alpha, q_dot);
                    }
                    // case of 3 arguments
                    else
                    {
                        std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
                        auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
                        auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, q_dot);
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
                    }
                }
                else
                {
                    auto oq_dot  = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
                    auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
                    if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
                    {
                        prog.replace_instruction(ins, op::mul{}, l_alpha, oq_dot);
                    }
                    // case of 3 arguments
                    else
                    {
                        std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
                        auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
                        auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, oq_dot);
                        auto beta_c =
                            prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                        prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
                    }
                }
            }
        }
        else if(ins->name() == "convolution")
        {
            // Current MIOpen convolution does not support alpha and beta,
            // so we need a separate multiply to adjust the output
            auto conv_op       = any_cast<op::convolution>(ins->get_operator());
            auto padding       = conv_op.padding;
            auto stride        = conv_op.stride;
            auto dilation      = conv_op.dilation;
            auto padding_mode  = conv_op.padding_mode;
            auto group         = conv_op.group;
            auto adjust_factor = 1.0 / (ins_quant_params[0].first * ins_quant_params[1].first);
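            // Illustrative example (scale values assumed): with per-input scales
            // of 0.1f, adjust_factor = 1.0 / (0.1f * 0.1f) = 100.0; the
            // quant_convolution output (converted back to the original type when
            // needed) is multiplied by this factor below to approximate the
            // original convolution result.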

            shape quant_shape =
                compute_shape(op::quant_convolution{padding, stride, dilation, padding_mode, group},
                              converted_inputs);
            std::vector<float> vec_factor(quant_shape.elements(), adjust_factor);
            auto fl = prog.add_literal(literal{{orig_type, quant_shape.lens()}, vec_factor});
            if(quant_shape.type() == orig_type)
            {
                if(adjust_factor == 1.0f)
                {
                    prog.replace_instruction(
                        ins,
                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
                        converted_inputs);
                }
                else
                {
                    auto quant_conv = prog.insert_instruction(
                        ins,
                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
                        converted_inputs);
                    prog.replace_instruction(ins, op::mul{}, quant_conv, fl);
                }
            }
            else
            {
                auto quant_conv = prog.insert_instruction(
                    ins,
                    op::quant_convolution{padding, stride, dilation, padding_mode, group},
                    converted_inputs);
                if(adjust_factor == 1.0f)
                {
                    prog.replace_instruction(ins, op::convert{orig_type}, quant_conv);
                }
                else
                {
                    auto oq_conv = prog.insert_instruction(ins, op::convert{orig_type}, quant_conv);
                    prog.replace_instruction(ins, op::mul{}, oq_conv, fl);
                }
            }
        }
        else
        {
            MIGRAPHX_THROW("INT8_QUANTIZE: does not support operator" + ins->name());
        }
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx