#include <migraphx/quantization.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/capture.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <utility>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

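// Insert a convert instruction after ins that casts it to the given type,
// reusing a previously created conversion from map_ins when available.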
instruction_ref insert_quant_ins(program& prog,
                                 instruction_ref& ins,
                                 shape::type_t type,
                                 std::unordered_map<instruction_ref, instruction_ref>& map_ins,
                                 float scale = 1.0f,
                                 float shift = 0.0f)
{
    if(map_ins.count(ins) > 0)
    {
        return map_ins[ins];
    }

    if(ins->name() == "undefined")
    {
        return ins;
    }

    if(scale < 0.0f)
    {
        MIGRAPHX_THROW("INSERT_QUANT_INS: scale less than 0");
    }

    assert(ins->get_shape().type() == shape::float_type ||
           ins->get_shape().type() == shape::double_type ||
           ins->get_shape().type() == shape::int32_type);
    auto quant_ins =
        prog.insert_instruction(std::next(ins), op::convert{type, scale, shift}, ins);
    map_ins[ins] = quant_ins;

    return quant_ins;
}
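
// A minimal sketch (an illustration/assumption, not MIGraphX code) of what
// the inserted op::convert{type, scale, shift} computes per element when
// the target type is int8; the exact rounding and saturation behavior is
// defined by op::convert itself:
//
//   #include <algorithm>
//   #include <cmath>
//   #include <cstdint>
//
//   int8_t convert_element(float v, float scale, float shift)
//   {
//       float q = v * scale + shift;                      // apply scale/shift
//       q       = std::min(127.0f, std::max(-128.0f, q)); // saturate to int8
//       return static_cast<int8_t>(std::round(q));
//   }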

// This function converts instructions whose names are given in the input
// from double or float to float16 by inserting a convert operator.
// The conversion can overflow, but that is rare in deep learning
// workloads, so we simply truncate the input to get the fp16 value.
void quantize(program& prog, const std::vector<std::string>& ins_names)
{
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
    for(auto ins : iterator_for(prog))
    {
        // "all" means that every instruction is converted
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();
        // process all inputs: if an input is fp32 or fp64, convert it
        // to fp16 by adding a convert operator.
        auto inputs = ins->inputs();
        std::vector<instruction_ref> converted_inputs;
        for(auto input : inputs)
        {
            auto s = input->get_shape();
            if(s.type() == shape::float_type || s.type() == shape::double_type)
            {
                // if the input is already a convert operator, use its
                // input as the current input
                instruction_ref input_fp16{};
                if(input->name() == "convert")
                {
                    input_fp16 = input->inputs().front();
                }
                else
                {
                    input_fp16 = insert_quant_ins(prog, input, shape::half_type, map_fp16);
                }
                converted_inputs.push_back(input_fp16);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change to the inputs, so go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        auto op        = ins->get_operator();
        auto ins_shape = compute_shape(op, converted_inputs);
        if(ins_shape.type() != orig_type)
        {
            // check for the dead-code case to avoid an assertion failure
            bool output_empty = ins->outputs().empty();
            auto ins_orig_type =
                prog.insert_instruction(std::next(ins), op::convert{orig_type}, ins);
            if(!output_empty)
            {
                prog.replace_instruction(ins, ins_orig_type);
            }
        }

        prog.replace_instruction(ins, op, converted_inputs);
    }
}

void quantize(program& prog) { quantize(prog, {"all"}); }
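
// A usage sketch for the fp16 path (a hypothetical program; the parameter
// names and shapes are illustrative only):
//
//   migraphx::program p;
//   auto a = p.add_parameter("a", migraphx::shape{migraphx::shape::float_type, {4, 4}});
//   auto b = p.add_parameter("b", migraphx::shape{migraphx::shape::float_type, {4, 4}});
//   p.add_instruction(migraphx::op::dot{}, a, b);
//   migraphx::quantize(p, {"dot"}); // surrounds the dot inputs/output with converts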

// int8 quantization is different from fp16, since int8 can only represent
// values in the range -128 ~ 127. To convert a float or double value to
// int8, we need a scale and a shift; the conversion is then
// v_int8 = fp * scale + shift.
// To simplify the changes, we treat the shift as 0.0f for now.
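// As a worked example with hypothetical numbers: for a tensor whose values
// lie in [-3.2, 5.1], max_abs = 5.1 and scale = 127.0f / 5.1f (about 24.9),
// so a value of 2.0f converts to 2.0f * 24.9f, which rounds to 50.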
void quantize_int8(program& prog,
                   const std::vector<std::string>& ins_names,
                   const std::vector<std::pair<float, float>>& quant_params)
{
    // print the quantization parameters for debugging
    for(size_t i = 0; i < quant_params.size(); i++)
    {
        auto param = quant_params.at(i);
        std::cout << "index = " << i << ", scale = " << param.first
                  << ", shift = " << param.second << std::endl;
    }
    std::cout << std::endl;

    // For now, we only support int8 quantization of gemm (dot) and convolution
    std::vector<std::string> op_names = {"dot", "convolution"};
    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
           return (std::find(op_names.begin(), op_names.end(), name) != op_names.end());
       }))
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: only supports DOT and CONVOLUTION operations");
    }

    std::size_t quant_param_index = 0;
    std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
    std::unordered_map<instruction_ref, std::size_t> map_index;
    for(auto ins : iterator_for(prog))
    {
        if(not contains(ins_names, ins->name()))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();

        // for the dot operator, there could be 2 or 3 input arguments
        // if the 3rd argument is available, convert it to an int32.
        std::vector<instruction_ref> converted_inputs;

        // process all inputs: if an input is fp32 or fp64, convert it
        // to int8 by adding a convert operator, and replace the operator
        // with the corresponding int8 version
        auto inputs = ins->inputs();
        std::vector<std::pair<float, float>> ins_quant_params;
        for(auto input : inputs)
        {
            // assign a quantization-parameter index to each input the
            // first time it is seen
            if(map_index.count(input) == 0)
            {
                map_index[input] = quant_param_index++;
            }
            auto param = quant_params[map_index[input]];
            ins_quant_params.push_back(param);

            // In general, the target type is int8, but for a dot
            // operation with 3 inputs, the last input should be
            // converted to int32_type
            shape::type_t quant_type = shape::int8_type;
            if(ins->name() == "dot" and inputs.size() == 3 and input == inputs.back())
            {
                quant_type = shape::int32_type;
            }

            auto s = input->get_shape();
            if((s.type() == shape::float_type || s.type() == shape::double_type ||
                s.type() == shape::int32_type) &&
               s.type() != quant_type)
            {
                // if the input is already a convert operator, use its
                // input as the current input
                instruction_ref quant_input{};
                if(input->name() == "convert")
                {
                    auto tmp_ins = input->inputs().front();
                    if(tmp_ins->get_shape().type() == quant_type)
                    {
                        quant_input = input->inputs().front();
                    }
                    else
                    {
                        quant_input = insert_quant_ins(
                            prog, input, quant_type, map_quant_ins, param.first, param.second);
                    }
                }
                else
                {
                    quant_input = insert_quant_ins(
                        prog, input, quant_type, map_quant_ins, param.first, param.second);
                }
                converted_inputs.push_back(quant_input);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change for the input, go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        // When converting from other types to int8_type, a scale and a
        // shift (0.0f here) are applied, which generates results different
        // from the original ones. To adjust the output to be approximately
        // equal to the original, additional calculation is needed
        if(ins->name() == "dot")
        {
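            // The converted inputs carry factors of their capture scales,
            // so the raw quantized output approximates
            // scale_a * scale_b * (A * B); dividing alpha by that product
            // restores the original magnitude of alpha * A * B.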
            auto dot_op = any_cast<op::dot>(ins->get_operator());
            float new_alpha =
                dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
            float new_beta = dot_op.beta;
            // quant_dot takes integer alpha and beta, so check the adjusted
            // values first. If both abs(new_alpha) and abs(new_beta) are at
            // least the threshold (50, a temporary value), truncating them
            // to integers introduces only a small relative error
            float threshold = 50.0f;
            if(std::fabs(new_alpha) >= threshold && std::fabs(new_beta) >= threshold)
            {
                int32_t quant_alpha = static_cast<int32_t>(new_alpha);
                int32_t quant_beta  = static_cast<int32_t>(new_beta);
                shape quant_shape   = compute_shape(op::quant_dot{1, 0}, converted_inputs);
                if(quant_shape.type() == orig_type)
                {
                    prog.replace_instruction(
                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                }
                else
                {
                    auto quant_dot = prog.insert_instruction(
                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
                    prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
                }
            }
            // either alpha or beta cannot be quantized because the relative
            // rounding error would be too big
            else
            {
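                // quant_dot{1, 0} yields only the raw int32 product of A and
                // B; the alpha/beta adjustment is applied afterwards, either
                // as an explicit alpha * A * B + beta * C computation or as
                // a scaling convert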
                auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
                if(inputs.size() == 3 and dot_op.beta != 0.0f)
                {
                    auto alpha_ab = prog.insert_instruction(
                        ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
                    auto c_shape = q_dot->get_shape();
                    std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
                    auto l_beta =
                        prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
                    instruction_ref beta_c{};
                    if(orig_type != shape::float_type)
                    {
                        auto fp32_c = prog.insert_instruction(
                            ins, op::convert{shape::float_type}, inputs.back());
                        auto fp32_beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
                        beta_c = prog.insert_instruction(ins, op::convert{orig_type}, fp32_beta_c);
                    }
                    else
                    {
                        beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                    }
                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
                }
                else
                {
                    prog.replace_instruction(ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
                }
            }
        }
        else if(ins->name() == "convolution")
        {
            // The current MIOpen convolution does not support alpha and
            // beta, so a separate scaling of the output is needed
            auto conv_op       = any_cast<op::convolution>(ins->get_operator());
            auto padding       = conv_op.padding;
            auto stride        = conv_op.stride;
            auto dilation      = conv_op.dilation;
            auto padding_mode  = conv_op.padding_mode;
            auto group         = conv_op.group;
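            // the quantized inputs carry factors of their two scales
            // (ins_quant_params[0].first and ins_quant_params[1].first), so
            // the output is scaled back by the reciprocal of their product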
            auto adjust_factor = 1.0f / (ins_quant_params[0].first * ins_quant_params[1].first);
            auto quant_conv = prog.insert_instruction(
                ins,
                op::quant_convolution{padding, stride, dilation, padding_mode, group},
                converted_inputs);
            prog.replace_instruction(ins, op::convert{orig_type, adjust_factor, 0.0f}, quant_conv);
        }
        else
        {
            MIGRAPHX_THROW("QUANTIZE_INT8: does not support operator " + ins->name());
        }
    }

    if(quant_param_index != quant_params.size())
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: number of scales does not match");
    }
}

void quantize_int8(program& prog, const std::vector<std::string>& ins_names)
{
    quantize_int8(prog, ins_names, *prog.int8_quant_params);
}

void quantize_int8(program& prog)
{
    std::vector<std::string> ins_names = {"dot", "convolution"};
    quantize_int8(prog, ins_names);
}

// For each input argument of the instructions of interest, insert a
// capture operator so that the scale and shift can be computed from
// actual data
void capture_arguments(program& prog,
                       const std::vector<std::string>& ins_names,
                       std::function<void(std::size_t, std::vector<argument>)> func)
{

    size_t num_quant_params = 0;
    // int8 quantization only supports dot and convolution (and their
    // quantized counterparts)
    std::vector<std::string> op_names = {"dot", "convolution", "quant_dot", "quant_convolution"};
    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
           return std::find(op_names.begin(), op_names.end(), name) != op_names.end();
       }))
    {
        MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
    }

    std::unordered_map<instruction_ref, instruction_ref> ins_map;
    for(auto ins : iterator_for(prog))
    {
        if(not contains(ins_names, ins->name()))
        {
            continue;
        }

        auto inputs = ins->inputs();
        std::vector<instruction_ref> new_args;
        for(auto input : inputs)
        {
            instruction_ref new_ins{};
            if(ins_map.count(input) > 0)
            {
                new_ins = ins_map[input];
            }
            else
            {
                new_ins = prog.insert_instruction(
                    std::next(input), op::capture{num_quant_params++, func}, input);
                ins_map[input] = new_ins;
            }
            new_args.push_back(new_ins);
        }
        instruction::replace(ins, ins->get_operator(), ins->get_shape(), new_args);
    }

    // set one pair of parameters for each captured argument
    prog.int8_quant_params->resize(num_quant_params, std::make_pair(-1.0f, -1.0f));
}
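
// Note: op::capture{i, f} is assumed to act as an identity at evaluation
// time that additionally invokes f(i, {captured argument}); this is how the
// calc_quant_params callback below observes real tensor data.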

void capture_arguments(program& prog, const std::vector<std::string>& ins_names)
{
    auto calc_quant_params = [&](std::size_t ins_index, std::vector<migraphx::argument> args) {
        std::pair<float, float> param_pair{1.0f, 0.0f};

        // scale and shift are needed only for the int8 type, and we do not
        // consider shift here, so it stays at 0
        std::vector<float> vec_val;
        args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
        auto max_val = *std::max_element(vec_val.begin(), vec_val.end());
        auto min_val = *std::min_element(vec_val.begin(), vec_val.end());
        auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
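        // map the observed range [-max_abs, max_abs] onto [-127, 127],
        // which gives a scale of 127 / max_abs; an all-zero tensor falls
        // back to a scale of 1.0f (an assumed default) to avoid division
        // by zero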
        param_pair.first                     = (max_abs == 0.0f) ? 1.0f : 127.0f / max_abs;
        (*prog.int8_quant_params)[ins_index] = param_pair;
    };

    capture_arguments(prog, ins_names, calc_quant_params);
}

void capture_arguments(program& prog)
{
    std::vector<std::string> ins_names = {"dot", "convolution"};
    capture_arguments(prog, ins_names);
}
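
// A usage sketch of the whole int8 calibration flow (hypothetical program,
// target, and calibration data; some_target, a_data, and b_data are
// placeholders):
//
//   migraphx::program p;
//   auto a = p.add_parameter("a", migraphx::shape{migraphx::shape::float_type, {4, 4}});
//   auto b = p.add_parameter("b", migraphx::shape{migraphx::shape::float_type, {4, 4}});
//   p.add_instruction(migraphx::op::dot{}, a, b);
//
//   migraphx::capture_arguments(p);         // wrap dot inputs in capture ops
//   p.compile(some_target);                 // compile for the chosen target
//   p.eval({{"a", a_data}, {"b", b_data}}); // run calibration data to fill the scales
//   migraphx::quantize_int8(p);             // rewrite the dot into a quant_dot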

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx