#include <migraphx/quantization.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/clip.hpp>
#include <migraphx/op/round.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/capture.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/target.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/make_op.hpp>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_INT8_QUANTIZATION_PARAMS)

instruction_ref insert_quant_ins(module& modl,
                                 instruction_ref& ins,
                                 shape::type_t type,
                                 std::unordered_map<instruction_ref, instruction_ref>& map_ins,
                                 float scale = 1.0f,
                                 float shift = 0.0f)
{
    if(map_ins.count(ins) > 0)
    {
        return map_ins[ins];
    }

    if(ins->name() == "undefined")
    {
        return ins;
    }

    assert(ins->get_shape().type() == shape::float_type or
           ins->get_shape().type() == shape::double_type or
           ins->get_shape().type() == shape::int32_type or
           ins->get_shape().type() == shape::half_type);
    instruction_ref quant_ins{};
    auto insert_loc = std::next(ins);
    if(type == shape::int8_type)
    {
        auto scaled_ins = ins;
        if(scale != 1.0f)
        {
            auto float_ins = scaled_ins;
            if(scaled_ins->get_shape().type() != shape::float_type)
            {
                float_ins = modl.insert_instruction(
                    insert_loc,
                    make_op("convert", {{"target_type", to_value(shape::float_type)}}),
                    scaled_ins);
            }
            std::vector<float> vec_scale(scaled_ins->get_shape().elements(), scale);
            auto l_scale = modl.add_literal(literal(float_ins->get_shape(), vec_scale));
            scaled_ins   = modl.insert_instruction(insert_loc, make_op("mul"), l_scale, float_ins);
        }

        auto shifted_ins = scaled_ins;
        if(shift != 0.0f)
        {
            auto float_ins = shifted_ins;
            if(shifted_ins->get_shape().type() != shape::float_type)
            {
                float_ins = modl.insert_instruction(
                    insert_loc,
                    make_op("convert", {{"target_type", to_value(shape::float_type)}}),
                    shifted_ins);
            }
            std::vector<float> vec_shift(shifted_ins->get_shape().elements(), shift);
            auto l_shift = modl.add_literal(literal(float_ins->get_shape(), vec_shift));
            shifted_ins  = modl.insert_instruction(insert_loc, make_op("add"), l_shift, float_ins);
        }

        auto rounded_ins  = modl.insert_instruction(insert_loc, make_op("round"), shifted_ins);
        auto rounded_lens = rounded_ins->get_shape().lens();
        auto max_clip     = modl.add_literal(127.0f);
        auto min_clip     = modl.add_literal(-128.0f);
        max_clip          = modl.insert_instruction(
            insert_loc, make_op("multibroadcast", {{"out_lens", rounded_lens}}), max_clip);
        min_clip = modl.insert_instruction(
            insert_loc, make_op("multibroadcast", {{"out_lens", rounded_lens}}), min_clip);
        auto clipped_ins =
            modl.insert_instruction(insert_loc, make_op("clip"), rounded_ins, min_clip, max_clip);
        quant_ins = modl.insert_instruction(
            insert_loc, make_op("convert", {{"target_type", type}}), clipped_ins);
    }
    else
    {
        quant_ins =
            modl.insert_instruction(insert_loc, make_op("convert", {{"target_type", type}}), ins);
    }

    map_ins[ins] = quant_ins;

    return quant_ins;
}
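
// Worked example of the int8 path above (numbers are illustrative only):
// with scale = 20.0f and shift = 0.0f, an fp32 input value of 2.5 becomes
// round(2.5 * 20) = 50, stays inside [-128, 127] after the clip, and is
// then converted to int8; a value of 9.0 would saturate to 127.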

// This function converts any instruction specified in the input
// from double or float to float16 by inserting a convert operator.
// The conversion could overflow, but that is very rare in deep
// learning workloads, so we simply truncate the input to get the fp16.
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
{
    auto* mm = prog.get_main_module();
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
    for(auto ins : iterator_for(*mm))
    {
        if(ins->name() == "@return")
            break;

        // "all" indicates that every instruction is converted
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();
        // process all inputs; if an input is fp32 or fp64, convert it
        // to fp16 by adding a convert operator.
        auto inputs = ins->inputs();
        std::vector<instruction_ref> converted_inputs;
        for(auto input : inputs)
        {
            auto s = input->get_shape();
            if(s.type() == shape::float_type || s.type() == shape::double_type)
            {
                // if the input is a convert operator, use its input
                // as the current input
                instruction_ref input_fp16{};
                if(input->name() == "convert" and
                   input->inputs().front()->get_shape().type() == shape::half_type)
                {
                    input_fp16 = input->inputs().front();
                }
                else
                {
                    input_fp16 = insert_quant_ins(*mm, input, shape::half_type, map_fp16);
                }
                converted_inputs.push_back(input_fp16);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change to the inputs, go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        auto op        = ins->get_operator();
        auto ins_shape = compute_shape(op, converted_inputs);
        if(ins_shape.type() != orig_type)
        {
            // check for the dead-code case to avoid an assert
            bool output_empty  = ins->outputs().empty();
            auto ins_orig_type = mm->insert_instruction(
                std::next(ins), make_op("convert", {{"target_type", orig_type}}), ins);
            if(!output_empty)
            {
                mm->replace_instruction(ins, ins_orig_type);
            }
        }

        mm->replace_instruction(ins, op, converted_inputs);
    }
}
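
// Usage sketch (assuming a program `p` built or parsed elsewhere; the
// instruction-name lists are illustrative):
//   quantize_fp16(p, {"all"});          // convert every instruction
//   quantize_fp16(p, {"dot", "add"});   // or only selected operators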

static void ins_quantize_int8(module& modl,
                              instruction_ref ins,
                              std::vector<instruction_ref>& converted_inputs,
                              const std::vector<std::pair<float, float>>& ins_quant_params)
{
    auto orig_type = ins->get_shape().type();
    auto inputs    = ins->inputs();
    if(ins->name() == "dot")
    {
        auto dot_op     = any_cast<op::dot>(ins->get_operator());
        float new_alpha = dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
        float new_beta  = dot_op.beta;
        // We need an additional check on the adjusted alpha and beta values.
        // If their magnitudes are at least 50 (a provisional threshold), we
        // can round them to integers to use as alpha and beta of the quant_dot
        float threshold = 50.0f;
        if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
        {
            int32_t quant_alpha = static_cast<int32_t>(std::round(new_alpha));
            int32_t quant_beta  = static_cast<int32_t>(std::round(new_beta));
            if(shape::int32_type == orig_type)
            {
                modl.replace_instruction(
                    ins,
                    make_op("quant_dot", {{"alpha", quant_alpha}, {"beta", quant_beta}}),
                    converted_inputs);
            }
            else
            {
                auto quant_dot = modl.insert_instruction(
                    ins,
                    make_op("quant_dot", {{"alpha", quant_alpha}, {"beta", quant_beta}}),
                    converted_inputs);
                modl.replace_instruction(
                    ins, make_op("convert", {{"target_type", to_value(orig_type)}}), quant_dot);
            }
        }
        // either alpha or beta cannot be quantized because the relative
        // rounding error would be too large
        else
        {
            if(converted_inputs.size() == 3)
            {
                converted_inputs.pop_back();
            }
            auto q_dot = modl.insert_instruction(
                ins, make_op("quant_dot", {{"alpha", 1}, {"beta", 0}}), converted_inputs);
            auto f_dot = modl.insert_instruction(
                ins, make_op("convert", {{"target_type", to_value(shape::float_type)}}), q_dot);
            auto c_shape = q_dot->get_shape();
            std::vector<float> vec_alpha(c_shape.elements(), new_alpha);
            auto l_alpha =
                modl.add_literal(literal({shape::float_type, c_shape.lens()}, vec_alpha));

            if(inputs.size() == 3 and dot_op.beta != 0.0f)
            {
                auto alpha_ab = modl.insert_instruction(ins, make_op("mul"), l_alpha, f_dot);
                std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
                auto l_beta =
                    modl.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
                instruction_ref beta_c{};
                if(orig_type != shape::float_type)
                {
                    auto fp32_c = modl.insert_instruction(
                        ins,
                        make_op("convert", {{"target_type", to_value(shape::float_type)}}),
                        inputs.back());
                    beta_c = modl.insert_instruction(ins, make_op("mul"), l_beta, fp32_c);
                }
                else
                {
                    beta_c = modl.insert_instruction(ins, make_op("mul"), l_beta, inputs.back());
                }

                if(orig_type == shape::float_type)
                {
                    modl.replace_instruction(ins, make_op("add"), alpha_ab, beta_c);
                }
                else
                {
                    auto f_res = modl.insert_instruction(ins, make_op("add"), alpha_ab, beta_c);
                    modl.replace_instruction(
                        ins, make_op("convert", {{"target_type", to_value(orig_type)}}), f_res);
                }
            }
            else
            {
                if(orig_type == shape::float_type)
                {
                    modl.replace_instruction(ins, make_op("mul"), l_alpha, f_dot);
                }
                else
                {
                    auto alpha_ab = modl.insert_instruction(ins, make_op("mul"), l_alpha, f_dot);
                    modl.replace_instruction(
                        ins, make_op("convert", {{"target_type", to_value(orig_type)}}), alpha_ab);
                }
            }
        }
    }
    else if(ins->name() == "convolution")
    {
        // The current MIOpen convolution does not support alpha and beta,
        // so we need a separate multiply to adjust the output
        auto conv_op       = any_cast<op::convolution>(ins->get_operator());
        auto padding       = conv_op.padding;
        auto stride        = conv_op.stride;
        auto dilation      = conv_op.dilation;
        auto padding_mode  = conv_op.padding_mode;
        auto group         = conv_op.group;
        auto adjust_factor = 1.0f / (ins_quant_params[0].first * ins_quant_params[1].first);

        auto quant_conv = modl.insert_instruction(
            ins,
            op::quant_convolution{padding, stride, dilation, padding_mode, group},
            converted_inputs);
        float threshold = 50.0f;
        std::vector<float> vec_factor(quant_conv->get_shape().elements(), adjust_factor);
        if(quant_conv->get_shape().type() == orig_type and adjust_factor >= threshold)
        {
            auto l_factor = modl.add_literal(
                literal(quant_conv->get_shape(), vec_factor.begin(), vec_factor.end()));
            modl.replace_instruction(ins, make_op("mul"), quant_conv, l_factor);
        }
        // convert the quant_conv output to float, multiply by the factor,
        // and convert back to the original type
        else
        {
            auto float_conv = modl.insert_instruction(
                ins,
                make_op("convert", {{"target_type", to_value(shape::float_type)}}),
                quant_conv);
            auto l_factor = modl.add_literal(literal(float_conv->get_shape(), vec_factor));
            if(orig_type == shape::float_type)
            {
                modl.replace_instruction(ins, make_op("mul"), l_factor, float_conv);
            }
            else
            {
                auto adjusted_conv =
                    modl.insert_instruction(ins, make_op("mul"), l_factor, float_conv);
                modl.replace_instruction(
                    ins, make_op("convert", {{"target_type", to_value(orig_type)}}), adjusted_conv);
            }
        }
    }
    else
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: does not support operator " + ins->name());
    }
}
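
// Why new_alpha = alpha / (scale_a * scale_b) above: each int8 input is
// A_q ≈ A * scale_a and B_q ≈ B * scale_b (shift is taken as 0), so
// alpha * (A x B) ≈ (alpha / (scale_a * scale_b)) * (A_q x B_q); the int32
// result of quant_dot therefore needs only this single scalar correction.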

// int8 quantization differs from fp16 since int8 can only represent values
// in the range -128 ~ 127. To convert a float or double to int8, we need a
// scale and a shift; the conversion is v_int8 = fp * scale + shift.
// To simplify the changes, we take the shift to be 0.0f for now.
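// Worked example (calibration numbers assumed for illustration): if the
// largest absolute value observed for an input tensor is 6.35, its scale is
// 127 / 6.35 = 20, so 2.5 maps to round(2.5 * 20) = 50, while an outlier
// of -7.0 would saturate to -128 after the clip.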
void quantize_int8_impl(program& prog,
                        const std::vector<std::pair<float, float>>& quant_params,
                        const std::vector<std::string>& ins_names)
{
    if(enabled(MIGRAPHX_INT8_QUANTIZATION_PARAMS{}))
    {
        for(std::size_t i = 0; i < quant_params.size(); ++i)
        {
            auto param = quant_params.at(i);
            std::cout << "ins_index = " << i << ", scale = " << param.first
                      << ", shift = " << param.second << std::endl;
        }
        std::cout << std::endl;
    }

    // For now, we only support the int8 quantization of gemm and convolution
    std::set<std::string> op_names = {"convolution", "dot"};
    std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
    if(!std::includes(
           op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
    }

    auto* mm                      = prog.get_main_module();
    std::size_t quant_param_index = 0;
    std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
    std::unordered_map<instruction_ref, std::size_t> map_ins_index;
    for(auto ins : iterator_for(*mm))
    {
        if(ins->name() == "@return")
            break;

        if(not contains(ins_names, ins->name()))
        {
            continue;
        }

        // for the dot operator, there could be 2 or 3 input arguments;
        // if the 3rd argument is present, convert it to int32.
        std::vector<instruction_ref> converted_inputs;

        // process all inputs: if an input is fp32 or fp64, convert it
        // to int8 by adding a convert operator, then replace
        // the operator with the corresponding int8 version
        auto inputs = ins->inputs();
        std::vector<std::pair<float, float>> ins_quant_params;
        for(auto input : inputs)
        {
            // calculate the index of each instruction to be quantized
            std::size_t ins_index =
                (map_ins_index.count(input) > 0) ? map_ins_index[input] : quant_param_index++;
            map_ins_index[input] = ins_index;

            auto param = quant_params[map_ins_index[input]];
            ins_quant_params.push_back(param);

            // In general, the target_type is int8, but for the dot
            // operation, if it has 3 inputs, then the last one should
            // be converted to int32_type
            shape::type_t quant_type = shape::int8_type;
            if((ins->name() == "dot") and (inputs.size() == 3) and (input == inputs.back()))
            {
                quant_type = shape::int32_type;
            }

            auto s = input->get_shape();
            if((s.type() == shape::float_type or s.type() == shape::double_type or
                s.type() == shape::half_type or s.type() == shape::int32_type) and
               s.type() != quant_type)
            {
                // if the input is a convert operator, use its input
                // as the current input
                instruction_ref quant_input{};
                if(input->name() == "convert" and
                   input->inputs().front()->get_shape().type() == quant_type)
                {
                    quant_input = input->inputs().front();
                    // the scale in this case is not used, so set the scale
                    // to 1.0f for this parameter
                    ins_quant_params.back() = std::pair<float, float>(1.0f, 0.0f);
                }
                else
                {
                    quant_input = insert_quant_ins(
                        *mm, input, quant_type, map_quant_ins, param.first, param.second);
                }
                converted_inputs.push_back(quant_input);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change to the inputs, go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        ins_quantize_int8(*mm, ins, converted_inputs, ins_quant_params);
    }

    if(quant_param_index != quant_params.size())
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: number of scales does not match");
    }
}

void quantize_int8(program& prog,
                   const target& t,
                   const std::vector<parameter_map>& calibration,
                   const std::vector<std::string>& ins_names)
{
    // insert capture operators to record the arguments to be quantized
    auto cap_prog          = prog;
    auto int8_quant_params = capture_arguments(cap_prog, t, ins_names);

    // compile the instrumented copy so it can be evaluated on the target
    cap_prog.compile(t);

    // run the program with all calibration data to compute the
    // quantization scale and shift
    for(auto&& arg : calibration)
    {
        parameter_map m;
        for(auto&& x : cap_prog.get_parameter_shapes())
        {
            if(arg.count(x.first) > 0)
            {
                assert(x.second == arg.at(x.first).get_shape());
                m[x.first] = t.copy_to(arg.at(x.first));
            }
            else
            {
                m[x.first] = t.allocate(x.second);
            }
        }
        cap_prog.eval(m);
    }

    quantize_int8_impl(prog, *int8_quant_params, ins_names);
}
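
// Usage sketch (assuming a parsed program `p`, a target `t`, and calibration
// parameter maps `cali` prepared by the caller):
//   quantize_int8(p, t, cali, {"dot", "convolution"});
// A copy of the program is instrumented with capture operators and run on
// the calibration data to collect scales; `p` is then rewritten in place.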

// For each input argument of the instructions to be quantized, we insert a
// capture operator to compute the scale and shift
std::size_t capture_arguments(program& prog,
                              const std::vector<std::string>& ins_names,
                              const std::function<void(std::size_t, std::vector<argument>)>& func)
{
    auto* mm                = prog.get_main_module();
    size_t num_quant_params = 0;
    // int8 quantization only supports dot and convolution
    std::set<std::string> op_names = {"dot", "convolution"};
    std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
    if(!std::includes(
           op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
    {
        MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
    }

    std::unordered_map<instruction_ref, instruction_ref> ins_map;
    for(auto ins : iterator_for(*mm))
    {
        if(not contains(ins_names, ins->name()))
        {
            continue;
        }

        auto inputs = ins->inputs();
        std::vector<instruction_ref> new_args;
        for(auto input : inputs)
        {
            instruction_ref new_ins{};
            if(ins_map.count(input) > 0)
            {
                new_ins = ins_map[input];
            }
            else
            {
                new_ins = mm->insert_instruction(
                    std::next(input), op::capture{num_quant_params++, func}, input);
                ins_map[input] = new_ins;
            }
            new_args.push_back(new_ins);
        }
        instruction::replace(ins, ins->get_operator(), ins->get_shape(), new_args);
    }

    return num_quant_params;
}
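
// Effect of the rewrite above, sketched on a single gemm (names illustrative):
//   c = dot(a, b)   ==>   c = dot(capture[0](a), capture[1](b))
// Each capture op is expected to forward its input unchanged and to invoke
// `func` with its parameter index and the runtime argument during evaluation;
// this is the contract the calibration lambda below relies on.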

std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments_impl(program& prog, const target& t, const std::vector<std::string>& ins_names)
{
    std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
        std::make_shared<std::vector<std::pair<float, float>>>();
    std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();

    auto calc_quant_params = [int8_quant_params, max_abs_vals, &t](std::size_t ins_index,
                                                                   std::vector<argument> args) {
        std::pair<float, float> param_pair{64.0f, 0.0f};

        // scale and shift are needed only for the int8 type, and we do not
        // consider shift, so set it to 0
        std::vector<float> vec_val;
        argument arg = t.copy_from(args.front());
        arg.visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
        auto max_val                = *std::max_element(vec_val.begin(), vec_val.end());
        auto min_val                = *std::min_element(vec_val.begin(), vec_val.end());
        auto max_abs                = std::max(std::fabs(max_val), std::fabs(min_val));
        max_abs_vals->at(ins_index) = std::max(max_abs_vals->at(ins_index), max_abs);

        // if all values are 0, there is no need to scale
        if(max_abs_vals->at(ins_index) == 0.0f)
        {
            param_pair.first = 1.0f;
        }
        else
        {
            param_pair.first = 127.0f / max_abs_vals->at(ins_index);
        }
        int8_quant_params->at(ins_index) = param_pair;
    };

    auto num_params = capture_arguments(prog, ins_names, calc_quant_params);

    int8_quant_params->resize(num_params, std::pair<float, float>(64.0f, 0.0f));
    max_abs_vals->resize(num_params, 0.0f);

    return int8_quant_params;
}
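
// Note on the lambda above: it runs once per captured argument per
// evaluation, so across multiple calibration runs max_abs_vals keeps a
// running maximum and the stored scale always reflects the largest
// magnitude seen so far (e.g. batches with maxima 3.0 then 5.0 yield
// scale = 127 / 5.0).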

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx