quantization.cpp 7.25 KB
Newer Older
Shucai Xiao's avatar
Shucai Xiao committed
1
#include <migraphx/quantization.hpp>
2
3
4
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
5
#include <migraphx/op/convert.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
6
7
8
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
9
#include <migraphx/op/quant_dot.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
10
#include <migraphx/op/capture.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
11
#include <migraphx/op/convolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
12
#include <migraphx/op/quant_convolution.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
13
#include <migraphx/op/multibroadcast.hpp>
14
#include <migraphx/stringutils.hpp>
15
#include <migraphx/ranges.hpp>
16
#include <utility>
17
18
#include <iomanip>
#include <fstream>
19
20
21
22

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

Shucai Xiao's avatar
Shucai Xiao committed
23
24
25
// Return an instruction producing `ins` converted to `type`.
// A convert operator is inserted right after `ins` the first time it is
// requested; the result is remembered in `map_ins` so that later requests
// for the same instruction reuse it. Instructions named "undefined" are
// returned unchanged and are not cached.
instruction_ref insert_quant_ins(program& prog,
                                 instruction_ref& ins,
                                 shape::type_t type,
                                 std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
    // a convert was already created for this instruction, reuse it
    const auto cached = map_ins.find(ins);
    if(cached != map_ins.end())
    {
        return cached->second;
    }

    if(ins->name() == "undefined")
    {
        return ins;
    }

    // only float, double and int32 sources are expected here
    assert(ins->get_shape().type() == shape::float_type ||
           ins->get_shape().type() == shape::double_type ||
           ins->get_shape().type() == shape::int32_type);

    auto converted = prog.insert_instruction(std::next(ins), op::convert{type}, ins);
    map_ins[ins]   = converted;

    return converted;
}

Shucai Xiao's avatar
Shucai Xiao committed
48
49
50
51
52
// This function is to convert any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
// For the conversion, there could be cases of overflowing, but it
// is very rare in the area of deeping learning, so we just do a
// truncate of the input to get the fp16.
53
void quantize(program& prog, const std::vector<std::string>& ins_names)
54
{
55
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
Shucai Xiao's avatar
Shucai Xiao committed
56
    for(auto ins : iterator_for(prog))
57
    {
58
        // all indicates every instruction is converted
Shucai Xiao's avatar
Shucai Xiao committed
59
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
60
61
62
        {
            continue;
        }
63

64
        shape::type_t orig_type = ins->get_shape().type();
Shucai Xiao's avatar
Shucai Xiao committed
65
        // process all inputs, if input is a fp32 or fp64, convert it
66
        // to a fp16 by adding a convert operator.
67
        auto inputs = ins->inputs();
68
        std::vector<instruction_ref> converted_inputs;
Shucai Xiao's avatar
Shucai Xiao committed
69
        for(auto input : inputs)
70
71
        {
            auto s = input->get_shape();
Shucai Xiao's avatar
Shucai Xiao committed
72
            if(s.type() == shape::float_type || s.type() == shape::double_type)
73
            {
74
                // if the input is a convert operator, uses its input
75
76
                // as its current input
                instruction_ref input_fp16{};
77
                if(input->name() == "convert")
78
79
80
81
82
                {
                    input_fp16 = input->inputs().front();
                }
                else
                {
Shucai Xiao's avatar
Shucai Xiao committed
83
                    input_fp16 = insert_quant_ins(prog, input, shape::half_type, map_fp16);
84
                }
85
                converted_inputs.push_back(input_fp16);
86
            }
87
88
89
90
91
92
            else
            {
                converted_inputs.push_back(input);
            }
        }

93
        // no change for the input, go to the next instruction
Shucai Xiao's avatar
Shucai Xiao committed
94
        if(inputs == converted_inputs)
95
        {
96
            continue;
Shucai Xiao's avatar
Shucai Xiao committed
97
98
99
100
101
102
        }

        auto op        = ins->get_operator();
        auto ins_shape = compute_shape(op, converted_inputs);
        if(ins_shape.type() != orig_type)
        {
Shucai Xiao's avatar
Shucai Xiao committed
103
104
105
106
107
            // check the dead code case to avoid assert
            bool output_empty = ins->outputs().empty();
            auto ins_orig_type =
                prog.insert_instruction(std::next(ins), op::convert{orig_type}, ins);
            if(!output_empty)
108
            {
Shucai Xiao's avatar
Shucai Xiao committed
109
                prog.replace_instruction(ins, ins_orig_type);
110
            }
111
        }
Shucai Xiao's avatar
Shucai Xiao committed
112
113

        prog.replace_instruction(ins, op, converted_inputs);
114
115
116
    }
}

Shucai Xiao's avatar
Shucai Xiao committed
117
// Convenience overload: forwards the "all" selector to the name based
// overload so that every instruction in the program is considered.
void quantize(program& prog)
{
    quantize(prog, {"all"});
}
Shucai Xiao's avatar
Shucai Xiao committed
118

Shucai Xiao's avatar
Shucai Xiao committed
119
120
// For the input of each input argument, we need to insert a
// capture operator to compute the scale and shift
Shucai Xiao's avatar
Shucai Xiao committed
121
std::size_t capture_arguments(program& prog,
Shucai Xiao's avatar
Shucai Xiao committed
122
123
                              const std::vector<std::string>& ins_names,
                              const std::function<void(std::size_t, std::vector<argument>)>& func)
Shucai Xiao's avatar
Shucai Xiao committed
124
{
125

Shucai Xiao's avatar
Shucai Xiao committed
126
    size_t num_quant_params = 0;
Shucai Xiao's avatar
Shucai Xiao committed
127
    // the int8 quantization only support dot and convolution
128
    std::vector<std::string> op_names = {"dot", "convolution"};
Shucai Xiao's avatar
Shucai Xiao committed
129
130
131
    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
           return std::find(op_names.begin(), op_names.end(), name) != op_names.end();
       }))
Shucai Xiao's avatar
Shucai Xiao committed
132
133
134
135
136
137
138
    {
        MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
    }

    std::unordered_map<instruction_ref, instruction_ref> ins_map;
    for(auto ins : iterator_for(prog))
    {
Shucai Xiao's avatar
Shucai Xiao committed
139
        if(not contains(ins_names, ins->name()))
Shucai Xiao's avatar
Shucai Xiao committed
140
141
142
143
144
145
        {
            continue;
        }

        auto inputs = ins->inputs();
        std::vector<instruction_ref> new_args;
Shucai Xiao's avatar
Shucai Xiao committed
146
        for(auto input : inputs)
Shucai Xiao's avatar
Shucai Xiao committed
147
148
        {
            instruction_ref new_ins{};
Shucai Xiao's avatar
Shucai Xiao committed
149
            if(ins_map.count(input) > 0)
Shucai Xiao's avatar
Shucai Xiao committed
150
151
152
153
154
            {
                new_ins = ins_map[input];
            }
            else
            {
Shucai Xiao's avatar
Shucai Xiao committed
155
                new_ins = prog.insert_instruction(
Shucai Xiao's avatar
Shucai Xiao committed
156
                    std::next(input), op::capture{num_quant_params++, func}, input);
Shucai Xiao's avatar
Shucai Xiao committed
157
158
159
160
161
162
                ins_map[input] = new_ins;
            }
            new_args.push_back(new_ins);
        }
        instruction::replace(ins, ins->get_operator(), ins->get_shape(), new_args);
    }
Shucai Xiao's avatar
Shucai Xiao committed
163

Shucai Xiao's avatar
Shucai Xiao committed
164
    return num_quant_params;
Shucai Xiao's avatar
Shucai Xiao committed
165
166
}

Shucai Xiao's avatar
Shucai Xiao committed
167
168
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments(program& prog, const std::vector<std::string>& ins_names)
Shucai Xiao's avatar
Shucai Xiao committed
169
{
Shucai Xiao's avatar
Shucai Xiao committed
170
171
172
173
    std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
        std::make_shared<std::vector<std::pair<float, float>>>();
    std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();

Shucai Xiao's avatar
Shucai Xiao committed
174
175
    auto calc_quant_params = [int8_quant_params, max_abs_vals](
                                 std::size_t ins_index, std::vector<migraphx::argument> args) {
Shucai Xiao's avatar
Shucai Xiao committed
176
        std::pair<float, float> param_pair{64.0f, 0.0f};
177
178
179
180
181

        // scale and shift is need for only int8 type, and we do not
        // consider shift, so set shift to 0
        std::vector<float> vec_val;
        args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
Shucai Xiao's avatar
Shucai Xiao committed
182
183
184
        auto max_val                = *std::max_element(vec_val.begin(), vec_val.end());
        auto min_val                = *std::min_element(vec_val.begin(), vec_val.end());
        auto max_abs                = std::max(std::fabs(max_val), std::fabs(min_val));
Shucai Xiao's avatar
Shucai Xiao committed
185
        max_abs_vals->at(ins_index) = std::max(max_abs_vals->at(ins_index), max_abs);
186

Shucai Xiao's avatar
Shucai Xiao committed
187
        param_pair.first                 = 127.0f / max_abs_vals->at(ins_index);
Shucai Xiao's avatar
Shucai Xiao committed
188
        int8_quant_params->at(ins_index) = param_pair;
189
190
    };

Shucai Xiao's avatar
Shucai Xiao committed
191
192
193
194
195
196
    auto num_params = capture_arguments(prog, ins_names, calc_quant_params);

    int8_quant_params->resize(num_params, std::make_pair<float, float>(64.0f, 0.0f));
    max_abs_vals->resize(num_params, 0.0f);

    return int8_quant_params;
Shucai Xiao's avatar
Shucai Xiao committed
197
198
}

Shucai Xiao's avatar
Shucai Xiao committed
199
std::shared_ptr<std::vector<std::pair<float, float>>> capture_arguments(program& prog)
200
201
{
    std::vector<std::string> ins_names = {"dot", "convolution"};
Shucai Xiao's avatar
Shucai Xiao committed
202
    return capture_arguments(prog, ins_names);
203
204
}

205
206
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx