/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <migraphx/float_equal.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/quantize_fp16.hpp>
#include <migraphx/quantize_8bits.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/optimize_module.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/op/capture.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/target.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/pass_manager.hpp>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

48
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_8BITS_QUANTIZATION_PARAMS)
Shucai Xiao's avatar
Shucai Xiao committed
49

Shucai Xiao's avatar
Shucai Xiao committed
50
51
// This function is to convert any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
52
53
54
// For the conversion, there could be cases of overflowing or underflowing, but it
// is uncommon. Run optimize_module() before converting to fp16 to const eval and fold in FP32 to
// avoid loss of precision.
Shucai Xiao's avatar
Shucai Xiao committed
55
void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
56
{
57
    run_passes(prog, {optimize_module{}, quantize_fp16_pass{ins_names}, optimize_module{}});
Shucai Xiao's avatar
Shucai Xiao committed
58
59
}

60
61
62
63
64
// Quantize the instructions named in `ins_names` to the 8-bit type `precision`,
// deriving one scale per captured argument from calibration data.
//
// Steps:
//   1. optimize_module() so constants are folded in FP32 first.
//   2. capture_arguments_pass inserts capture ops that report every captured
//      argument to a callback; `param_num` receives the number of captured arguments.
//   3. A compiled copy of the program is evaluated once per calibration entry; the
//      callback tracks the running max |value| per captured argument and sets
//      scale = quantized_range / max_abs (or 1 when every value seen is 0).
//   4. quantize_8bits_pass applies the collected parameters, followed by
//      simplify_qdq, optimize_module and dead_code_elimination.
//
// prog        - program to quantize; modified in place.
// t           - target used to compile/evaluate the calibration copy and to move
//               data to and from it.
// precision   - int8_type uses a quantized range of 127, anything else 240
//               (quantize_fp8 passes fp8e4m3fnuz_type here).
// calibration - one parameter_map per calibration run. Program parameters missing
//               from a map are filled with t.allocate(shape).
// ins_names   - names of the instructions whose arguments are captured/quantized.
//
// Entries the callback never updates keep the initial (scale, shift) = (64.0f, 0.0f).
void quantize_8bits(program& prog,
                    const target& t,
                    shape::type_t precision,
                    const std::vector<parameter_map>& calibration,
                    const std::vector<std::string>& ins_names)
{
    // Run optimize_module() before converting to int8/fp8 to const eval and fold in FP32 to
    // avoid loss of precision.
    run_passes(prog, {optimize_module{}});

    // Heap-allocated so that this function and every copy of the capture callback
    // observe the same vectors.
    auto quant_8bit_params = std::make_shared<std::vector<std::pair<float, float>>>();
    auto max_abs_vals      = std::make_shared<std::vector<float>>();

    const float quantized_range = (precision == shape::type_t::int8_type) ? 127.0f : 240.0f;

    // The callback is handed to capture_arguments_pass, which may keep copies of it,
    // so the shared state and the range are captured by value rather than by
    // reference to these locals. `t` is a caller-owned reference and is still
    // captured by reference.
    auto calc_quant_params = [quant_8bit_params, max_abs_vals, quantized_range, &t](
                                 std::size_t ins_index, std::vector<argument> args) {
        // Bring the captured argument back from the target and widen it to float.
        std::vector<float> vec_val;
        argument arg = t.copy_from(args.front());
        arg.visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });

        // Largest magnitude in this buffer; stays 0 for an empty buffer instead of
        // dereferencing an end iterator.
        float max_abs = 0.0f;
        for(float val : vec_val)
        {
            max_abs = std::max(max_abs, std::fabs(val));
        }

        // Keep the maximum over all calibration runs seen so far.
        float& running_max = max_abs_vals->at(ins_index);
        running_max        = std::max(running_max, max_abs);

        // Only a scale is computed; the shift is not considered and is always 0.
        // If all values are 0 there is nothing to scale, so use 1.
        const float scale =
            float_equal(running_max, 0.0f) ? 1.0f : quantized_range / running_max;
        quant_8bit_params->at(ins_index) = std::pair<float, float>{scale, 0.0f};
    };

    // pass to add capture argument op
    std::size_t param_num = 0;
    run_passes(prog, {capture_arguments_pass{ins_names, calc_quant_params, &param_num}});
    quant_8bit_params->resize(param_num, std::pair<float, float>(64.0f, 0.0f));
    max_abs_vals->resize(param_num, 0.0f);

    // use the calibration data to compute the quantization scale
    auto capture_prog = prog;
    capture_prog.compile(t);

    // The parameter shapes of the compiled copy are queried once, not per run.
    const auto param_shapes = capture_prog.get_parameter_shapes();

    // use all calibration data to run the program to calculate the
    // quantization scale and shift
    for(auto&& arg : calibration)
    {
        parameter_map m;
        for(auto&& x : param_shapes)
        {
            const auto provided = arg.find(x.first);
            if(provided != arg.end())
            {
                assert(x.second == provided->second.get_shape());
                m[x.first] = t.copy_to(provided->second);
            }
            else
            {
                // No calibration data for this parameter: use a target allocation.
                m[x.first] = t.allocate(x.second);
            }
        }
        capture_prog.eval(m);
    }

    // print the quantization parameters in only the main module
    if(enabled(MIGRAPHX_8BITS_QUANTIZATION_PARAMS{}))
    {
        for(std::size_t i = 0; i < quant_8bit_params->size(); ++i)
        {
            const auto& param = quant_8bit_params->at(i);
            std::cout << "ins_index = " << i << ", scale = " << param.first
                      << ", shift = " << param.second << '\n';
        }
        // Single flush after the whole table has been written.
        std::cout << std::endl;
    }

    run_passes(prog,
               {quantize_8bits_pass{precision, ins_names, *quant_8bit_params},
                simplify_qdq{},
                optimize_module{},
                dead_code_elimination{}});
}

// Quantize `prog` to int8 using the given calibration data.
//
// Every name in `ins_names` must be "convolution" or "dot"; any other name makes
// the function throw via MIGRAPHX_THROW before the program is touched. The actual
// work is delegated to quantize_8bits() with shape::int8_type.
void quantize_int8(program& prog,
                   const target& t,
                   const std::vector<parameter_map>& calibration,
                   const std::vector<std::string>& ins_names)
{
    // Operators that may be requested for int8 quantization.
    const std::set<std::string> supported_ops = {"convolution", "dot"};

    for(const auto& requested : ins_names)
    {
        if(supported_ops.count(requested) == 0)
        {
            MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
        }
    }

    quantize_8bits(prog, t, shape::int8_type, calibration, ins_names);
}

// Quantize `prog` to fp8 (shape::fp8e4m3fnuz_type) using the given calibration data.
//
// Prints a BETA-support warning to stdout, then collects the operator names found
// in the main module, skipping "convert" and any name starting with "@", and
// forwards them to quantize_8bits().
//
// prog        - program to quantize; modified in place by quantize_8bits().
// t           - target forwarded to quantize_8bits().
// calibration - calibration parameter maps forwarded to quantize_8bits().
void quantize_fp8(program& prog, const target& t, const std::vector<parameter_map>& calibration)
{
    std::cout << "[Warning] : MIGraphX has BETA support for FP8. Using FP8 may result in "
                 "incorrect final outputs\n";

    // Record each operator name once. Pushing one entry per instruction would make
    // the list grow with the size of the program and fill it with duplicates.
    // NOTE(review): assumes the list is only used for name membership tests
    // downstream, so its order is irrelevant - confirm against the passes.
    std::set<std::string> unique_names;
    auto* mm = prog.get_main_module();
    for(auto ins : iterator_for(*mm))
    {
        const std::string name = ins->name();
        if(name == "convert" or starts_with(name, "@"))
        {
            continue;
        }
        unique_names.insert(name);
    }

    const std::vector<std::string> supported_ins_names(unique_names.begin(), unique_names.end());
    quantize_8bits(prog, t, shape::fp8e4m3fnuz_type, calibration, supported_ins_names);
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx