recover deleted changes

e52638e5 · Shucai Xiao · 4b24e949 · e52638e5 · e52638e5 · e52638e5
Commit e52638e5 authored Aug 01, 2019 by Shucai Xiao
Showing with 228 additions and 1 deletion

src/include/migraphx/quantization.hpp src/include/migraphx/quantization.hpp +6 -0

src/py/migraphx_py.cpp src/py/migraphx_py.cpp +11 -0

src/quantization.cpp src/quantization.cpp +211 -1

No files found.
--- a/src/include/migraphx/quantization.hpp
+++ b/src/include/migraphx/quantization.hpp
@@ -23,6 +23,12 @@ void capture_arguments(program& prog,
 void capture_arguments(program& prog, const std::vector<std::string>& ins_names);
 void capture_arguments(program& prog);

+void quantize_int8(program& prog);
+void quantize_int8(program& prog, const std::vector<std::string>& ins_names);
+void quantize_int8(program& prog,
+                   const std::vector<std::string>& ins_names,
+                   const std::vector<std::pair<float, float>>& quant_params);
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -187,6 +187,17 @@ PYBIND11_MODULE(migraphx, m)
        migraphx::quantize(p, ins_names);
    });
    m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });
+    m.def("quantize_int8", [](migraphx::program& p, std::vector<std::string>& ins_names) {
+        migraphx::quantize_int8(p, ins_names);
+    });
+    m.def("quantize_int8",
+          [](migraphx::program& p,
+             std::vector<std::string>& ins_names,
+             std::vector<std::pair<float, float>>& quant_params) {
+              migraphx::quantize_int8(p, ins_names, quant_params);
+          });
+    m.def("quantize_int8", [](migraphx::program& p) { migraphx::quantize_int8(p); });
+
    m.def("capture_arguments", [](migraphx::program& p, const std::vector<std::string>& ins_names) {
        migraphx::capture_arguments(p, ins_names);
    });

--- a/src/quantization.cpp
+++ b/src/quantization.cpp
@@ -123,6 +123,216 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)

 void quantize(program& prog) { quantize(prog, {"all"}); }

+// int8 quantization is different from fp16 since int8 can only handle value
+// -128 ~ 127. To convert the float or double to int8, we need a scale and
+// a shift, then the convert can be done as v_int8 = fp * scale + shift.
+// To simplify the changes, we consider shift as 0.0f for now.
+void quantize_int8(program& prog,
+                   const std::vector<std::string>& ins_names,
+                   const std::vector<std::pair<float, float>>& quant_params)
+{
+    for(size_t i = 0; i < quant_params.size(); i++)
+    {
+        auto param = quant_params.at(i);
+        std::cout << "index = " << i << ", scale = " << param.first << "\t" << param.second
+                  << std::endl;
+    }
+    std::cout << std::endl;
+
+    // For now, we only support the int8 quantization of gemm and convolution
+    std::vector<std::string> op_names = {"dot", "convolution"};
+    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
+           return (std::find(op_names.begin(), op_names.end(), name) != op_names.end());
+       }))
+    {
+        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
+    }
+
+    std::size_t quant_param_index = 0;
+    std::unordered_map<instruction_ref, instruction_ref> map_quant_ins;
+    std::unordered_map<instruction_ref, std::size_t> map_index;
+    for(auto ins : iterator_for(prog))
+    {
+        if(not contains(ins_names, ins->name()))
+        {
+            continue;
+        }
+
+        shape::type_t orig_type = ins->get_shape().type();
+
+        // for the dot operator, there could be 2 or 3 input arguments
+        // if the 3rd argument is available, convert it to an int32.
+        std::vector<instruction_ref> converted_inputs;
+
+        // process all inputs, if input is a fp32 or fp64, convert it
+        // to a int8 type by adding a convert operator and replace
+        // the operator with the corresponding int8 version
+        auto inputs = ins->inputs();
+        std::vector<std::pair<float, float>> ins_quant_params;
+        for(auto input : inputs)
+        {
+            // calculate the index of each instruction to be quantized
+            if(map_index.count(input) == 0)
+            {
+                map_index[input] = quant_param_index++;
+            }
+            auto param = quant_params[map_index[input]];
+            ins_quant_params.push_back(param);
+
+            // In general, the target_type is int8, but for the dot
+            // operation, if it has 3 inputs, then the last one should
+            // be converted to int32_type
+            shape::type_t quant_type = shape::int8_type;
+            if(ins->name() == "dot" and inputs.size() == 3 and input == inputs.back())
+            {
+                quant_type = shape::int32_type;
+            }
+
+            auto s = input->get_shape();
+            if((s.type() == shape::float_type || s.type() == shape::double_type ||
+                s.type() == shape::int32_type) &&
+               s.type() != quant_type)
+            {
+                // if the input is a convert operator, uses its input
+                // as its current input
+                instruction_ref quant_input{};
+                if(input->name() == "convert")
+                {
+                    auto tmp_ins = input->inputs().front();
+                    if(tmp_ins->get_shape().type() == quant_type)
+                    {
+                        quant_input = input->inputs().front();
+                    }
+                    else
+                    {
+                        quant_input = insert_quant_ins(
+                            prog, input, quant_type, map_quant_ins, param.first, param.second);
+                    }
+                }
+                else
+                {
+                    quant_input = insert_quant_ins(
+                        prog, input, quant_type, map_quant_ins, param.first, param.second);
+                }
+                converted_inputs.push_back(quant_input);
+            }
+            else
+            {
+                converted_inputs.push_back(input);
+            }
+        }
+
+        // no change for the input, go to the next instruction
+        if(inputs == converted_inputs)
+        {
+            continue;
+        }
+
+        // When converting from other types to int8_type, there are parameters
+        // used as scale and shift(.0f), which will generate results diffrent from
+        // the original results. To adjust the output to be "correct(approximatly
+        // equal)", we need additional calculation for the adjustment
+        if(ins->name() == "dot")
+        {
+            auto dot_op = any_cast<op::dot>(ins->get_operator());
+            float new_alpha =
+                dot_op.alpha / (ins_quant_params[0].first * ins_quant_params[1].first);
+            float new_beta = dot_op.beta;
+            // We need additional checking about the quant_alpha value. If
+            // abs(quant_alpha) > 50 (some tmp value set here), we can convert
+            // it to an integer as the new_alpha in the quant_dot
+            float threshold = 50.0f;
+            if(fabs(new_alpha) >= threshold && fabs(new_beta) >= threshold)
+            {
+                int32_t quant_alpha = static_cast<int32_t>(new_alpha);
+                int32_t quant_beta  = static_cast<int32_t>(new_beta);
+                shape quant_shape   = compute_shape(op::quant_dot{1, 0}, converted_inputs);
+                if(quant_shape.type() == orig_type)
+                {
+                    prog.replace_instruction(
+                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
+                }
+                else
+                {
+                    auto quant_dot = prog.insert_instruction(
+                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
+                    prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
+                }
+            }
+            // either alpha or beta cannot be quantized because of too big
+            // relative rounding error
+            else
+            {
+                auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
+                if(inputs.size() == 3 and dot_op.beta != 0.0f)
+                {
+                    auto alpha_ab = prog.insert_instruction(
+                        ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
+                    auto c_shape = q_dot->get_shape();
+                    std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
+                    auto l_beta =
+                        prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
+                    instruction_ref beta_c{};
+                    if(orig_type != shape::float_type)
+                    {
+                        auto fp32_c = prog.insert_instruction(
+                            ins, op::convert{shape::float_type}, inputs.back());
+                        auto fp32_beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
+                        beta_c = prog.insert_instruction(ins, op::convert{orig_type}, fp32_beta_c);
+                    }
+                    else
+                    {
+                        beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
+                    }
+                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
+                }
+                else
+                {
+                    prog.replace_instruction(ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
+                }
+            }
+        }
+        else if(ins->name() == "convolution")
+        {
+            // Current MIOpen convolution does not support alpha and beta,
+            // so we need a separate multiply to adjust the output
+            auto conv_op       = any_cast<op::convolution>(ins->get_operator());
+            auto padding       = conv_op.padding;
+            auto stride        = conv_op.stride;
+            auto dilation      = conv_op.dilation;
+            auto padding_mode  = conv_op.padding_mode;
+            auto group         = conv_op.group;
+            auto adjust_factor = 1.0f / (ins_quant_params[0].first * ins_quant_params[1].first);
+
+            auto quant_conv = prog.insert_instruction(
+                ins,
+                op::quant_convolution{padding, stride, dilation, padding_mode, group},
+                converted_inputs);
+            prog.replace_instruction(ins, op::convert{orig_type, adjust_factor, 0.0f}, quant_conv);
+        }
+        else
+        {
+            MIGRAPHX_THROW("QUANTIZE_INT8: does not support operator" + ins->name());
+        }
+    }
+
+    if(quant_param_index != quant_params.size())
+    {
+        MIGRAPHX_THROW("QUANTIZE_INT8: number of scales does not match");
+    }
+}
+
+void quantize_int8(program& prog, const std::vector<std::string>& ins_names)
+{
+    quantize_int8(prog, ins_names, *prog.int8_quant_params);
+}
+
+void quantize_int8(program& prog)
+{
+    std::vector<std::string> ins_names = {"dot", "convolution"};
+    quantize_int8(prog, ins_names);
+}
+
 // For the input of each input argument, we need to insert a
 // capture operator to compute the scale and shift
 void capture_arguments(program& prog,
@@ -132,7 +342,7 @@ void capture_arguments(program& prog,

    size_t num_quant_params = 0;
    // the int8 quantization only support dot and convolution
-    std::vector<std::string> op_names = {"dot", "convolution"};
+    std::vector<std::string> op_names = {"dot", "convolution", "quant_dot", "quant_convolution"};
    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
           return std::find(op_names.begin(), op_names.end(), name) != op_names.end();
       }))