gaoqiong / MIGraphX · Commit ffa6a45a
Authored Jun 05, 2019 by Shucai Xiao
Parent: e6158f10

Fix mismatch between CPU and GPU execution.
Showing 2 changed files with 27 additions and 125 deletions:

  src/quantization.cpp           +24  -124
  src/targets/cpu/lowering.cpp    +3    -1
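At a glance, judging from the hunks below: src/quantization.cpp drops the hand-rolled literal/mul/add chains that rescaled int8 dot and convolution results and instead folds the scale into convert instructions, while src/targets/cpu/lowering.cpp makes the reference int8 convolution accumulate its products in int32_t. Both changes appear to bring the CPU path's arithmetic in line with the GPU kernels.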
src/quantization.cpp

```diff
@@ -278,99 +278,33 @@ void quantize_int8(program& prog,
                 prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
             }
         }
-        // only alpha can be quantized, quantization of beta will cause
-        // big error, so we have to manually do the multiplication and
-        // addition
-        else if(fabs(new_alpha) >= threshold)
-        {
-            // truncate to the nearest integer
-            new_alpha = new_alpha > 0.0 ? new_alpha + 0.5 : new_alpha - 0.5;
-            int32_t quant_alpha = static_cast<int32_t>(new_alpha);
-            int32_t quant_beta  = 0;
-            if(orig_type == shape::int32_type)
-            {
-                if(inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                }
-                // if there are 3 inputs, we need to consider the third argument
-                else
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
-                    auto l_beta = prog.add_literal(literal{orig_type, vec_beta});
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
-                }
-            }
-            else
-            {
-                if(inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    prog.replace_instruction(ins, op::convert{orig_type}, q_dot);
-                }
-                // if there are 3 inputs, we need to consider the third argument
-                else
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    auto oq_dot =
-                        prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
-                    std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
-                    auto l_beta = prog.add_literal(literal{oq_dot->get_shape(), vec_beta});
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, oq_dot, beta_c);
-                }
-            }
-        }
+        // either alpha or beta cannot be quantized because of too big
+        // relative rounding error
         else
         {
             auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
-            std::vector<float> vec_alpha(q_dot->get_shape().elements(), new_alpha);
-            if(orig_type == shape::int32_type)
+            if(inputs.size() == 3 and dot_op.beta != 0.0f)
             {
-                auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
-                if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(ins, op::mul{}, l_alpha, q_dot);
-                }
-                // case of 3 arguments
-                else
-                {
-                    std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
-                    auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
-                    auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, q_dot);
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
-                }
+                auto alpha_ab = prog.insert_instruction(
+                    ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
+                auto c_shape = q_dot->get_shape();
+                std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
+                auto l_beta =
+                    prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
+                instruction_ref beta_c{};
+                if(orig_type != shape::float_type)
+                {
+                    auto fp32_c = prog.insert_instruction(
+                        ins, op::convert{shape::float_type}, inputs.back());
+                    auto fp32_beta_c =
+                        prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
+                    beta_c = prog.insert_instruction(ins, op::convert{orig_type}, fp32_beta_c);
+                }
+                else
+                {
+                    beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
+                }
+                prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
             }
             else
             {
-                auto oq_dot  = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
-                auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
-                if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(ins, op::mul{}, l_alpha, oq_dot);
-                }
-                // case of 3 arguments
-                else
-                {
-                    std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
-                    auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
-                    auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, oq_dot);
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
-                    // auto gemm_res = prog.insert_instruction(ins, op::add{}, alpha_ab,
-                    // beta_c); prog.replace_instruction(ins, op::capture{0, print_gemm_res},
-                    // gemm_res);
-                }
+                prog.replace_instruction(ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
             }
         }
     }
```
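The rewritten fallback branch leans on a scaled convert: `quant_dot{1, 0}` now always produces the raw int32 `A·B`, alpha is applied while converting that result back (reading `op::convert{type, scale, shift}` as `y = scale * x + shift`, which is what the surrounding rewrite implies), and `beta * C` is computed in float before the final add. A minimal standalone sketch of that arithmetic, with made-up values and plain C++ in place of the MIGraphX graph API:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    int32_t q_dot   = 12500;     // raw A.B from quant_dot{1, 0} on the int8 inputs
    float new_alpha = 0.02f;     // alpha, rescaled for the int8 quantization
    float beta = 0.5f, c = 3.0f; // third argument C and its coefficient

    // op::convert{orig_type, new_alpha, 0.0f}: apply alpha during conversion
    float alpha_ab = new_alpha * static_cast<float>(q_dot);
    // beta * C, done in float even when orig_type is not float (the diff
    // converts inputs.back() to float first, multiplies, then converts back)
    float beta_c = beta * c;

    std::cout << alpha_ab + beta_c << "\n"; // alpha*(A.B) + beta*C = 251.5
}
```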
```diff
@@ -384,49 +318,15 @@ void quantize_int8(program& prog,
             auto dilation      = conv_op.dilation;
             auto padding_mode  = conv_op.padding_mode;
             auto group         = conv_op.group;
-            auto adjust_factor = 1.0 / (ins_quant_params[0].first * ins_quant_params[1].first);
-            shape quant_shape  = compute_shape(
-                op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                converted_inputs);
-            std::vector<float> vec_factor(quant_shape.elements(), adjust_factor);
-            auto fl = prog.add_literal(literal{{orig_type, quant_shape.lens()}, vec_factor});
-            if(quant_shape.type() == orig_type)
-            {
-                if(adjust_factor == 1.0f)
-                {
-                    prog.replace_instruction(
-                        ins,
-                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                        converted_inputs);
-                }
-                else
-                {
-                    auto quant_conv = prog.insert_instruction(
-                        ins,
-                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                        converted_inputs);
-                    prog.replace_instruction(ins, op::mul{}, quant_conv, fl);
-                    // auto q_conv = prog.insert_instruction(ins, op::mul{}, quant_conv, fl);
-                    // prog.replace_instruction(ins, op::capture{10000, print_conv_res}, q_conv);
-                }
-            }
-            else
-            {
-                auto quant_conv = prog.insert_instruction(
-                    ins,
-                    op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                    converted_inputs);
-                if(adjust_factor == 1.0f)
-                {
-                    prog.replace_instruction(ins, op::convert{orig_type}, quant_conv);
-                }
-                else
-                {
-                    auto oq_conv =
-                        prog.insert_instruction(ins, op::convert{orig_type}, quant_conv);
-                    prog.replace_instruction(ins, op::mul{}, oq_conv, fl);
-                }
-            }
+            auto adjust_factor = 1.0f / (ins_quant_params[0].first * ins_quant_params[1].first);
+            auto quant_conv    = prog.insert_instruction(
+                ins,
+                op::quant_convolution{padding, stride, dilation, padding_mode, group},
+                converted_inputs);
+            auto fp_conv = prog.insert_instruction(
+                ins, op::convert{shape::float_type, adjust_factor, 0.0f}, quant_conv);
+            prog.replace_instruction(ins, op::convert{orig_type, 1.0f, 0.0f}, fp_conv);
         }
         else
         {
```
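For convolution the same idea collapses the old literal/mul cleanup into two converts. The rescale factor comes from the input quantization scales (`ins_quant_params[0].first` and `ins_quant_params[1].first` in the hunk): if the two inputs were scaled by `s0` and `s1` before rounding to int8, the int32 convolution output is `s0 * s1` times the true result, so multiplying by `1 / (s0 * s1)` during the int32-to-float convert restores the original scale. A small self-contained check of that identity (values invented, not MIGraphX code):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    float s0 = 64.0f, s1 = 32.0f; // quantization scales of data and weights
    float a = 0.5f, w = 0.25f;    // one element of input and weight

    // quantize, then multiply in int32 as quant_convolution does
    int32_t q = static_cast<int32_t>(s0 * a) * static_cast<int32_t>(s1 * w);

    // op::convert{shape::float_type, adjust_factor, 0.0f} folds this
    // rescale into the int32 -> float conversion
    float adjust_factor = 1.0f / (s0 * s1);
    std::cout << adjust_factor * static_cast<float>(q) << "\n"; // 0.125 == a * w
}
```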
src/targets/cpu/lowering.cpp

```diff
@@ -9,6 +9,8 @@
 #include <migraphx/cpu/gemm.hpp>
 #include <unordered_map>
 #include <utility>
+#include <fstream>
+#include <iomanip>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -247,7 +249,7 @@ struct cpu_quant_convolution
                         const auto in_ch = group_id * wei_c + k;
                         if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                         {
-                            acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
+                            acc += static_cast<int32_t>(input(o, in_ch, in_x, in_y)) * weights(w, k, x, y);
                         }
                     });
                     output(o, w, i, j) = acc;
```
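The one-line lowering change is the heart of the CPU/GPU mismatch fix: the inner loop now widens one operand so the multiply-accumulate is unambiguously carried out in int32_t, as on the GPU. In plain C++ two `int8_t` values already promote to `int` before multiplying, but `input(...)` and `weights(...)` are tensor-view accesses, and the explicit cast pins down the intended arithmetic. A sketch of the pattern (the `dot_i8` helper is illustrative, not MIGraphX code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int32_t dot_i8(const std::vector<int8_t>& a, const std::vector<int8_t>& b)
{
    int32_t acc = 0;
    for(std::size_t i = 0; i < a.size(); ++i)
    {
        // widen before multiplying so the product is computed in int32_t
        acc += static_cast<int32_t>(a[i]) * b[i];
    }
    return acc;
}

int main()
{
    // every per-element product (100 * 100 = 10000) exceeds int8_t's range
    std::vector<int8_t> a(64, 100), b(64, 100);
    std::cout << dot_i8(a, b) << "\n"; // 640000
}
```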