Commit ffa6a45a authored by Shucai Xiao

fix mismatch between cpu and gpu execution.

parent e6158f10
@@ -278,99 +278,33 @@ void quantize_int8(program& prog,
                 prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
             }
         }
-        // only alpha can be quantized, quantization of beta will cause
-        // big error, so we have to manually do the multiplication and
-        // addition
-        else if(fabs(new_alpha) >= threshold)
-        {
-            // truncate to the nearest integer
-            new_alpha = new_alpha > 0.0 ? new_alpha + 0.5 : new_alpha - 0.5;
-            int32_t quant_alpha = static_cast<int32_t>(new_alpha);
-            int32_t quant_beta = 0;
-            if(orig_type == shape::int32_type)
-            {
-                if(inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                }
-                // if there are 3 inputs, we need to consider the third argument
-                else
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
-                    auto l_beta = prog.add_literal(literal{orig_type, vec_beta});
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
-                }
-            }
-            else
-            {
-                if(inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    prog.replace_instruction(ins, op::convert{orig_type}, q_dot);
-                }
-                // if there are 3 inputs, we need to consider the third argument
-                else
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    auto oq_dot = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
-                    std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
-                    auto l_beta = prog.add_literal(literal{oq_dot->get_shape(), vec_beta});
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, oq_dot, beta_c);
-                }
-            }
-        }
+        // alpha and beta cannot be folded into the quantization here because
+        // the relative rounding error would be too large
         else
         {
             auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
-            std::vector<float> vec_alpha(q_dot->get_shape().elements(), new_alpha);
-            if(orig_type == shape::int32_type)
+            if(inputs.size() == 3 and dot_op.beta != 0.0f)
             {
-                auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
-                if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
+                auto alpha_ab = prog.insert_instruction(
+                    ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
+                auto c_shape = q_dot->get_shape();
+                std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
+                auto l_beta =
+                    prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
+                instruction_ref beta_c{};
+                if(orig_type != shape::float_type)
                 {
-                    prog.replace_instruction(ins, op::mul{}, l_alpha, q_dot);
+                    auto fp32_c = prog.insert_instruction(
+                        ins, op::convert{shape::float_type}, inputs.back());
+                    auto fp32_beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
+                    beta_c = prog.insert_instruction(ins, op::convert{orig_type}, fp32_beta_c);
                 }
-                // case of 3 arguments
                 else
                 {
-                    std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
-                    auto l_beta = prog.add_literal(literal(ins->get_shape(), vec_beta));
-                    auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, q_dot);
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
+                    beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
                 }
+                prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
             }
             else
             {
-                auto oq_dot = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
-                auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
-                if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(ins, op::mul{}, l_alpha, oq_dot);
-                }
-                // case of 3 arguments
-                else
-                {
-                    std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
-                    auto l_beta = prog.add_literal(literal(ins->get_shape(), vec_beta));
-                    auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, oq_dot);
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
-                    // auto gemm_res = prog.insert_instruction(ins, op::add{}, alpha_ab,
-                    // beta_c); prog.replace_instruction(ins, op::capture{0, print_gemm_res},
-                    // gemm_res);
-                }
+                prog.replace_instruction(ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
             }
         }
     }
 }
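The rewritten GEMM branch always emits quant_dot{1, 0} and folds alpha into a scale-carrying convert instead of a separate literal multiply; beta * C is then added back in the original type. A minimal per-element sketch of the intended arithmetic (plain C++, not the MIGraphX API, and assuming op::convert{type, scale, shift} computes scale * x + shift during the conversion):

    #include <cstdint>

    // Hypothetical helper illustrating the new GEMM epilogue:
    // alpha * (A_int8 . B_int8) + beta * C
    float dequant_gemm_elem(int32_t q_dot, float alpha, float beta, float c)
    {
        // op::convert{orig_type, new_alpha, 0.0f} applied to the int32 dot result
        float alpha_ab = alpha * static_cast<float>(q_dot);
        // l_beta * C, then op::add{} combines the two terms
        return alpha_ab + beta * c;
    }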
@@ -384,49 +318,15 @@ void quantize_int8(program& prog,
         auto dilation      = conv_op.dilation;
         auto padding_mode  = conv_op.padding_mode;
         auto group         = conv_op.group;
-        auto adjust_factor = 1.0 / (ins_quant_params[0].first * ins_quant_params[1].first);
-        shape quant_shape =
-            compute_shape(op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                          converted_inputs);
-        std::vector<float> vec_factor(quant_shape.elements(), adjust_factor);
-        auto fl = prog.add_literal(literal{{orig_type, quant_shape.lens()}, vec_factor});
-        if(quant_shape.type() == orig_type)
-        {
-            if(adjust_factor == 1.0f)
-            {
-                prog.replace_instruction(
-                    ins,
-                    op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                    converted_inputs);
-            }
-            else
-            {
-                auto quant_conv = prog.insert_instruction(
-                    ins,
-                    op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                    converted_inputs);
-                prog.replace_instruction(ins, op::mul{}, quant_conv, fl);
-                // auto q_conv = prog.insert_instruction(ins, op::mul{}, quant_conv, fl);
-                // prog.replace_instruction(ins, op::capture{10000, print_conv_res}, q_conv);
-            }
-        }
-        else
-        {
-            auto quant_conv = prog.insert_instruction(
-                ins,
-                op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                converted_inputs);
-            if(adjust_factor == 1.0f)
-            {
-                prog.replace_instruction(ins, op::convert{orig_type}, quant_conv);
-            }
-            else
-            {
-                auto oq_conv = prog.insert_instruction(ins, op::convert{orig_type}, quant_conv);
-                prog.replace_instruction(ins, op::mul{}, oq_conv, fl);
-            }
-        }
+        auto adjust_factor = 1.0f / (ins_quant_params[0].first * ins_quant_params[1].first);
+        auto quant_conv = prog.insert_instruction(
+            ins,
+            op::quant_convolution{padding, stride, dilation, padding_mode, group},
+            converted_inputs);
+        auto fp_conv = prog.insert_instruction(
+            ins, op::convert{shape::float_type, adjust_factor, 0.0f}, quant_conv);
+        prog.replace_instruction(ins, op::convert{orig_type, 1.0f, 0.0f}, fp_conv);
     }
     else
     {
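The convolution path is simplified the same way: quant_convolution always runs with an int32 accumulator, one scaled convert to float applies adjust_factor = 1.0f / (input_scale * weight_scale), and a plain convert casts back to the original type. A per-element sketch under the same assumed convert semantics (hypothetical helper, not MIGraphX code):

    #include <cstdint>

    float dequant_conv_elem(int32_t q_conv, float input_scale, float weight_scale)
    {
        float adjust_factor = 1.0f / (input_scale * weight_scale);
        // op::convert{shape::float_type, adjust_factor, 0.0f}
        float fp_conv = adjust_factor * static_cast<float>(q_conv);
        // op::convert{orig_type, 1.0f, 0.0f} is then a plain cast back
        return fp_conv;
    }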
@@ -9,6 +9,8 @@
 #include <migraphx/cpu/gemm.hpp>
 #include <unordered_map>
 #include <utility>
+#include <fstream>
+#include <iomanip>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -247,7 +249,7 @@ struct cpu_quant_convolution
                 const auto in_ch = group_id * wei_c + k;
                 if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                 {
-                    acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
+                    acc += static_cast<int32_t>(input(o, in_ch, in_x, in_y)) * weights(w, k, x, y);
                 }
             });
             output(o, w, i, j) = acc;
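This one-line kernel change is the fix named in the commit title: each int8 product is now formed and accumulated at 32-bit width on the CPU reference kernel, which is what GPU int8 kernels conventionally do. A standalone sketch of the pattern (not MIGraphX code):

    #include <cstdint>
    #include <vector>

    // int8 dot product with an explicit int32 accumulator; accumulating in
    // 8 bits would overflow almost immediately, since |a[i] * b[i]| can
    // reach 128 * 128 = 16384.
    int32_t dot_i8(const std::vector<int8_t>& a, const std::vector<int8_t>& b)
    {
        int32_t acc = 0;
        for(std::size_t i = 0; i < a.size() && i < b.size(); ++i)
            acc += static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
        return acc;
    }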