Unverified commit 94fcfc02, authored by Paul Fultz II and committed by GitHub
Browse files

Run optimize_module for int8 quantization (#2300)

parent 19c87449
...@@ -70,6 +70,10 @@ void quantize_int8(program& prog, ...@@ -70,6 +70,10 @@ void quantize_int8(program& prog,
MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation"); MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
} }
// Run optimize_module() before converting to int8 to const eval and fold in FP32 to
// avoid loss of precision.
run_passes(prog, {optimize_module{}});
std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params = std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
std::make_shared<std::vector<std::pair<float, float>>>(); std::make_shared<std::vector<std::pair<float, float>>>();
std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>(); std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();
...@@ -143,10 +147,7 @@ void quantize_int8(program& prog, ...@@ -143,10 +147,7 @@ void quantize_int8(program& prog,
run_passes(prog, run_passes(prog,
{quantize_int8_pass{ins_names, *int8_quant_params}, {quantize_int8_pass{ins_names, *int8_quant_params},
eliminate_common_subexpression{}, optimize_module{},
dead_code_elimination{},
simplify_reshapes{},
dead_code_elimination{},
simplify_qdq{}, simplify_qdq{},
dead_code_elimination{}}); dead_code_elimination{}});
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment