Unverified commit 94fcfc02, authored by Paul Fultz II and committed by GitHub
Browse files

Run optimize_module for int8 quantization (#2300)

parent 19c87449
......@@ -70,6 +70,10 @@ void quantize_int8(program& prog,
MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
}
// Run optimize_module() before converting to int8 so that constant evaluation
// and folding are performed in FP32, which avoids a loss of precision.
run_passes(prog, {optimize_module{}});
std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
std::make_shared<std::vector<std::pair<float, float>>>();
std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();
......@@ -143,10 +147,7 @@ void quantize_int8(program& prog,
run_passes(prog,
{quantize_int8_pass{ins_names, *int8_quant_params},
eliminate_common_subexpression{},
dead_code_elimination{},
simplify_reshapes{},
dead_code_elimination{},
optimize_module{},
simplify_qdq{},
dead_code_elimination{}});
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment