"vscode:/vscode.git/clone" did not exist on "a77c56dd8e1b2e37d47469f6cc54e273792a121e"
Commit 277ae59c authored by Shucai Xiao
Browse files

Merge branch 'int8_quantize' into op_capture

parents 43d39b4e 4cf9bd01
......@@ -20,10 +20,10 @@ void convert(hipStream_t stream,
if(target_type == shape::int8_type)
{
gs_launch(stream, result.get_shape().elements())([=](auto i) {
float res = input_ptr[i] * scale + shift;
int factor = (res > 0) ? 1 : -1;
output_ptr[i] =
std::min<int8_t>(std::max<float>(-128, res + factor * 0.5), 127);
float res = input_ptr[i] * scale + shift;
int factor = (res > 0) ? 1 : -1;
output_ptr[i] = static_cast<int8_t>(
std::min<float>(std::max<float>(-128.0f, res + factor * 0.5), 127.0f));
});
}
else
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment