Commit cd26ada8 authored by Umang Yadav's avatar Umang Yadav
Browse files

Handle underflow case separately to avoid sanitization errors

parent 6414ee38
......@@ -149,13 +149,19 @@ constexpr uint8_t cast_to_f8(T f_x, bool stoch = false, uint32_t rng = 0)
else
{ // fp32/fp16 is normal with implicit 1
act_exponent = exponent - bias;
if(act_exponent <= f8_denormal_act_exponent)
/*
Check whether the FP8 result underflows to 0.0. Wm is included in the check to allow values
to fall into the FP8 denorm range. E.g. if act_exponent for FP32/16 is -9 and e4m3fnuz has
denormal act_exponent = -7, the fp32/16 mantissa can be shifted right by two to make the
exponent -7, and the value is then representable as an e4m3fnuz denorm. So for fp32/fp16,
exponent -10 is the cut point for conversion to e4m3fnuz, due to the implicit 1 in the
mantissa. If the fp32/16 act_exponent is less than -10, the value underflows to zero. */
if(act_exponent < (f8_denormal_act_exponent - Wm))
{
return NegativeZeroNan ? 0x00 : ((sign) ? 0x80 : 0x00);
}
else if(act_exponent <= f8_denormal_act_exponent)
{
/* This is the case where fp32/fp16 is normal but it is in the f8 denormal range.
For example, in fp8 FNUZ mode the denormal exponent is -7, but if the fp32/fp16
actual exponent is -7, the value is actually larger due to the implicit 1.
Therefore it needs to be adjusted to -6 and the mantissa shifted right by 1.
So for fp32/fp16, exponent -8 is the cut point to convert to fp8 FNUZ */
exponent_diff = f8_denormal_act_exponent - act_exponent;
}
else
......
......@@ -150,7 +150,8 @@ struct npy_format_descriptor<migraphx::fp8::fp8e4m3fnuz>
static std::string format()
{
// following: https://docs.python.org/3/library/struct.html#format-characters
return "B";
// TODO: need to figure out correct encoding
return "z";
}
static constexpr auto name() { return _("fp8e4m3fnuz"); }
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment