Handle underflow case separately to avoid sanitization errors

cd26ada8 · Umang Yadav · 6414ee38 · cd26ada8 · cd26ada8
Commit cd26ada8 authored Nov 16, 2023 by Umang Yadav
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 7 deletions

src/include/migraphx/float8_impl.hpp src/include/migraphx/float8_impl.hpp +12 -6

src/py/migraphx_py.cpp src/py/migraphx_py.cpp +2 -1

No files found.
--- a/src/include/migraphx/float8_impl.hpp
+++ b/src/include/migraphx/float8_impl.hpp
@@ -149,13 +149,19 @@ constexpr uint8_t cast_to_f8(T f_x, bool stoch = false, uint32_t rng = 0)
    else
    { // fp32/fp16 is normal with implicit 1
        act_exponent = exponent - bias;
-        if(act_exponent <= f8_denormal_act_exponent)
+        /*
+        Check whether the value underflows to 0.0 in FP8. Wm is included in the check so that
+        values can still land in the FP8 denormal range. E.g. if act_exponent for fp32/fp16 is -9
+        and e4m3fnuz has a denormal act exponent of -7, the fp32/fp16 mantissa can be shifted
+        right by two to make the exponent -7, which is then representable as an e4m3fnuz denormal.
+        So for fp32/fp16, exponent -10 is the cut point for conversion to e4m3fnuz due to the
+        implicit 1 in the mantissa. If the fp32/fp16 act_exponent is less than -10, it underflows
+        to zero. */
+        if(act_exponent < (f8_denormal_act_exponent - Wm))
+        {
+            return NegativeZeroNan ? 0x00 : ((sign) ? 0x80 : 0x00);
+        }
+        else if(act_exponent <= f8_denormal_act_exponent)
        {
-            /* This is the case where fp32/fp16 is normal but it is in f8 denormal range.
-            For example fp8 FNUZ mode, denormal exponent is -7, but if the fp32/fp16
-            actual exponent is -7, it is actually larger due to the implict 1,
-            Therefore it needs to be adjust to -6 and mantissa shift right by 1.
-            So for fp32/fp16, exponent -8 is the cut point to convert to fp8 FNUZ */
            exponent_diff = f8_denormal_act_exponent - act_exponent;
        }
        else

--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -150,7 +150,8 @@ struct npy_format_descriptor<migraphx::fp8::fp8e4m3fnuz>
    static std::string format()
    {
        // following: https://docs.python.org/3/library/struct.html#format-characters
-        return "B";
+        // TODO: need to figure out the correct encoding
+        return "z";
    }
    static constexpr auto name() { return _("fp8e4m3fnuz"); }
 };