Add TypeConvert class and start refactoring

4c6c750a · Rosty Geyyer · dbd8f94b · 4c6c750a · 4c6c750a · 4c6c750a
Commit 4c6c750a authored Apr 06, 2023 by Rosty Geyyer
3 changed files
--- a/include/ck/utility/data_type.hpp
+++ b/include/ck/utility/data_type.hpp
@@ -942,125 +942,160 @@ using int8x16_t = typename vector_type<int8_t, 16>::type;
 using int8x32_t = typename vector_type<int8_t, 32>::type;
 using int8x64_t = typename vector_type<int8_t, 64>::type;

-// Convert X to Y
-template <typename Y, typename X>
-__host__ __device__ constexpr Y type_convert(X x)
+class TypeConvert
 {
-    static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
+    public:
+    // constructor
+    __host__ __device__ TypeConvert()
+    {
+        BF16ConvertRTN_ = false; // use round to zero by default
+    }

-    return static_cast<Y>(x);
-}
+    // switch fp32 -> bf16 conversion to round-to-nearest-even (RTN)
+    __host__ __device__ void SetBF16ConvertRTN() { BF16ConvertRTN_ = true; }

-// convert bfp16 to fp32
-template <>
-inline __host__ __device__ constexpr float type_convert<float, bhalf_t>(bhalf_t x)
-{
-    union
+    // switch fp32 -> bf16 conversion to round-toward-zero (RTZ, i.e. truncation)
+    __host__ __device__ void SetBF16ConvertRTZ() { BF16ConvertRTN_ = false; }
+
+    // generic conversion via static_cast; bf16 conversions use the specializations below
+    template <typename Y, typename X>
+    __host__ __device__ constexpr Y convert(X x)
    {
-        uint32_t int32;
-        float fp32;
-    } u = {uint32_t(x) << 16};
+        static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);

-    return u.fp32;
-}
+        return static_cast<Y>(x);
+    }

-// convert fp32 to bfp16
-template <>
-inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, float>(float x)
-{
-    union
+    // convert bfp16 to fp32
+    template <>
+    inline __host__ __device__ constexpr float convert<float, bhalf_t>(bhalf_t x)
    {
-        float fp32;
-        uint32_t int32;
-    } u = {x};
-
-    // When the exponent bits are not all 1s, then the value is zero, normal,
-    // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
-    // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
-    // This causes the bfloat16's mantissa to be incremented by 1 if the 16
-    // least significant bits of the float mantissa are greater than 0x8000,
-    // or if they are equal to 0x8000 and the least significant bit of the
-    // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
-    // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
-    // has the value 0x7f, then incrementing it causes it to become 0x00 and
-    // the exponent is incremented by one, which is the next higher FP value
-    // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
-    // with an exponent of 0x00 and a mantissa of 0x7f, it may be rounded up
-    // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
-    // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
-    // incrementing it causes it to become an exponent of 0xFF and a mantissa
-    // of 0x00, which is Inf, the next higher value to the unrounded value.
-    bool flag0 = ~u.int32 & 0x7f800000;
-
-    // When all of the exponent bits are 1, the value is Inf or NaN.
-    // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
-    // mantissa bit. Quiet NaN is indicated by the most significant mantissa
-    // bit being 1. Signaling NaN is indicated by the most significant
-    // mantissa bit being 0 but some other bit(s) being 1. If any of the
-    // lower 16 bits of the mantissa are 1, we set the least significant bit
-    // of the bfloat16 mantissa, in order to preserve signaling NaN in case
-    // the bfloat16's mantissa bits are all 0.
-    bool flag1 = !flag0 && (u.int32 & 0xffff);
-
-    u.int32 += flag0 ? 0x7fff + ((u.int32 >> 16) & 1) : 0; // Round to nearest, round to even
-    u.int32 |= flag1 ? 0x10000 : 0x0;                      // Preserve signaling NaN
-
-    return uint16_t(u.int32 >> 16);
-}
+        union
+        {
+            uint32_t int32;
+            float fp32;
+        } u = {uint32_t(x) << 16};

-// convert bfp16 to fp16 via fp32
-template <>
-inline __host__ __device__ constexpr half_t type_convert<half_t, bhalf_t>(bhalf_t x)
-{
-    float x_fp32 = type_convert<float>(x);
+        return u.fp32;
+    }

-    return static_cast<half_t>(x_fp32);
-}
+    // convert fp32 to bfp16
+    template <>
+    inline __host__ __device__ constexpr bhalf_t convert<bhalf_t, float>(float x)
+    {
+        // RTN mode: round to nearest, ties to even
+        if(BF16ConvertRTN_)
+        {
+            union
+            {
+                float fp32;
+                uint32_t int32;
+            } u = {x};
+
+            // When the exponent bits are not all 1s, then the value is zero, normal,
+            // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
+            // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
+            // This causes the bfloat16's mantissa to be incremented by 1 if the 16
+            // least significant bits of the float mantissa are greater than 0x8000,
+            // or if they are equal to 0x8000 and the least significant bit of the
+            // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
+            // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
+            // has the value 0x7f, then incrementing it causes it to become 0x00 and
+            // the exponent is incremented by one, which is the next higher FP value
+            // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
+            // with an exponent of 0x00 and a mantissa of 0x7f, it may be rounded up
+            // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
+            // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
+            // incrementing it causes it to become an exponent of 0xFF and a mantissa
+            // of 0x00, which is Inf, the next higher value to the unrounded value.
+            bool flag0 = ~u.int32 & 0x7f800000;
+
+            // When all of the exponent bits are 1, the value is Inf or NaN.
+            // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
+            // mantissa bit. Quiet NaN is indicated by the most significant mantissa
+            // bit being 1. Signaling NaN is indicated by the most significant
+            // mantissa bit being 0 but some other bit(s) being 1. If any of the
+            // lower 16 bits of the mantissa are 1, we set the least significant bit
+            // of the bfloat16 mantissa, in order to preserve signaling NaN in case
+            // the bfloat16's mantissa bits are all 0.
+            bool flag1 = !flag0 && (u.int32 & 0xffff);
+
+            u.int32 +=
+                flag0 ? 0x7fff + ((u.int32 >> 16) & 1) : 0; // Round to nearest, round to even
+            u.int32 |= flag1 ? 0x10000 : 0x0;               // Preserve signaling NaN
+
+            return uint16_t(u.int32 >> 16);
+        }
+        // RTZ mode: keep the upper 16 bits of the fp32 bit pattern (truncate)
+        else
+        {
+            union
+            {
+                float fp32;
+                uint32_t int32;
+            } u = {x};
+
+            return uint16_t(u.int32 >> 16);
+        }
+    }

-// convert fp16 to bfp16 via fp32
-template <>
-inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, half_t>(half_t x)
-{
-    float x_fp32 = static_cast<float>(x);
+    // convert bfp16 to fp16 via fp32
+    template <>
+    inline __host__ __device__ constexpr half_t convert<half_t, bhalf_t>(bhalf_t x)
+    {
+        float x_fp32 = convert<float>(x);

-    return type_convert<bhalf_t>(x_fp32);
-}
+        return static_cast<half_t>(x_fp32);
+    }

-// convert bfp16 to int32 via fp32
-template <>
-inline __host__ __device__ constexpr int32_t type_convert<int32_t, bhalf_t>(bhalf_t x)
-{
-    float x_fp32 = type_convert<float>(x);
+    // convert fp16 to bfp16 via fp32
+    template <>
+    inline __host__ __device__ constexpr bhalf_t convert<bhalf_t, half_t>(half_t x)
+    {
+        float x_fp32 = static_cast<float>(x);

-    return static_cast<int32_t>(x_fp32);
-}
+        return convert<bhalf_t>(x_fp32);
+    }

-// convert int32 to bfp16 via fp32
-template <>
-inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int32_t>(int32_t x)
-{
-    float x_fp32 = static_cast<float>(x);
+    // convert bfp16 to int32 via fp32
+    template <>
+    inline __host__ __device__ constexpr int32_t convert<int32_t, bhalf_t>(bhalf_t x)
+    {
+        float x_fp32 = convert<float>(x);

-    return type_convert<bhalf_t>(x_fp32);
-}
+        return static_cast<int32_t>(x_fp32);
+    }

-// convert bfp16 to int8 via fp32
-template <>
-inline __host__ __device__ constexpr int8_t type_convert<int8_t, bhalf_t>(bhalf_t x)
-{
-    float x_fp32 = type_convert<float>(x);
+    // convert int32 to bfp16 via fp32
+    template <>
+    inline __host__ __device__ constexpr bhalf_t convert<bhalf_t, int32_t>(int32_t x)
+    {
+        float x_fp32 = static_cast<float>(x);

-    return static_cast<int8_t>(x_fp32);
-}
+        return convert<bhalf_t>(x_fp32);
+    }

-// convert int8 to bfp16 via fp32
-template <>
-inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, int8_t>(int8_t x)
-{
-    float x_fp32 = static_cast<float>(x);
+    // convert bfp16 to int8 via fp32
+    template <>
+    inline __host__ __device__ constexpr int8_t convert<int8_t, bhalf_t>(bhalf_t x)
+    {
+        float x_fp32 = convert<float>(x);

-    return type_convert<bhalf_t>(x_fp32);
-}
+        return static_cast<int8_t>(x_fp32);
+    }
+
+    // convert int8 to bfp16 via fp32
+    template <>
+    inline __host__ __device__ constexpr bhalf_t convert<bhalf_t, int8_t>(int8_t x)
+    {
+        float x_fp32 = static_cast<float>(x);
+
+        return convert<bhalf_t>(x_fp32);
+    }
+
+    private:
+    bool BF16ConvertRTN_;
+};

 template <typename T>
 struct NumericLimits

--- a/include/ck/utility/inner_product.hpp
+++ b/include/ck/utility/inner_product.hpp
@@ -87,10 +87,11 @@ __device__ void inner_product<half2_t, half2_t, float>(const half2_t& a, const h
 #else
    const vector_type<half_t, 2> a_vector{a};
    const vector_type<half_t, 2> b_vector{b};
+    TypeConvert type_convert = TypeConvert();

    static_for<0, 2, 1>{}([&](auto i) {
-        c += type_convert<int32_t>(a_vector.AsType<half_t>()[i]) *
-             type_convert<int32_t>(b_vector.AsType<half_t>()[i]);
+        c += type_convert.convert<int32_t>(a_vector.AsType<half_t>()[i]) *
+             type_convert.convert<int32_t>(b_vector.AsType<half_t>()[i]);
    });
 #endif
 }
@@ -138,7 +139,8 @@ __device__ void inner_product<half8_t, half8_t, float>(const half8_t& a, const h
 template <>
 __device__ void inner_product<int8_t, int8_t, int32_t>(const int8_t& a, const int8_t& b, int32_t& c)
 {
-    c += type_convert<int32_t>(a) * type_convert<int32_t>(b);
+    TypeConvert type_convert = TypeConvert();
+    c += type_convert.convert<int32_t>(a) * type_convert.convert<int32_t>(b);
 }

 template <>
@@ -174,10 +176,11 @@ inner_product<int8x4_t, int8x4_t, int32_t>(const int8x4_t& a, const int8x4_t& b,
 #else
    const vector_type<int8_t, 4> a_vector{a};
    const vector_type<int8_t, 4> b_vector{b};
+    TypeConvert type_convert = TypeConvert();

    static_for<0, 4, 1>{}([&](auto i) {
-        c += type_convert<int32_t>(a_vector.AsType<int8_t>()[i]) *
-             type_convert<int32_t>(b_vector.AsType<int8_t>()[i]);
+        c += type_convert.convert<int32_t>(a_vector.AsType<int8_t>()[i]) *
+             type_convert.convert<int32_t>(b_vector.AsType<int8_t>()[i]);
    });
 #endif
 }

--- a/library/include/ck/library/utility/host_tensor.hpp
+++ b/library/include/ck/library/utility/host_tensor.hpp
@@ -270,8 +270,10 @@ struct Tensor
    {
        Tensor<OutT> ret(mDesc);

-        ck::ranges::transform(
-            mData, ret.mData.begin(), [](auto value) { return ck::type_convert<OutT>(value); });
+        ck::ranges::transform(mData, ret.mData.begin(), [](auto value) {
+            ck::TypeConvert type_convert = ck::TypeConvert();
+            return type_convert.convert<OutT>(value);
+        });

        return ret;
    }