Remove temporary code

b5a3ea2d · danyao12 · 86717157 · b5a3ea2d · b5a3ea2d
Commit b5a3ea2d authored Jul 25, 2023 by danyao12
Showing with 0 additions and 112 deletions

include/ck/utility/generic_memory_space_atomic.hpp +0 -89

include/ck/utility/type_convert.hpp +0 -23

No files found.
--- a/include/ck/utility/generic_memory_space_atomic.hpp
+++ b/include/ck/utility/generic_memory_space_atomic.hpp
@@ -71,78 +71,6 @@ __device__ double2_t atomic_add<double2_t>(double2_t* p_dst, const double2_t& x)
    return vy.template AsType<double2_t>()[I0];
 }

-inline __host__ __device__ half2_t add_fp16x2_t(const half2_t& a, const half2_t& b)
-{
-    half2_t rtn;
-    rtn[0] = a[0] + b[0];
-    rtn[1] = a[1] + b[1];
-    return rtn;
-}
-
-union U32FP162_ADDR
-{
-    uint32_t* u32_a;
-    half2_t* fp162_a;
-};
-
-union U32FP162
-{
-    uint32_t u32;
-    half2_t fp162;
-};
-
-template <>
-__device__ half2_t atomic_add<half2_t>(half2_t* p_dst, const half2_t& x)
-{
-    U32FP162_ADDR dword_addr;
-    U32FP162 cur_v;
-    U32FP162 new_;
-    uint32_t old_v, new_v;
-    dword_addr.fp162_a = p_dst;
-    cur_v.u32          = *dword_addr.u32_a;
-
-    do
-    {
-        old_v      = cur_v.u32;
-        new_.fp162 = add_fp16x2_t(cur_v.fp162, x);
-        new_v      = new_.u32;
-        cur_v.u32  = atomicCAS(dword_addr.u32_a, old_v, new_v);
-    } while(cur_v.u32 != old_v);
-
-    return x;
-}
-
-// template <>
-// __device__ half2_t atomic_add<half2_t>(half2_t* p_dst, const half2_t& x)
-// {
-//     uint32_t * dword_addr = reinterpret_cast<uint32_t*>(p_dst);
-//     uint32_t cur_v = *dword_addr;
-//     uint32_t old_v, new_v;
-
-//     do {
-//         old_v = cur_v;
-//         half2_t new_ = add_fp16x2_t(*reinterpret_cast<half2_t*>(&cur_v), x);
-//         new_v = *reinterpret_cast<uint32_t*>(&new_);
-//         cur_v = atomicCAS(dword_addr, old_v, new_v);
-//     }while(cur_v != old_v);
-
-//     return x;
-// }
-
-// union U16BF16 {
-//     uint16_t u16;
-//     bhalf_t bf16;
-// };
-
-// inline __host__ __device__ bhalf_t add_bf16_t(const bhalf_t& a, const bhalf_t& b){
-//     U16BF16 xa {.bf16 = a};
-//     U16BF16 xb {.bf16 = b};
-
-//     U16BF16 xr;
-//     xr.u16 = xa.u16 + xb.u16;
-//     return xr.bf16;
-// }
-
 inline __host__ __device__ bhalf_t add_bf16_t(const bhalf_t& a, const bhalf_t& b)
 {
    return type_convert<bhalf_t>(type_convert<float>(a) + type_convert<float>(b));
@@ -189,23 +117,6 @@ __device__ bhalf2_t atomic_add<bhalf2_t>(bhalf2_t* p_dst, const bhalf2_t& x)
    return x;
 }

-// template <>
-// __device__ bhalf2_t atomic_add<bhalf2_t>(bhalf2_t* p_dst, const bhalf2_t& x)
-// {
-//     uint32_t * dword_addr = reinterpret_cast<uint32_t*>(p_dst);
-//     uint32_t cur_v = *dword_addr;
-//     uint32_t old_v, new_v;
-
-//     do {
-//         old_v = cur_v;
-//         bhalf2_t new_ = add_bf16x2_t(*reinterpret_cast<bhalf2_t*>(&cur_v), x);
-//         new_v = *reinterpret_cast<uint32_t*>(&new_);
-//         cur_v = atomicCAS(dword_addr, old_v, new_v);
-//     }while(cur_v != old_v);
-
-//     return x;
-// }
-
 // Caution: DO NOT REMOVE
 // intentionally have only declaration but no definition to cause compilation failure when trying to
 // instantiate this template. The purpose is to make the implementation of atomic_max explicit for

--- a/include/ck/utility/type_convert.hpp
+++ b/include/ck/utility/type_convert.hpp
@@ -120,29 +120,6 @@ inline __host__ __device__ half_t type_convert<half_t, f8_t>(f8_t x)
    return utils::cast_from_f8<half_t, negative_zero_nan>(x);
 }

-template <>
-inline __host__ __device__ bhalf2_t type_convert<bhalf2_t, half2_t>(half2_t x)
-{
-    float y0{0}, y1{0};
-    bhalf2_t y{0};
-    asm volatile("\n \
-            v_cvt_f32_f16 %0, %1 \n \
-            "
-                 : "=v"(y0)
-                 : "v"(x));
-    asm volatile("\n \
-            v_cvt_f32_f16 %0, %1 src0_sel:WORD_1\n \
-            "
-                 : "=v"(y1)
-                 : "v"(x));
-    asm volatile("\n \
-            v_pack_b32_f16 %0, %1, %2 op_sel:[1, 1] \n \
-            "
-                 : "=v"(y)
-                 : "v"(y0), "v"(y1));
-    return y;
-}
-
 // Declare a template function for bf16 conversion using RTN
 template <typename Y, typename X>
 __host__ __device__ constexpr Y bf16_convert_rtn(X x);