"...composable_kernel.git" did not exist on "97e6d514f7b21c4031fa4106148e2c17e103fd9a"
Commit 81757933 authored by Junhao's avatar Junhao
Browse files

add RTZ switch

parent f1a49daf
......@@ -965,6 +965,19 @@ inline __host__ __device__ constexpr float type_convert<float, bhalf_t>(bhalf_t
}
// convert fp32 to bf16
#if FLASH_ATTENTION_INTERNAL_USE_RTZ
// Round-toward-zero (RTZ) fp32 -> bf16 conversion.
//
// Keeps the upper 16 bits of the binary32 encoding (sign, 8 exponent bits and the
// top 7 mantissa bits) and discards the lower 16 mantissa bits without rounding.
//
// NaN handling: a NaN whose payload lives only in the discarded low 16 bits
// (for example 0x7f800001) would become 0x7f80 -- i.e. +infinity -- under plain
// truncation. Such inputs get mantissa bit 16 set first so the result stays a NaN.
//
// x: value to convert.
// Returns the 16-bit pattern of the truncated value.
template <>
inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, float>(float x)
{
    // Reinterpret the float's bits as an unsigned 32-bit integer.
    union
    {
        float fp32;
        uint32_t int32;
    } u = {x};

    // Exponent all ones together with non-zero low bits can only be a NaN
    // (infinity has an all-zero mantissa), so finite values and infinities
    // never take this branch.
    if((u.int32 & 0x7f800000u) == 0x7f800000u && (u.int32 & 0xffffu) != 0u)
    {
        u.int32 |= 0x10000u; // keep at least one mantissa bit in the truncated result
    }

    return uint16_t(u.int32 >> 16);
}
#else
template <>
inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, float>(float x)
{
......@@ -1007,6 +1020,7 @@ inline __host__ __device__ constexpr bhalf_t type_convert<bhalf_t, float>(float
return uint16_t(u.int32 >> 16);
}
#endif
// convert fp16 to bf16
template <>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment