Commit deda3f92 authored by Rostyslav Geyyer's avatar Rostyslav Geyyer
Browse files

Use asm instruction instead of a builtin

parent 306fd506
...@@ -131,8 +131,8 @@ inline __host__ __device__ float type_convert<float, f8_t>(f8_t x) ...@@ -131,8 +131,8 @@ inline __host__ __device__ float type_convert<float, f8_t>(f8_t x)
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) #if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
float fval; float fval;
uint32_t i32val = static_cast<uint32_t>(x); uint32_t i32val = static_cast<uint32_t>(x);
fval = __builtin_amdgcn_cvt_f32_fp8(i32val, 0); // fval = __builtin_amdgcn_cvt_f32_fp8(i32val, 0);
// asm volatile("v_cvt_f32_fp8 %0, %1 src0_sel:BYTE_0" : "=v"(fval) : "v"(i32val)); asm volatile("v_cvt_f32_fp8 %0, %1 src0_sel:BYTE_0" : "=v"(fval) : "v"(i32val));
return fval; return fval;
#else #else
constexpr bool negative_zero_nan = true; constexpr bool negative_zero_nan = true;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment