Merge host and device implementations

f1c2ec74 · Rostyslav Geyyer · ee568bc2 · f1c2ec74
Commit f1c2ec74 authored May 24, 2023 by Rostyslav Geyyer
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 35 deletions

include/ck/utility/data_type.hpp include/ck/utility/data_type.hpp +6 -35

No files found.
--- a/include/ck/utility/data_type.hpp
+++ b/include/ck/utility/data_type.hpp
@@ -1190,17 +1190,13 @@ __host__ __device__ uint32_t prand_generator(int id, T val, uint32_t seed = seed
    return 0;
 }

-// Declare a template function for fp8 conversion using SR on host
+// Declare a template function for fp8 conversion using SR
 template <typename Y, typename X>
-__host__ constexpr Y f8_convert_sr(X x);
+__host__ __device__ constexpr Y f8_convert_sr(X x);

-// Declare a template function for fp8 conversion using SR on device
-template <typename Y, typename X>
-__device__ constexpr Y f8_convert_sr(X x);
-
-// convert fp32 to fp8 with stochastic rounding on host
+// convert fp32 to fp8 with stochastic rounding
 template <>
-inline __host__ f8_t f8_convert_sr<f8_t, float>(float x)
+inline __host__ __device__ f8_t f8_convert_sr<f8_t, float>(float x)
 {
    constexpr bool negative_zero_nan = true;
    constexpr bool clip              = true;
@@ -1211,21 +1207,9 @@ inline __host__ f8_t f8_convert_sr<f8_t, float>(float x)
    return cast_to_f8<float, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>(x, rng);
 }

-// convert fp32 to fp8 with stochastic rounding on device
-template <>
-inline __device__ f8_t f8_convert_sr<f8_t, float>(float x)
-{
-    constexpr bool negative_zero_nan = true;
-    constexpr bool clip              = true;
-    constexpr f8_rounding_mode rm    = f8_rounding_mode::stochastic;
-    constexpr int seed               = 42;
-    uint32_t rng = prand_generator<float, seed>(reinterpret_cast<uintptr_t>(&x), x);
-    return cast_to_f8<float, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>(x, rng);
-}
-
-// convert fp16 to fp8 with stochastic rounding on host
+// convert fp16 to fp8 with stochastic rounding
 template <>
-inline __host__ f8_t f8_convert_sr<f8_t, half_t>(half_t x)
+inline __host__ __device__ f8_t f8_convert_sr<f8_t, half_t>(half_t x)
 {
    constexpr bool negative_zero_nan = true;
    constexpr bool clip              = true;
@@ -1237,19 +1221,6 @@ inline __host__ f8_t f8_convert_sr<f8_t, half_t>(half_t x)
                                                                                             rng);
 }

-// convert fp16 to fp8 with stochastic rounding on device
-template <>
-inline __device__ f8_t f8_convert_sr<f8_t, half_t>(half_t x)
-{
-    constexpr bool negative_zero_nan = true;
-    constexpr bool clip              = true;
-    constexpr f8_rounding_mode rm    = f8_rounding_mode::stochastic;
-    constexpr int seed               = 42;
-    uint32_t rng = prand_generator<half_t, seed>(reinterpret_cast<uintptr_t>(&x), x);
-    return cast_to_f8<half_t, negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>(x,
-                                                                                             rng);
-}
-
 template <typename T>
 struct NumericLimits
 {