Add element op

28187354 · Rostyslav Geyyer · 653f9515 · 28187354 · 28187354
Commit 28187354 authored May 12, 2023 by Rostyslav Geyyer
Showing with 19 additions and 9 deletions

include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp ...or_operation/gpu/element/unary_element_wise_operation.hpp +17 -0

include/ck/utility/data_type.hpp include/ck/utility/data_type.hpp +2 -9

No files found.
--- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
@@ -127,6 +127,23 @@ struct ConvertBF16RTN
    }
 };

+struct ConvertF8SR
+{
+    // Element-wise functor: stores f8_convert_sr<Y>(x) in y, i.e. converts x to fp8 using stochastic rounding (SR)
+    template <typename Y, typename X>
+    __host__ __device__ void operator()(Y& y, const X& x) const
+    {
+        // Output type Y must be exactly f8_t; any other Y is rejected at compile time
+        static_assert(is_same<Y, f8_t>::value, "Data type is not supported by this operation!");
+
+        // Input type X must be float or half_t. NOTE(review): only the <f8_t, float> specialization of f8_convert_sr is visible in this change - confirm a half_t one exists
+        static_assert(is_same<X, float>::value || is_same<X, half_t>::value,
+                      "Data type is not supported by this operation!");
+
+        y = f8_convert_sr<Y>(x);
+    }
+};
+
 struct Scale
 {
    __host__ __device__ Scale(float scale) : scale_(scale) {}

--- a/include/ck/utility/data_type.hpp
+++ b/include/ck/utility/data_type.hpp
@@ -1174,11 +1174,11 @@ __host__ __device__ uint32_t prand_generator(int id, T val)

 // Declare a template function for fp8 conversion using SR
 template <typename Y, typename X>
-__host__ __device__ constexpr Y fp8_convert_sr(X x);
+__host__ __device__ constexpr Y f8_convert_sr(X x);

 // convert fp32 to fp8 with stochastic rounding
 template <>
-inline __host__ __device__ f8_t fp8_convert_sr<f8_t, float>(float x)
+inline __host__ __device__ f8_t f8_convert_sr<f8_t, float>(float x)
 {
    constexpr bool negative_zero_nan = true;
    constexpr bool clip              = true;
@@ -1188,13 +1188,6 @@ inline __host__ __device__ f8_t fp8_convert_sr<f8_t, float>(float x)
    return cast_to_f8<negative_zero_nan, clip, (rm == f8_rounding_mode::stochastic)>(x, rng);
 }

-// convert fp8 to fp32
-template <>
-inline __host__ __device__ float fp8_convert_sr<float, f8_t>(f8_t x)
-{
-    return type_convert<float>(x);
-}
-
 template <typename T>
 struct NumericLimits
 {