Add UnaryConvertPrecision struct for high-precision workloads

4797b291 · Rosty Geyyer · ab9dec04 · 4797b291
Commit 4797b291 authored Apr 17, 2023 by Rosty Geyyer
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 12 additions and 0 deletions

include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp +12 -0

No files found.
--- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
@@ -86,6 +86,18 @@ struct UnaryConvert
    }
 };

+// Converts float to bhalf_t through type_convert_bf16_rtn; other type pairs have no definition.
+struct UnaryConvertPrecision : UnaryConvert
+{
+    template <typename Y, typename X>
+    __host__ __device__ void operator()(Y& y, const X& x) const;
+};
+// Specialized at namespace scope because GCC rejects class-scope explicit specializations.
+template <>
+inline __host__ __device__ void
+UnaryConvertPrecision::operator()<bhalf_t, float>(bhalf_t& y, const float& x) const
+{ y = type_convert_bf16_rtn(x); }
+
 struct Scale
 {
    __host__ __device__ Scale(float scale) : scale_(scale) {}