Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
9857ca19
Commit
9857ca19
authored
Oct 11, 2024
by
letaoqin
Browse files
add relu
parent
622ce8f4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
63 additions
and
47 deletions
+63
-47
example/66_gemm_bias_activation/gemm_bias_add.hpp
example/66_gemm_bias_activation/gemm_bias_add.hpp
+57
-0
example/66_gemm_bias_activation/gemm_bias_add_fp16.cpp
example/66_gemm_bias_activation/gemm_bias_add_fp16.cpp
+3
-45
example/66_gemm_bias_activation/gemm_bias_add_xdl_fp16.cpp
example/66_gemm_bias_activation/gemm_bias_add_xdl_fp16.cpp
+3
-2
No files found.
example/66_gemm_bias_activation/gemm_bias_add.hpp
View file @
9857ca19
...
@@ -5,6 +5,63 @@
...
@@ -5,6 +5,63 @@
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/stream_config.hpp"
#include "ck/stream_config.hpp"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
namespace ck {
namespace impl {

/// AddActivation: fused "bias-add then activation" element-wise operator,
/// i.e. y = Activation(x0 + x1).
///
/// The addition and the activation are always carried out in float and the
/// result is narrowed to the output type Y via ck::type_convert, exactly as
/// the previous per-type specializations did.
///
/// NOTE(review): the original declared an undefined primary operator() and
/// then specialized it with in-class `template <>` blocks. Explicit
/// specialization at class scope is ill-formed standard C++ (it must appear
/// at namespace scope; only MSVC accepts it as an extension). A single
/// generic operator() that routes every input through float is portable and
/// behaviorally identical for all previously specialized combinations of
/// float and ck::half_t — and also covers combinations that were missing
/// (e.g. y:float, x0:half_t, x1:half_t).
template <typename Activation>
struct AddActivation
{
    /// @param y  output element, written as Y after the activation
    /// @param x0 first addend (e.g. the GEMM accumulator), converted to float
    /// @param x1 second addend (e.g. the bias), converted to float
    template <typename Y, typename X0, typename X1>
    __host__ __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
    {
        // Accumulate in float regardless of the input types; type_convert is
        // the identity when the argument is already float.
        const float x = ck::type_convert<float>(x0) + ck::type_convert<float>(x1);

        // Apply the activation in float precision, then narrow to Y.
        float result = 0;
        Activation{}.template operator()<float>(result, x);
        y = ck::type_convert<Y>(result);
    }
};

} // namespace impl
} // namespace ck
enum
class
ActivationType
enum
class
ActivationType
{
{
...
...
example/66_gemm_bias_activation/gemm_bias_add_fp16.cpp
View file @
9857ca19
...
@@ -27,60 +27,18 @@ using DsLayout = ck::Tuple<D0Layout>;
...
@@ -27,60 +27,18 @@ using DsLayout = ck::Tuple<D0Layout>;
using
CLayout
=
Row
;
using
CLayout
=
Row
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
Add
=
ck
::
tensor_operation
::
element_wise
::
Add
;
using
Relu
=
ck
::
tensor_operation
::
element_wise
::
Relu
;
using
AElementOp
=
PassThrough
;
using
AElementOp
=
PassThrough
;
using
BElementOp
=
PassThrough
;
using
BElementOp
=
PassThrough
;
using
CDEElementOp
=
Add
;
using
CDEElementOp
=
ck
::
impl
::
AddActivation
<
Relu
>
;
;
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
S
=
ck
::
Sequence
<
Is
...
>
;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
namespace ck {
namespace impl {

/// AddActivation: fused "bias-add then activation" element-wise operator,
/// i.e. y = Activation(x0 + x1), used as the CDE element-wise op of the
/// GEMM example. The add and the activation run in float; the result is
/// narrowed to Y via type_convert, matching the former specializations.
///
/// NOTE(review): the original used in-class `template <>` explicit
/// specializations of operator(), which is ill-formed standard C++
/// (explicit specialization must be at namespace scope; MSVC-only
/// extension), and this copy was also missing the all-half_t combination
/// that the header version provides. One generic operator() is portable,
/// removes the duplication, and is consistent with the header definition.
template <typename Activation>
struct AddActivation
{
    /// @param y  output element, written as Y after the activation
    /// @param x0 first addend (converted to float)
    /// @param x1 second addend (converted to float)
    template <typename Y, typename X0, typename X1>
    __host__ __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
    {
        // type_convert<float> is the identity for float inputs, so this is
        // byte-for-byte the same arithmetic as every former specialization.
        const float x = type_convert<float>(x0) + type_convert<float>(x1);

        // Activate in float, then narrow to the output type.
        float result = 0;
        Activation{}.template operator()<float>(result, x);
        y = type_convert<Y>(result);
    }
};

} // namespace impl
} // namespace ck
// clang-format off
// clang-format off
template
<
typename
ADataType
,
typename
BDataType
,
typename
DsDataType
,
typename
CDataType
>
template
<
typename
ADataType
,
typename
BDataType
,
typename
DsDataType
,
typename
CDataType
>
using
DeviceOpInstance_64_16_16_64
=
ck
::
tensor_operation
::
device
::
DeviceGemmMultiD_Xdl_CShuffle_V3
<
using
DeviceOpInstance_64_16_16_64
=
ck
::
tensor_operation
::
device
::
DeviceGemmMultiD_Xdl_CShuffle_V3
<
...
...
example/66_gemm_bias_activation/gemm_bias_add_xdl_fp16.cpp
View file @
9857ca19
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#include "ck/library/utility/literals.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
...
@@ -39,11 +40,11 @@ using DsLayout = ck::Tuple<D0Layout>;
...
@@ -39,11 +40,11 @@ using DsLayout = ck::Tuple<D0Layout>;
using
ELayout
=
Row
;
using
ELayout
=
Row
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
Add
=
ck
::
tensor_operation
::
element_wise
::
Add
;
using
Relu
=
ck
::
tensor_operation
::
element_wise
::
Relu
;
using
AElementOp
=
PassThrough
;
using
AElementOp
=
PassThrough
;
using
BElementOp
=
PassThrough
;
using
BElementOp
=
PassThrough
;
using
CElementOp
=
Add
;
using
CElementOp
=
ck
::
impl
::
AddActivation
<
Relu
>
;
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
A0DataType
,
using
ReferenceGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
A0DataType
,
B0DataType
,
B0DataType
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment