"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "23220fe5c16e38575d186ec62bdca2c3f16951b4"
Commit dd29eb09 authored by Chao Liu's avatar Chao Liu
Browse files

gemm/conv activation fusion example

parent ac0d8066
......@@ -38,9 +38,43 @@ using ALayout = ck::tensor_layout::gemm::RowMajor;
using BLayout = ck::tensor_layout::gemm::ColumnMajor;
using CLayout = ck::tensor_layout::gemm::RowMajor;
using AElementOp = ck::tensor_operation::element_wise::PassThrough;
// Element-wise ReLU functor: y = max(x, 0).
// Used as an activation fused into the GEMM epilogue.
struct Relu
{
    // float in/out overload
    __host__ __device__ constexpr void operator()(float& y, const float& x) const
    {
        y = x > 0 ? x : 0;
    }

    // half-precision in/out overload
    __host__ __device__ constexpr void operator()(ck::half_t& y, const ck::half_t& x) const
    {
        y = x > 0 ? x : 0;
    }
};
// Element-wise Hardswish functor: y = x * clamp(x + 3, 0, 6) / 6.
// Fix: the original scaled by the truncated literal 0.166667f, which loses
// precision versus the exact single-precision value of 1/6; use a computed
// constant instead. The (b > 0) * ... bool-arithmetic clamp is rewritten as
// an explicit clamp for the same result with clearer intent.
struct Hardswish
{
    // float in/out overload
    __host__ __device__ constexpr void operator()(float& y, const float& x) const
    {
        const float shifted = x + float{3};
        // clamp(shifted, 0, 6): zero below 0, capped at 6 above.
        const float clamped = shifted < float{0} ? float{0}
                                                 : (shifted > float{6} ? float{6} : shifted);
        y = x * clamped * (float{1} / float{6});
    }

    // half-precision in/out overload: compute in float for accuracy,
    // then narrow to half on assignment (matches the float overload).
    __host__ __device__ constexpr void operator()(ck::half_t& y, const ck::half_t& x) const
    {
        const float a       = x;
        const float shifted = a + float{3};
        const float clamped = shifted < float{0} ? float{0}
                                                 : (shifted > float{6} ? float{6} : shifted);
        y = a * clamped * (float{1} / float{6});
    }
};
using AElementOp = Relu;
using BElementOp = ck::tensor_operation::element_wise::PassThrough;
using CElementOp = ck::tensor_operation::element_wise::PassThrough;
using CElementOp = Hardswish;
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
......
......@@ -25,9 +25,43 @@ using AccDataType = float;
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
// Element-wise ReLU functor: y = max(x, 0).
// Applied as a fused activation on the operator output.
struct Relu
{
    // float in/out overload
    __host__ __device__ constexpr void operator()(float& y, const float& x) const
    {
        y = x > 0 ? x : 0;
    }

    // half-precision in/out overload
    __host__ __device__ constexpr void operator()(ck::half_t& y, const ck::half_t& x) const
    {
        y = x > 0 ? x : 0;
    }
};
// Element-wise Hardswish functor: y = x * clamp(x + 3, 0, 6) / 6.
// Fix: the original scaled by the truncated literal 0.166667f, which loses
// precision versus the exact single-precision value of 1/6; use a computed
// constant instead. The (b > 0) * ... bool-arithmetic clamp is rewritten as
// an explicit clamp for the same result with clearer intent.
struct Hardswish
{
    // float in/out overload
    __host__ __device__ constexpr void operator()(float& y, const float& x) const
    {
        const float shifted = x + float{3};
        // clamp(shifted, 0, 6): zero below 0, capped at 6 above.
        const float clamped = shifted < float{0} ? float{0}
                                                 : (shifted > float{6} ? float{6} : shifted);
        y = x * clamped * (float{1} / float{6});
    }

    // half-precision in/out overload: compute in float for accuracy,
    // then narrow to half on assignment (matches the float overload).
    __host__ __device__ constexpr void operator()(ck::half_t& y, const ck::half_t& x) const
    {
        const float a       = x;
        const float shifted = a + float{3};
        const float clamped = shifted < float{0} ? float{0}
                                                 : (shifted > float{6} ? float{6} : shifted);
        y = a * clamped * (float{1} / float{6});
    }
};
using InElementOp = Relu;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = Hardswish;
static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
......
......@@ -29,9 +29,43 @@ using InLayout = ck::tensor_layout::convolution::NHWC;
using WeiLayout = ck::tensor_layout::convolution::KYXC;
using OutLayout = ck::tensor_layout::convolution::NHWK;
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
// Element-wise ReLU functor: y = max(x, 0).
// Applied as a fused activation on the convolution input path.
struct Relu
{
    // float in/out overload
    __host__ __device__ constexpr void operator()(float& y, const float& x) const
    {
        y = x > 0 ? x : 0;
    }

    // half-precision in/out overload
    __host__ __device__ constexpr void operator()(ck::half_t& y, const ck::half_t& x) const
    {
        y = x > 0 ? x : 0;
    }
};
// Element-wise Hardswish functor: y = x * clamp(x + 3, 0, 6) / 6.
// Fix: the original scaled by the truncated literal 0.166667f, which loses
// precision versus the exact single-precision value of 1/6; use a computed
// constant instead. The (b > 0) * ... bool-arithmetic clamp is rewritten as
// an explicit clamp for the same result with clearer intent.
struct Hardswish
{
    // float in/out overload
    __host__ __device__ constexpr void operator()(float& y, const float& x) const
    {
        const float shifted = x + float{3};
        // clamp(shifted, 0, 6): zero below 0, capped at 6 above.
        const float clamped = shifted < float{0} ? float{0}
                                                 : (shifted > float{6} ? float{6} : shifted);
        y = x * clamped * (float{1} / float{6});
    }

    // half-precision in/out overload: compute in float for accuracy,
    // then narrow to half on assignment (matches the float overload).
    __host__ __device__ constexpr void operator()(ck::half_t& y, const ck::half_t& x) const
    {
        const float a       = x;
        const float shifted = a + float{3};
        const float clamped = shifted < float{0} ? float{0}
                                                 : (shifted > float{6} ? float{6} : shifted);
        y = a * clamped * (float{1} / float{6});
    }
};
using InElementOp = Relu;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = Hardswish;
static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
......
......@@ -72,8 +72,13 @@ using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
8>; // CBlockTransferScalarPerVector_NWaveNPerXdl
// clang-format on
using ReferenceConvBwdWeightInstance = ck::tensor_operation::host::
ReferenceConvBwdWeight<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
using ReferenceConvBwdWeightInstance =
ck::tensor_operation::host::ReferenceConvBwdWeight<InDataType,
WeiDataType,
OutDataType,
InElementOp,
WeiElementOp,
OutElementOp>;
int main(int argc, char* argv[])
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment