"vscode:/vscode.git/clone" did not exist on "3b8664611508c8e8999a45be3f58ea9e7f4ed010"
Commit b53926e9 authored by Jing Zhang's avatar Jing Zhang
Browse files

Add an activ_type template argument (0 = passthrough, 1 = relu, 2 = sigmoid) and thread it through the v5r1 convolution driver, the gridwise GEMM Run overloads, the device wrapper, and the host reference activation, replacing the hard-coded activation selection.

parent fe427fd1
......@@ -35,7 +35,8 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
typename ConvStrides,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
typename InRightPads,
index_t activ_type>
__host__ void Run(const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor<Out...>& out_n_k0_ho_wo_k1_global_desc,
......@@ -43,6 +44,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
const InRightPads& in_right_pads_,
Number<activ_type>,
const FloatAB* __restrict__ p_wei_global,
const FloatAB* __restrict__ p_in_global,
FloatC* __restrict__ p_out_global) const
......@@ -297,6 +299,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
const FloatAB*,
const FloatAB*,
FloatC*,
Number<activ_type>,
integral_constant<bool, true>,
integral_constant<bool, true>>;
......@@ -308,6 +311,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
p_wei_global,
p_in_global,
p_out_global,
Number<activ_type>{},
integral_constant<bool, true>{},
integral_constant<bool, true>{});
}
......@@ -317,6 +321,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
const FloatAB*,
const FloatAB*,
FloatC*,
Number<activ_type>,
integral_constant<bool, true>,
integral_constant<bool, false>>;
......@@ -328,6 +333,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
p_wei_global,
p_in_global,
p_out_global,
Number<activ_type>{},
integral_constant<bool, true>{},
integral_constant<bool, false>{});
}
......@@ -337,6 +343,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
const FloatAB*,
const FloatAB*,
FloatC*,
Number<activ_type>,
integral_constant<bool, false>,
integral_constant<bool, true>>;
......@@ -348,6 +355,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
p_wei_global,
p_in_global,
p_out_global,
Number<activ_type>{},
integral_constant<bool, false>{},
integral_constant<bool, true>{});
}
......@@ -357,6 +365,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
const FloatAB*,
const FloatAB*,
FloatC*,
Number<activ_type>,
integral_constant<bool, false>,
integral_constant<bool, false>>;
......@@ -368,6 +377,7 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
p_wei_global,
p_in_global,
p_out_global,
Number<activ_type>{},
integral_constant<bool, false>{},
integral_constant<bool, false>{});
}
......
......@@ -72,11 +72,12 @@ struct GridwiseStaticGemm_km_kn_mn_v3
return a_block_space_size * sizeof(FloatAB);
}
template <bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
template <index_t activ_type, bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
__device__ void Run(const FloatAB* __restrict__ p_a_global,
const FloatAB* __restrict__ p_b_global,
FloatC* __restrict__ p_c_global,
FloatAB* __restrict__ p_shared_block,
Number<activ_type>,
integral_constant<bool, HasMainKBlockLoop>,
integral_constant<bool, HasDoubleTailKBlockLoop>) const
{
......@@ -348,7 +349,6 @@ struct GridwiseStaticGemm_km_kn_mn_v3
// activ
{
constexpr index_t activ_type = 2;
static_for<0, c_k_n_ho_wo_thread_desc.GetElementSpaceSize(), 1>{}([&](auto i) {
if constexpr(activ_type == 1)
c_thread_buf(i) = c_thread_buf[i] >= 0 ? c_thread_buf[i] : 0.0;
......@@ -392,10 +392,11 @@ struct GridwiseStaticGemm_km_kn_mn_v3
}
// pass tensor descriptor by reference
template <bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
template <index_t activ_type, bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
__device__ void Run(const FloatAB* __restrict__ p_a_global,
const FloatAB* __restrict__ p_b_global,
FloatC* __restrict__ p_c_global,
Number<activ_type>,
integral_constant<bool, HasMainKBlockLoop>,
integral_constant<bool, HasDoubleTailKBlockLoop>) const
{
......@@ -407,6 +408,7 @@ struct GridwiseStaticGemm_km_kn_mn_v3
p_b_global,
p_c_global,
p_shared_block,
Number<activ_type>{},
integral_constant<bool, HasMainKBlockLoop>{},
integral_constant<bool, HasDoubleTailKBlockLoop>{});
}
......
......@@ -437,6 +437,8 @@ int main(int argc, char* argv[])
}
#endif
constexpr ck::index_t activ_type = 2;
#if USE_CONV_FWD_V5R1_NCHW
if(algo == ConvForwardAlgo::V5R1NCHW)
{
......@@ -452,17 +454,17 @@ int main(int argc, char* argv[])
#else
device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw
#endif
<in_data_t, 8, 8, acc_data_t, out_data_t>(tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
tmp[I4],
tmp[I5],
tmp[I6],
in,
wei,
out_device,
nrepeat);
<in_data_t, 8, 8, activ_type, acc_data_t, out_data_t>(tmp[I0],
tmp[I1],
tmp[I2],
tmp[I3],
tmp[I4],
tmp[I5],
tmp[I6],
in,
wei,
out_device,
nrepeat);
}
#endif
......@@ -529,8 +531,8 @@ int main(int argc, char* argv[])
make_tuple(conv_dilation_h, conv_dilation_w),
make_tuple(in_left_pad_h, in_left_pad_w),
make_tuple(in_right_pad_h, in_right_pad_w),
layout,
ActivType_t::sigmoid);
activ_type,
layout);
check_error(out_host, out_device);
......
......@@ -7,6 +7,7 @@
template <typename TInWei,
ck::index_t InWeiVectorSize,
ck::index_t OutVectorSize,
ck::index_t activ_type,
typename TAcc,
typename TOut,
typename InLengths,
......@@ -152,6 +153,7 @@ void device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
conv_dilations,
in_left_pads,
in_right_pads,
Number<activ_type>{},
static_cast<typename vector_type<TInWei, InWeiVectorSize>::type*>(
wei_k_c0_y_x_c1_device_buf.GetDeviceBuffer()),
static_cast<typename vector_type<TInWei, InWeiVectorSize>::type*>(
......
#pragma once
#include "host_tensor.hpp"
// Activation selector used by the host-side reference convolution.
// The numeric values line up with the integer activ_type cases used in
// the device/gridwise code (0 = passthrough, 1 = relu, 2 = sigmoid), so
// the two sides can be compared directly — keep them in sync.
typedef enum
{
    passthrough = 0, // identity: activ(v) == v
    relu,            // == 1: max(v, 0)
    sigmoid          // == 2: 1 / (1 + exp(-v))
} ActivType_t;
template <typename TIn,
typename TWei,
typename TOut,
......@@ -96,13 +89,13 @@ void host_direct_convolution(const Tensor<TIn>& in,
}
template <typename T>
inline auto activ(T v, const ActivType_t activ_type)
inline auto activ(T v, const ck::index_t activ_type)
{
switch(activ_type)
{
case passthrough: return v;
case relu: return (v >= 0 ? v : 0);
case sigmoid: return (1 / (1 + exp(-v)));
case 0: return v;
case 1: return (v >= 0 ? v : 0);
case 2: return (1 / (1 + exp(-v)));
default: throw std::runtime_error("unsupported activ type"); break;
}
}
......@@ -121,8 +114,8 @@ void host_direct_convolution_activ(const Tensor<TIn>& in,
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
const InRightPads& in_right_pads,
const ConvTensorLayout layout = ConvTensorLayout::NCHW,
const ActivType_t activ_type = ActivType_t::passthrough)
const ck::index_t activ_type,
const ConvTensorLayout layout = ConvTensorLayout::NCHW)
{
using namespace ck;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment