Commit da207144 authored by Jing Zhang

test: enable LeakyReLU activation and the bias epilogue in the v5r1 forward convolution

parent 26c42b94
@@ -315,6 +315,8 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
     static constexpr auto NPerBlock = I1;
 
+    static constexpr FloatAcc alpha = 0.30000001192092896;
+
     __host__ __device__ static constexpr index_t GetSharedMemoryNumberOfByte()
     {
         constexpr auto max_lds_align = Number<ABlockTransferDstScalarPerVector_E2>{};
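The added alpha is the LeakyReLU negative slope used by the relocated Activ block further down; 0.30000001192092896 is exactly 0.3 rounded to single precision. A minimal standalone sketch of the function it parameterizes, assuming FloatAcc is float (the function name is illustrative, not from this repo):

    constexpr float alpha = 0.3f; // same value as 0.30000001192092896 in single precision

    // LeakyReLU: identity for x >= 0, slope alpha for x < 0
    constexpr float leaky_relu(float x) { return x >= 0.0f ? x : alpha * x; }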
@@ -995,28 +997,8 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
             }
         }
 
-        // activ
-        if constexpr(activ_type > 0)
-        {
-            static_for<0, c_k1_n_h2_w2_thread_gemm_desc.GetElementSpaceSize(), 1>{}([&](auto i) {
-                if constexpr(activ_type == 1)
-                {
-                    c_thread_buf(i) = c_thread_buf[i] >= 0 ? c_thread_buf[i] : 0.0;
-                }
-                else if constexpr(activ_type == 2)
-                {
-                    FloatAcc x = 1.0 + exp(-c_thread_buf[i]);
-                    asm volatile("\n \
-                    v_rcp_f32 %0, %1 \n"
-                                 : "=v"(x)
-                                 : "0"(x));
-                    c_thread_buf(i) = x;
-                }
-            });
-        }
-
         // Bias
         if constexpr(bias_type == 1)
         {
             constexpr auto bias_k0_k1_thread_desc =
@@ -1068,6 +1050,28 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
 #endif
         }
 
+        // Activ
+        if constexpr(activ_type > 0)
+        {
+            static_for<0, c_k1_n_h2_w2_thread_gemm_desc.GetElementSpaceSize(), 1>{}([&](auto i) {
+                if constexpr(activ_type == 1)
+                {
+                    c_thread_buf(i) =
+                        c_thread_buf[i] >= 0 ? c_thread_buf[i] : alpha * c_thread_buf[i];
+                }
+                else if constexpr(activ_type == 2)
+                {
+                    FloatAcc x = 1.0 + exp(-c_thread_buf[i]);
+                    asm volatile("\n \
+                    v_rcp_f32 %0, %1 \n"
+                                 : "=v"(x)
+                                 : "0"(x));
+                    c_thread_buf(i) = x;
+                }
+            });
+        }
+
 #if 1
         // Output
         if constexpr(out_type == 1)
......
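The activation pass is moved to run after the bias add, and the activ_type == 1 branch now applies LeakyReLU (alpha * x for negative x) instead of plain ReLU. The activ_type == 2 branch computes a sigmoid: it forms 1 + exp(-x), then takes the reciprocal with the GCN v_rcp_f32 instruction (the "0"(x) constraint ties the input to the output register). A host-side equivalent, assuming float accumulation; v_rcp_f32 is a fast approximate reciprocal, so the device result can differ from exact IEEE division in the last few bits:

    #include <cmath>

    float sigmoid_ref(float x)
    {
        float denom = 1.0f + std::exp(-x); // same 1 + exp(-x) the kernel forms
        return 1.0f / denom;               // the kernel replaces this divide with v_rcp_f32
    }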
@@ -303,7 +303,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         decltype(a_e0_e1_k_e2_global_move_slice_window_step_hack),
         decltype(b_e0_e1_n_h0_h1_h2_w0_w1_w2_e2_global_move_slice_window_step_hack),
         activ_type,
-        0, // bias_type
+        1, // bias_type
         1, // out_type
         0  // add_type
         >;
......
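Flipping bias_type to 1 turns the bias path on at compile time. These trailing integral template arguments (activ_type, bias_type, out_type, add_type) gate epilogue code behind if constexpr, so untaken branches are discarded from the instantiated kernel. An illustrative sketch of the pattern, not repo code (names hypothetical):

    #include <cmath>

    template <int bias_type, int activ_type>
    float epilogue(float v, float bias, float alpha)
    {
        if constexpr(bias_type == 1)
            v += bias; // bias first, matching the new block order in this commit
        if constexpr(activ_type == 1)
            v = v >= 0.0f ? v : alpha * v; // LeakyReLU
        else if constexpr(activ_type == 2)
            v = 1.0f / (1.0f + std::exp(-v)); // sigmoid
        return v;
    }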
@@ -263,7 +263,7 @@ int main(int argc, char* argv[])
         in_right_pads_dev);
     };
 
-    constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::None;
+    constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;
 
 #if USE_CONV_FWD_V5R1_NCHWC
     if(algo == ConvForwardAlgo::V5R1NCHWC)
......
@@ -93,9 +93,9 @@ int main(int argc, char* argv[])
     const bool do_log = std::stoi(argv[4]);
     const int nrepeat = std::stoi(argv[5]);
 
-    constexpr index_t activ_type = 0;
+    constexpr index_t activ_type = 1;
 
-#if 0
+#if 1
     constexpr auto N = Number<1>{};
     constexpr auto Hi = Number<1080>{};
     constexpr auto Wi = Number<1920>{};
......
@@ -4,10 +4,11 @@
 template <typename T>
 inline auto activ(T v, const ck::index_t activ_type)
 {
+    const T alpha = 0.30000001192092896;
     switch(activ_type)
     {
     case 0: return v;
-    case 1: return (v >= 0 ? v : 0);
+    case 1: return (v >= 0 ? v : alpha * v);
     case 2: return (1 / (1 + exp(-v)));
     default: throw std::runtime_error("unsupported activ type"); break;
     }
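The host reference gains the same alpha so that activ_type == 1 now checks LeakyReLU rather than ReLU. A hand-checked sanity test for the activ() above (test_activ is a hypothetical helper, assuming T = float):

    #include <cassert>
    #include <cmath>

    void test_activ()
    {
        assert(activ(-2.0f, 0) == -2.0f);                  // identity
        assert(std::abs(activ(-2.0f, 1) + 0.6f) < 1e-6f);  // LeakyReLU: -2 * 0.3
        assert(std::abs(activ(0.0f, 2) - 0.5f) < 1e-6f);   // sigmoid(0) = 0.5
    }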
@@ -273,7 +274,8 @@ void host_direct_convolution_maxpool_nchwc(const Tensor<TIn>& in,
             }
         }
 
-        v = activ(v, activ_type) + bias(k0, k1);
+        v += bias(k0, k1);
+        v = activ(v, activ_type);
 
         out_host(n, k0, ho, wo, k1) = v;
     };
......
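The reference now adds the bias before applying the activation, matching the device epilogue order above (Bias block first, Activ block after). For any nonlinear activation the two orders give observably different results; a hand-worked example with LeakyReLU (alpha = 0.3), v = -1, b = 2:

    old order: activ(v) + b = leaky_relu(-1) + 2 = -0.3 + 2 = 1.7
    new order: activ(v + b) = leaky_relu(1)          = 1.0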