clean

cb6475c7 · Chao Liu · 6ff3fe5d · cb6475c7 · cb6475c7
Commit cb6475c7 authored Aug 23, 2019 by Chao Liu
2 changed files
--- a/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp
+++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp
@@ -138,7 +138,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
        constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed(
            Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{});
-#if 0
+#if 1
        // blockwise input copy
        //   format is [C, Hi, Wi, N]
        auto blockwise_in_copy =
@@ -180,7 +180,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
                {0, 0, 0, 0});
 #endif
-#if 0
+#if 1
        // blockwise wei copy
        //   format is [CPerBlock, KPerBlock]
        const auto blockwise_wei_copy =
@@ -278,7 +278,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
        // set threadwise output tensor to 0
        threadwise_matrix_set_zero(c_k_wn_thread_mtx_desc, p_out_thread);
-#if 0
+#if 1
        for(index_t y = 0; y < Y; ++y)
        {
            for(index_t x = 0; x < X; ++x)
@@ -318,10 +318,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
                for(index_t c_block_data_begin = 0; c_block_data_begin < C;
                    c_block_data_begin += CPerBlock)
                {
-#if 1 // debug
                    blockwise_in_copy.Run();
                    blockwise_wei_copy.Run();
-#endif
                    __syncthreads();

--- a/driver/src/driver.cpp
+++ b/driver/src/driver.cpp
@@ -74,7 +74,7 @@ int main(int argc, char* argv[])
 #if 1
    constexpr index_t N  = 64;
-    constexpr index_t C  = 8;
+    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
@@ -368,7 +368,7 @@ int main(int argc, char* argv[])
 #if 0
    device_convolution_direct_v2_nchw_kcyx_nkhw
        (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
-#elif 0
+#elif 1
    device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
 #elif 1