Commit cb6475c7 authored by Chao Liu
Browse files

clean

parent 6ff3fe5d
...@@ -138,7 +138,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded ...@@ -138,7 +138,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed( constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed(
Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{}); Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{});
#if 0 #if 1
// blockwise input copy // blockwise input copy
// format is [C, Hi, Wi, N] // format is [C, Hi, Wi, N]
auto blockwise_in_copy = auto blockwise_in_copy =
...@@ -180,7 +180,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded ...@@ -180,7 +180,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
{0, 0, 0, 0}); {0, 0, 0, 0});
#endif #endif
#if 0 #if 1
// blockwise wei copy // blockwise wei copy
// format is [CPerBlock, KPerBlock] // format is [CPerBlock, KPerBlock]
const auto blockwise_wei_copy = const auto blockwise_wei_copy =
...@@ -278,7 +278,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded ...@@ -278,7 +278,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
// set threadwise output tensor to 0 // set threadwise output tensor to 0
threadwise_matrix_set_zero(c_k_wn_thread_mtx_desc, p_out_thread); threadwise_matrix_set_zero(c_k_wn_thread_mtx_desc, p_out_thread);
#if 0 #if 1
for(index_t y = 0; y < Y; ++y) for(index_t y = 0; y < Y; ++y)
{ {
for(index_t x = 0; x < X; ++x) for(index_t x = 0; x < X; ++x)
...@@ -318,10 +318,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded ...@@ -318,10 +318,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
for(index_t c_block_data_begin = 0; c_block_data_begin < C; for(index_t c_block_data_begin = 0; c_block_data_begin < C;
c_block_data_begin += CPerBlock) c_block_data_begin += CPerBlock)
{ {
#if 1 // debug
blockwise_in_copy.Run(); blockwise_in_copy.Run();
blockwise_wei_copy.Run(); blockwise_wei_copy.Run();
#endif
__syncthreads(); __syncthreads();
......
...@@ -74,7 +74,7 @@ int main(int argc, char* argv[]) ...@@ -74,7 +74,7 @@ int main(int argc, char* argv[])
#if 1 #if 1
constexpr index_t N = 64; constexpr index_t N = 64;
constexpr index_t C = 8; constexpr index_t C = 1536;
constexpr index_t HI = 8; constexpr index_t HI = 8;
constexpr index_t WI = 8; constexpr index_t WI = 8;
constexpr index_t K = 256; constexpr index_t K = 256;
...@@ -368,7 +368,7 @@ int main(int argc, char* argv[]) ...@@ -368,7 +368,7 @@ int main(int argc, char* argv[])
#if 0 #if 0
device_convolution_direct_v2_nchw_kcyx_nkhw device_convolution_direct_v2_nchw_kcyx_nkhw
(in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
#elif 0 #elif 1
device_convolution_implicit_gemm_v1_chwn_cyxk_khwn( device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(
in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
#elif 1 #elif 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment