Commit 34321734 authored by Jing Zhang's avatar Jing Zhang
Browse files

fixed comments

parent 5f11dccc
...@@ -180,7 +180,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad ...@@ -180,7 +180,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
Sequence<0, 0, 0, 0, 0>{}, Sequence<0, 0, 0, 0, 0>{},
Sequence<0, 0, 0, 0, 0>{})); Sequence<0, 0, 0, 0, 0>{}));
#if 1
// GEMM // GEMM
using gridwise_gemm = GridwiseDynamicGemm_km_kn_mn_v3< using gridwise_gemm = GridwiseDynamicGemm_km_kn_mn_v3<
BlockSize, BlockSize,
...@@ -363,7 +362,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad ...@@ -363,7 +362,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s" std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s"
<< std::endl; << std::endl;
} }
#endif
} }
}; };
} // namespace ck } // namespace ck
......
...@@ -133,12 +133,13 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3 ...@@ -133,12 +133,13 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
constexpr auto EPerBlock = a_block_mtx.GetLength(I0); constexpr auto EPerBlock = a_block_mtx.GetLength(I0);
constexpr auto KPerThreadSubC = 4; constexpr auto KPerThreadSubC = 4;
constexpr auto HPerThreadSubC = 2;
constexpr auto WPerThreadSubC = 2; constexpr auto HoPerThreadSubC = 2;
constexpr auto WoPerThreadSubC = 2;
static_assert(KPerThread % KPerThreadSubC == 0, ""); static_assert(KPerThread % KPerThreadSubC == 0, "");
static_assert(HPerThread % HPerThreadSubC == 0, ""); static_assert(HPerThread % HoPerThreadSubC == 0, "");
static_assert(WPerThread % WPerThreadSubC == 0, ""); static_assert(WPerThread % WoPerThreadSubC == 0, "");
// thread A, B for GEMM // thread A, B for GEMM
constexpr auto a_thread_mtx = make_dynamic_naive_tensor_descriptor_packed_v2( constexpr auto a_thread_mtx = make_dynamic_naive_tensor_descriptor_packed_v2(
...@@ -161,8 +162,8 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3 ...@@ -161,8 +162,8 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
constexpr auto threadwise_gemm = ThreadwiseGemm_km_kn_mn_v3<decltype(a_thread_mtx), constexpr auto threadwise_gemm = ThreadwiseGemm_km_kn_mn_v3<decltype(a_thread_mtx),
decltype(b_thread_mtx), decltype(b_thread_mtx),
decltype(c_thread_mtx), decltype(c_thread_mtx),
HPerThreadSubC, HoPerThreadSubC,
WPerThreadSubC>{}; WoPerThreadSubC>{};
// loop over k // loop over k
#pragma unroll #pragma unroll
for(index_t e_begin = 0; e_begin < EPerBlock; e_begin += EPerThreadLoop) for(index_t e_begin = 0; e_begin < EPerBlock; e_begin += EPerThreadLoop)
...@@ -176,10 +177,10 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3 ...@@ -176,10 +177,10 @@ struct BlockwiseGemm_km_kn_m0m1n0n1_v3
p_a_thread); p_a_thread);
#pragma unroll #pragma unroll
for(index_t h_begin = 0; h_begin < HPerThread; h_begin += HPerThreadSubC) for(index_t h_begin = 0; h_begin < HPerThread; h_begin += HoPerThreadSubC)
{ {
#pragma unroll #pragma unroll
for(index_t w_begin = 0; w_begin < WPerThread; w_begin += WPerThreadSubC) for(index_t w_begin = 0; w_begin < WPerThread; w_begin += WoPerThreadSubC)
{ {
threadwise_gemm.Run(p_a_thread, threadwise_gemm.Run(p_a_thread,
p_b_thread + b_thread_mtx.CalculateOffset(make_tuple( p_b_thread + b_thread_mtx.CalculateOffset(make_tuple(
......
...@@ -36,9 +36,9 @@ ...@@ -36,9 +36,9 @@
#endif #endif
// buffer resourse // buffer resourse
#if CK_AMD_GPU_GFX906 || CK_AMD_GPU_GFX908 #if defined(CK_AMD_GPU_GFX906) || defined(CK_AMD_GPU_GFX908)
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000
#elif CK_AMD_GPU_GFX1030 #elif defined(CK_AMD_GPU_GFX1030)
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000
#endif #endif
......
...@@ -164,26 +164,18 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw( ...@@ -164,26 +164,18 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
#endif #endif
constexpr auto conv_driver = constexpr auto conv_driver =
// DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad< #if 0
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad<
#else
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad< DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad<
BlockSize, #endif
typename vector_type<TInWei, InWeiVectorSize>::type, BlockSize,
TAcc, typename vector_type<TInWei, InWeiVectorSize>::type, TAcc, TOut, KPerBlock,
TOut, HoPerBlock, WoPerBlock, EPerBlock, KPerThread, HoPerThread, WoPerThread,
KPerBlock, EPerThread, ABlockTransferThreadSliceLengths_E_K,
HoPerBlock, ABlockTransferThreadClusterLengths_E_K, ABlockTransferSrcScalarPerVector_E,
WoPerBlock, ABlockTransferDstScalarPerVector_K, BThreadTransferSrcScalarPerVector_W,
EPerBlock, CThreadTransferDstScalarPerVector_W > {};
KPerThread,
HoPerThread,
WoPerThread,
EPerThread,
ABlockTransferThreadSliceLengths_E_K,
ABlockTransferThreadClusterLengths_E_K,
ABlockTransferSrcScalarPerVector_E,
ABlockTransferDstScalarPerVector_K,
BThreadTransferSrcScalarPerVector_W,
CThreadTransferDstScalarPerVector_W>{};
conv_driver.Run(wei_k_c0_y_x_desc, conv_driver.Run(wei_k_c0_y_x_desc,
in_n_c0_hi_wi_desc, in_n_c0_hi_wi_desc,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment