"include/vscode:/vscode.git/clone" did not exist on "091570f594e6e6b7109e290ce878f4f9b8ad1e9f"
Commit 07237cab authored by root
Browse files

tweak

parent 53f322f3
...@@ -102,6 +102,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -102,6 +102,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
const index_t k_block_work_id = get_block_1d_id() / hwo_block_work_num; const index_t k_block_work_id = get_block_1d_id() / hwo_block_work_num;
const index_t hwo_block_work_id = get_block_1d_id() - k_block_work_id * hwo_block_work_num; const index_t hwo_block_work_id = get_block_1d_id() - k_block_work_id * hwo_block_work_num;
const index_t ho_block_work_id = hwo_block_work_id / wo_block_work_num;
const index_t wo_block_work_id = hwo_block_work_id - ho_block_work_id * wo_block_work_num;
#else #else
// Hack: this force result into SGPR // Hack: this force result into SGPR
const index_t k_block_work_num = __builtin_amdgcn_readfirstlane(K / KPerBlock); const index_t k_block_work_num = __builtin_amdgcn_readfirstlane(K / KPerBlock);
...@@ -112,10 +114,11 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -112,10 +114,11 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
const index_t k_block_work_id = const index_t k_block_work_id =
__builtin_amdgcn_readfirstlane(get_block_1d_id() / hwo_block_work_num); __builtin_amdgcn_readfirstlane(get_block_1d_id() / hwo_block_work_num);
const index_t hwo_block_work_id = get_block_1d_id() - k_block_work_id * hwo_block_work_num; const index_t hwo_block_work_id = get_block_1d_id() - k_block_work_id * hwo_block_work_num;
#endif
const index_t ho_block_work_id = hwo_block_work_id / wo_block_work_num; const index_t ho_block_work_id =
__builtin_amdgcn_readfirstlane(hwo_block_work_id / wo_block_work_num);
const index_t wo_block_work_id = hwo_block_work_id - ho_block_work_id * wo_block_work_num; const index_t wo_block_work_id = hwo_block_work_id - ho_block_work_id * wo_block_work_num;
#endif
// lds max alignment // lds max alignment
constexpr auto max_lds_align = constexpr auto max_lds_align =
......
...@@ -85,7 +85,7 @@ ...@@ -85,7 +85,7 @@
// experimental implementation // experimental implementation
#ifndef CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK #ifndef CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 1 #define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 0
#endif #endif
#ifndef CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK #ifndef CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
......
...@@ -76,8 +76,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc ...@@ -76,8 +76,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr index_t EPerBlock = 2; constexpr index_t EPerBlock = 2;
constexpr index_t KPerThread = 16; constexpr index_t KPerThread = 16;
constexpr index_t HoPerThread = 4; constexpr index_t HoPerThread = 2;
constexpr index_t WoPerThread = 1; constexpr index_t WoPerThread = 2;
constexpr index_t EPerThread = 2; constexpr index_t EPerThread = 2;
using GemmABlockTransferThreadSliceLengths_GemmK_GemmM = Sequence<9, 1>; using GemmABlockTransferThreadSliceLengths_GemmK_GemmM = Sequence<9, 1>;
......
...@@ -719,7 +719,7 @@ int main(int argc, char* argv[]) ...@@ -719,7 +719,7 @@ int main(int argc, char* argv[])
LeftPads{}, LeftPads{},
RightPads{}, RightPads{},
nrepeat); nrepeat);
#elif 1 #elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t, device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
in_vector_size, in_vector_size,
acc_data_t, acc_data_t,
...@@ -736,7 +736,7 @@ int main(int argc, char* argv[]) ...@@ -736,7 +736,7 @@ int main(int argc, char* argv[])
LeftPads{}, LeftPads{},
RightPads{}, RightPads{},
nrepeat); nrepeat);
#elif 1 #elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk<in_data_t, device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk<in_data_t,
in_vector_size, in_vector_size,
acc_data_t, acc_data_t,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment