Commit b59d5490 authored by ltqin
Browse files

fixed MPerBlock=96

parent 0eed5076
...@@ -165,7 +165,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3 ...@@ -165,7 +165,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3
static_for<1, nDim, 1>{}([&](auto i) { static_for<1, nDim, 1>{}([&](auto i) {
index_t tmp = ordered_access_idx[I0]; index_t tmp = ordered_access_idx[I0];
static_for<0, i, 1>{}([&](auto j) { static_for<1, i, 1>{}([&](auto j) {
tmp = tmp * ordered_access_lengths[j] + ordered_access_idx[j]; tmp = tmp * ordered_access_lengths[j] + ordered_access_idx[j];
}); });
......
...@@ -426,7 +426,6 @@ struct DeviceGemmSplitKXdl ...@@ -426,7 +426,6 @@ struct DeviceGemmSplitKXdl
float ave_time = 0; float ave_time = 0;
const auto Run = [&](const auto& kernel) { const auto Run = [&](const auto& kernel) {
#if CK_RUN_KERNEL_AND_TIME
ave_time = launch_and_time_kernel(kernel, ave_time = launch_and_time_kernel(kernel,
nrepeat, nrepeat,
dim3(grid_size), dim3(grid_size),
...@@ -442,23 +441,29 @@ struct DeviceGemmSplitKXdl ...@@ -442,23 +441,29 @@ struct DeviceGemmSplitKXdl
arg.b_element_op_, arg.b_element_op_,
arg.c_element_op_, arg.c_element_op_,
arg.block_2_ctile_map_); arg.block_2_ctile_map_);
#else if(kbatch > 1)
nrepeat++; {
launch_kernel(kernel, hipGetErrorString(
dim3(grid_size), hipMemset(arg.p_c_grid_,
dim3(BlockSize), 0,
0, arg.c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2_.GetElementSpaceSize() *
arg.p_a_grid_, sizeof(CDataType)));
arg.p_b_grid_,
arg.p_c_grid_, launch_kernel(kernel,
arg.a_grid_desc_kbatch_k0_m_k1_, dim3(grid_size),
arg.b_grid_desc_kbatch_k0_n_k1_, dim3(BlockSize),
arg.c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2_, 0,
arg.a_element_op_, arg.p_a_grid_,
arg.b_element_op_, arg.p_b_grid_,
arg.c_element_op_, arg.p_c_grid_,
arg.block_2_ctile_map_); arg.a_grid_desc_kbatch_k0_m_k1_,
#endif arg.b_grid_desc_kbatch_k0_n_k1_,
arg.c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2_,
arg.a_element_op_,
arg.b_element_op_,
arg.c_element_op_,
arg.block_2_ctile_map_);
}
}; };
if(has_main_k0_block_loop) if(has_main_k0_block_loop)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment