"...composable_kernel.git" did not exist on "3da5c19e629174c234fe86c17ebd04732ea548b7"
Commit 031ddf35 authored by coderfeli
Browse files

Fix performance regression in the blockwise GEMM v3 pipeline

parent 400cac28
...@@ -477,6 +477,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave, ...@@ -477,6 +477,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave,
b_thread_buf_tail); b_thread_buf_tail);
}); });
}); });
HotLoopScheduler();
__builtin_amdgcn_sched_barrier(0);
} }
} }
...@@ -692,6 +695,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave, ...@@ -692,6 +695,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave,
}); });
}); });
HotLoopScheduler();
__builtin_amdgcn_sched_barrier(0);
static_for<0, KRepeat, 1>{}([&](auto k0) { static_for<0, KRepeat, 1>{}([&](auto k0) {
static_for<0, MRepeat, 1>{}([&](auto m0) { static_for<0, MRepeat, 1>{}([&](auto m0) {
static_for<0, NRepeat, 1>{}([&](auto n0) { static_for<0, NRepeat, 1>{}([&](auto n0) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment