"...resnet50_tensorflow.git" did not exist on "2bde248588c8388f9d6cd4d48712ff9ca52d6288"
Commit 031ddf35 authored by coderfeli
Browse files

fix performance regression on blockgemm v3 pipe

parent 400cac28
......@@ -477,6 +477,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave,
b_thread_buf_tail);
});
});
HotLoopScheduler();
__builtin_amdgcn_sched_barrier(0);
}
}
......@@ -692,6 +695,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave,
});
});
HotLoopScheduler();
__builtin_amdgcn_sched_barrier(0);
static_for<0, KRepeat, 1>{}([&](auto k0) {
static_for<0, MRepeat, 1>{}([&](auto m0) {
static_for<0, NRepeat, 1>{}([&](auto n0) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment