Commit 362e470d authored by ltqin
Browse files

add sched barrier

parent 9e03ca59
......@@ -587,7 +587,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
read_a_lds_data();
read_b_last_half_data();
// s_nop();
sched_barrier();
static_for<0, BaseMultK0 / 2, 1>{}([&](auto ii) {
blockwise_gemm.Run(a_thread_buf(Number<ii>{}),
......@@ -598,7 +598,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
read_a_lds_data();
read_b_first_half_data();
// s_nop();
sched_barrier();
static_for<BaseMultK0 / 2, BaseMultK0, 1>{}([&](auto ii) {
blockwise_gemm.Run(a_thread_buf(Number<ii - BaseMultK0 / 2>{}),
......@@ -626,7 +626,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
read_a_lds_data();
read_b_last_half_data();
// s_nop();
sched_barrier();
static_for<0, BaseMultK0 / 2, 1>{}([&](auto ii) {
blockwise_gemm.Run(
......@@ -640,7 +640,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_b_lds_v1
read_b_first_half_data();
}
// s_nop();
sched_barrier();
static_for<BaseMultK0 / 2, BaseMultK0, 1>{}([&](auto ii) {
blockwise_gemm.Run(a_thread_buf(Number<ii - BaseMultK0 / 2>{}),
......
......@@ -16,11 +16,15 @@ __device__ void block_sync_lds()
__syncthreads();
#endif
}
__device__ void s_nop()
// Device helper intended to act as an instruction-scheduling fence.
//
// NOTE: despite the name, the branch that is currently compiled (`#if 1`)
// emits a single `s_nop 0` instruction through volatile inline assembly.
// The `__builtin_amdgcn_sched_barrier(0)` call in the `#else` branch is
// compiled out; flip the `#if` condition to 0 to use the builtin instead.
__device__ void sched_barrier()
{
#if 1
// Active path: volatile asm with no operands or clobbers, emitting `s_nop 0`.
asm volatile("\
s_nop 0 \n \
" ::);
#else
// Disabled path: compiler scheduling-barrier builtin with mask 0.
// NOTE(review): per the LLVM AMDGPU docs a mask of 0 presumably means no
// instruction may be scheduled across this point - confirm before enabling.
__builtin_amdgcn_sched_barrier(0);
#endif
}
__device__ void s_barrier()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment