"test/vscode:/vscode.git/clone" did not exist on "028171e90ab7b0002ffe62afc0c7f4f1cd44ba2a"
Commit bfdda4fe authored by illsilin's avatar illsilin
Browse files

update s_barrier and s_waitcnt for gfx12

parent 22509c0b
......@@ -479,7 +479,14 @@ struct BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
// sync point.
if constexpr(k.value != 0 || KPerInnerLoop == KPerThread)
{
#ifdef __gfx12__
asm volatile("\
s_barrier_signal \n \
s_barrier_wait \
" ::);
#else
asm volatile("s_barrier" ::);
#endif
__builtin_amdgcn_sched_barrier(0);
}
static_for<0, KPerInnerLoop, KPack>{}([&](auto k_) {
......
......@@ -10,10 +10,18 @@ namespace ck {
__device__ void block_sync_lds()
{
#if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
#ifdef __gfx12__
asm volatile("\
s_wait_idle lgkmcnt(0) \n \
s_barrier_signal \n \
s_barrier_wait \
" ::);
#else
asm volatile("\
s_waitcnt lgkmcnt(0) \n \
s_barrier \
" ::);
#endif
#else
__syncthreads();
#endif
......@@ -21,11 +29,20 @@ __device__ void block_sync_lds()
__device__ void block_sync_lds_direct_load()
{
#ifdef __gfx12__
asm volatile("\
s_wait_idle vmcnt(0) \n \
s_wait_idle lgkmcnt(0) \n \
s_barrier_signal \n \
s_barrier_wait \
" ::);
#else
asm volatile("\
s_waitcnt vmcnt(0) \n \
s_waitcnt lgkmcnt(0) \n \
s_barrier \
" ::);
#endif
}
__device__ void s_nop()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment