Commit 8831b0d8 authored by Jing Zhang's avatar Jing Zhang
Browse files

fixed block_sync_lds

parent 28d672d5
......@@ -155,7 +155,7 @@
#define CK_USE_SR_F8_CONVERSION 1
// block synchronization only s_wait lgkmcnt(0), not vmcnt(0)
#define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 0
#define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1
// experimental feature: multi index implemented as array
#define CK_EXPERIMENTAL_USE_DYNAMICALLY_INDEXED_MULTI_INDEX 0
......
......@@ -85,9 +85,6 @@ inline bool is_navi3_supported()
ck::get_device_name() == "gfx1102" || ck::get_device_name() == "gfx1103";
}
inline bool is_navi4_supported()
{
return ck::get_device_name() == "gfx1200";
}
inline bool is_navi4_supported() { return ck::get_device_name() == "gfx1200"; }
} // namespace ck
......@@ -10,10 +10,18 @@ namespace ck {
__device__ void block_sync_lds()
{
#if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
#ifdef __gfx12__
asm volatile("\
s_wait_dscnt 0x0 \n \
s_barrier_signal -1 \n \
s_barrier_wait -1 \
" ::);
#else
asm volatile("\
s_waitcnt lgkmcnt(0) \n \
s_barrier \
" ::);
#endif
#else
__syncthreads();
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment