Unverified commit 9fa379ea, authored by Illia Silin and committed by GitHub
Browse files

Merge pull request #47 from ROCm/navi4x_wmma

Navi4x wmma GEMM
parents 9de63596 9a9cb884
......@@ -10,10 +10,18 @@ namespace ck {
__device__ void block_sync_lds()
{
#if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
#ifdef __gfx12__
asm volatile("\
s_wait_dscnt 0x0 \n \
s_barrier_signal -1 \n \
s_barrier_wait -1 \
" ::);
#else
asm volatile("\
s_waitcnt lgkmcnt(0) \n \
s_barrier \
" ::);
#endif
#else
__syncthreads();
#endif
......
......@@ -108,8 +108,6 @@ if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_scale_instance)
endif()
# When DL_KERNELS evaluates to true, privately link the
# device_batched_gemm_multi_d_instance target into the profiler executable.
if(DL_KERNELS)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_multi_d_instance)
endif()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment