Commit 9464c5ef authored by Anthony Chang
Browse files

use value field from ck::integral_constant

parent 29d881df
...@@ -287,7 +287,7 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ...@@ -287,7 +287,7 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
// NOTE: sync thread at the start of each MAC cluster except for the first MAC cluster // NOTE: sync thread at the start of each MAC cluster except for the first MAC cluster
// we want waves in a workgroup in sync to prevent waves from other workgroups hijacking // we want waves in a workgroup in sync to prevent waves from other workgroups hijacking
// MAC resource // MAC resource
if constexpr(int(k) != 0 || KPerInnerLoop == KPerThread) if constexpr(k.value != 0 || KPerInnerLoop == KPerThread)
{ {
asm volatile("s_barrier" ::); asm volatile("s_barrier" ::);
__builtin_amdgcn_sched_barrier(); __builtin_amdgcn_sched_barrier();
...@@ -318,9 +318,9 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ...@@ -318,9 +318,9 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
// moved here B) reduce VMEM FIFO congestion by applying small delays to // moved here B) reduce VMEM FIFO congestion by applying small delays to
// different wavefronts It is performed near the end of MAC cluster to // different wavefronts It is performed near the end of MAC cluster to
// minimize lgkmcnt penalty // minimize lgkmcnt penalty
if constexpr(int(k) == KPerThread - KPerInnerLoop && if constexpr(k.value == KPerThread - KPerInnerLoop &&
int(k_) == KPerInnerLoop - KPack && int(m0) == MRepeat - 1 && k_.value == KPerInnerLoop - KPack && m0.value == MRepeat - 1 &&
int(n0) == NRepeat - 1) n0.value == NRepeat - 1)
{ {
__builtin_amdgcn_sched_barrier(); __builtin_amdgcn_sched_barrier();
block_sync_lds(); block_sync_lds();
...@@ -333,7 +333,7 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ...@@ -333,7 +333,7 @@ struct BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
a_thread_vec.template AsType<mfma_input_type>(), a_thread_vec.template AsType<mfma_input_type>(),
b_thread_vec.template AsType<mfma_input_type>(), b_thread_vec.template AsType<mfma_input_type>(),
c_thread_buf.GetVectorTypeReference(Number<c_offset>{})); c_thread_buf.GetVectorTypeReference(Number<c_offset>{}));
if constexpr(int(k_) == 0 && int(m0) == 0 && int(n0) == 0) if constexpr(k_.value == 0 && m0.value == 0 && n0.value == 0)
{ {
__builtin_amdgcn_sched_barrier(); __builtin_amdgcn_sched_barrier();
__builtin_amdgcn_s_setprio(1); __builtin_amdgcn_s_setprio(1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment