Commit a9255313 authored by Po-Yen, Chen
Browse files

Use macro to decide mfma cluster size

parent 3fea2678
...@@ -100,36 +100,34 @@ struct GridwiseGemmPipeline_v2 ...@@ -100,36 +100,34 @@ struct GridwiseGemmPipeline_v2
// global read i + 2 // global read i + 2
b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
#if defined(ENABLE_PIPELINE_V2_OPT) #if !defined(NUM_MFMA_PER_CLUSTER)
__builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read #define NUM_MFMA_PER_CLUSTER 1
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA #endif
__builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read #if defined(ENABLE_PIPELINE_V2_OPT)
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 2, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read __builtin_amdgcn_sched_group_barrier(0x020, 1, 0); // VMEM read
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA __builtin_amdgcn_sched_group_barrier(0x008, NUM_MFMA_PER_CLUSTER, 0); // MFMA
#endif #endif
++i; ++i;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment