Commit cbf68933 authored by Anthony Chang's avatar Anthony Chang
Browse files

set up inter-wave configuration

parent 64af1c84
...@@ -126,7 +126,7 @@ ...@@ -126,7 +126,7 @@
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST 1 #define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST 1
// experimental feature: optimize for inter-wave scheduling policy // experimental feature: optimize for inter-wave scheduling policy
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING 0 #define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING 1
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS 1 #define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS 1
// hack: have underlying assumption that need to be satsified, otherwise it's a bug // hack: have underlying assumption that need to be satsified, otherwise it's a bug
......
...@@ -557,6 +557,10 @@ struct BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ...@@ -557,6 +557,10 @@ struct BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
AThreadCopy a_thread_copy_{CalculateAThreadOriginDataIndex()}; AThreadCopy a_thread_copy_{CalculateAThreadOriginDataIndex()};
BThreadCopy b_thread_copy_{CalculateBThreadOriginDataIndex()}; BThreadCopy b_thread_copy_{CalculateBThreadOriginDataIndex()};
#else
static_assert(false,
"Configuration error: using BlockwiseGemmXdlopsInterwave without defining "
"macro CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING=1");
#endif // #if CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING #endif // #if CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING
}; };
......
...@@ -426,9 +426,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3 ...@@ -426,9 +426,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
// c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in // c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in
// register // register
// sanity check // sanity check
auto blockwise_gemm = BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector<
auto blockwise_gemm = BlockSize,
BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1<BlockSize,
FloatAB, FloatAB,
FloatAcc, FloatAcc,
decltype(a_block_desc_k0_m_k1), decltype(a_block_desc_k0_m_k1),
...@@ -437,7 +436,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3 ...@@ -437,7 +436,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
NPerXDL, NPerXDL,
MXdlPerWave, MXdlPerWave,
NXdlPerWave, NXdlPerWave,
K1>{}; K1,
LoopSched>();
auto c_thread_buf = blockwise_gemm.GetCThreadBuffer(); auto c_thread_buf = blockwise_gemm.GetCThreadBuffer();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment