Commit c62165da authored by Anthony Chang
Browse files

comments

parent 462399b9
...@@ -611,10 +611,11 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle ...@@ -611,10 +611,11 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle
MXdlPerWave, MXdlPerWave,
Gemm1NXdlPerWave, Gemm1NXdlPerWave,
Gemm1KPack, Gemm1KPack,
false, false, // TransposeC
Gemm1KPack, // AMmaKStride Gemm1KPack, // AMmaKStride
Gemm1KPack * XdlopsGemm<FloatAB, MPerXdl, NPerXdl, Gemm1KPack, false>{}.K0PerXdlops>{ Gemm1KPack * XdlopsGemm<FloatAB, MPerXdl, NPerXdl, Gemm1KPack, false>{}.K0PerXdlops>{
make_tuple(0, 0, 0, 0)}; // TransposeC // BMmaKStride
make_tuple(0, 0, 0, 0)}; // A_origin
auto c_thread_buf = gemm1_blockwise_gemm.GetCThreadBuffer(); auto c_thread_buf = gemm1_blockwise_gemm.GetCThreadBuffer();
...@@ -699,6 +700,7 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle ...@@ -699,6 +700,7 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle
a1_thread_desc_k0_m_k1, a1_thread_desc_k0_m_k1,
make_tuple(I0, I0, I0), make_tuple(I0, I0, I0),
a1_thread_buf); a1_thread_buf);
block_sync_lds(); block_sync_lds();
gemm1_blockwise_gemm.Run(a1_thread_buf, b1_block_buf, c_thread_buf); gemm1_blockwise_gemm.Run(a1_thread_buf, b1_block_buf, c_thread_buf);
......
...@@ -617,7 +617,8 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle ...@@ -617,7 +617,8 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
true, // TransposeC true, // TransposeC
Gemm1KPack, // AMmaKStride Gemm1KPack, // AMmaKStride
Gemm1KPack * XdlopsGemm<FloatAB, MPerXdl, NPerXdl, Gemm1KPack, false>{}.K0PerXdlops>{ Gemm1KPack * XdlopsGemm<FloatAB, MPerXdl, NPerXdl, Gemm1KPack, false>{}.K0PerXdlops>{
make_tuple(0, 0, 0, 0)}; // TransposeC // BMmaKStride
make_tuple(0, 0, 0, 0)}; // A_origin
auto acc1_thread_buf = gemm1_blockwise_gemm.GetCThreadBuffer(); auto acc1_thread_buf = gemm1_blockwise_gemm.GetCThreadBuffer();
...@@ -735,7 +736,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle ...@@ -735,7 +736,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
b1_blockwise_copy.MoveSrcSliceWindow(b1_grid_desc_bk0_n_bk1, b1_blockwise_copy.MoveSrcSliceWindow(b1_grid_desc_bk0_n_bk1,
b1_block_slice_copy_step); b1_block_slice_copy_step);
block_sync_lds(); // wait for gemm0 LDS read block_sync_lds(); // wait for reduction LDS read
b1_blockwise_copy.RunWrite(b1_block_desc_bk0_n_bk1, b1_block_buf); b1_blockwise_copy.RunWrite(b1_block_desc_bk0_n_bk1, b1_block_buf);
...@@ -774,6 +775,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle ...@@ -774,6 +775,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
a1_thread_desc_k0_m_k1, a1_thread_desc_k0_m_k1,
make_tuple(I0, I0, I0), make_tuple(I0, I0, I0),
a1_thread_buf); a1_thread_buf);
block_sync_lds(); block_sync_lds();
gemm1_blockwise_gemm.Run(a1_thread_buf, b1_block_buf, acc1_thread_buf); gemm1_blockwise_gemm.Run(a1_thread_buf, b1_block_buf, acc1_thread_buf);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment