Commit 26b4fe97 authored by ltqin
Browse files

change load data order

parent 82b6858d
...@@ -586,13 +586,15 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3r1 ...@@ -586,13 +586,15 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3r1
b_block_slice_copy_step, b_block_slice_copy_step,
b_k0_n_k1_grid_move_slice_window_step_hack); b_k0_n_k1_grid_move_slice_window_step_hack);
block_sync_lds();
// LDS double buffer: load last data from device mem // LDS double buffer: load last data from device mem
a_blockwise_copy.RunRead( a_blockwise_copy.RunRead(
a_grid_desc_k0_m_k1, a_grid_buf, a_k0_m_k1_grid_step_hacks); a_grid_desc_k0_m_k1, a_grid_buf, a_k0_m_k1_grid_step_hacks);
block_sync_lds();
b_blockwise_copy.RunRead( b_blockwise_copy.RunRead(
b_grid_desc_k0_n_k1, b_grid_buf, b_k0_n_k1_grid_step_hacks); b_grid_desc_k0_n_k1, b_grid_buf, b_k0_n_k1_grid_step_hacks);
// gemm even data // gemm even data
blockwise_gemm.Run(a_block_even_buf, b_block_even_buf, c_thread_buf); blockwise_gemm.Run(a_block_even_buf, b_block_even_buf, c_thread_buf);
// write data into odd buffer // write data into odd buffer
...@@ -607,13 +609,15 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3r1 ...@@ -607,13 +609,15 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3r1
b_block_slice_copy_step, b_block_slice_copy_step,
b_k0_n_k1_grid_move_slice_window_step_hack); b_k0_n_k1_grid_move_slice_window_step_hack);
block_sync_lds();
// LDS double buffer: load last data from device mem // LDS double buffer: load last data from device mem
a_blockwise_copy.RunRead( a_blockwise_copy.RunRead(
a_grid_desc_k0_m_k1, a_grid_buf, a_k0_m_k1_grid_step_hacks); a_grid_desc_k0_m_k1, a_grid_buf, a_k0_m_k1_grid_step_hacks);
block_sync_lds();
b_blockwise_copy.RunRead( b_blockwise_copy.RunRead(
b_grid_desc_k0_n_k1, b_grid_buf, b_k0_n_k1_grid_step_hacks); b_grid_desc_k0_n_k1, b_grid_buf, b_k0_n_k1_grid_step_hacks);
// gemm odd data // gemm odd data
blockwise_gemm.Run(a_block_odd_buf, b_block_odd_buf, c_thread_buf); blockwise_gemm.Run(a_block_odd_buf, b_block_odd_buf, c_thread_buf);
// write data into even buffer // write data into even buffer
...@@ -634,12 +638,12 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3r1 ...@@ -634,12 +638,12 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3r1
b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc_k0_n_k1, b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc_k0_n_k1,
b_block_slice_copy_step, b_block_slice_copy_step,
b_k0_n_k1_grid_move_slice_window_step_hack); b_k0_n_k1_grid_move_slice_window_step_hack);
block_sync_lds();
// LDS double buffer: load last data from device mem // LDS double buffer: load last data from device mem
a_blockwise_copy.RunRead(a_grid_desc_k0_m_k1, a_grid_buf, a_k0_m_k1_grid_step_hacks); a_blockwise_copy.RunRead(a_grid_desc_k0_m_k1, a_grid_buf, a_k0_m_k1_grid_step_hacks);
block_sync_lds();
b_blockwise_copy.RunRead(b_grid_desc_k0_n_k1, b_grid_buf, b_k0_n_k1_grid_step_hacks); b_blockwise_copy.RunRead(b_grid_desc_k0_n_k1, b_grid_buf, b_k0_n_k1_grid_step_hacks);
// gemm even data // gemm even data
blockwise_gemm.Run(a_block_even_buf, b_block_even_buf, c_thread_buf); blockwise_gemm.Run(a_block_even_buf, b_block_even_buf, c_thread_buf);
// write data into odd buffer // write data into odd buffer
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment