Commit b4c2a6bb authored by Po-Yen, Chen
Browse files

Move part of the ds_read logic into the previous iteration

parent ecef4987
...@@ -77,14 +77,15 @@ struct GridwiseGemmPipeline_v2 ...@@ -77,14 +77,15 @@ struct GridwiseGemmPipeline_v2
{ {
index_t i = 0; index_t i = 0;
block_sync_lds();
blockwise_gemm.PrepareRun(a_block_buf);
do do
{ {
__builtin_amdgcn_iglp_opt(2); // __builtin_amdgcn_iglp_opt(2);
block_sync_lds();
// GEMM i // GEMM i
blockwise_gemm.PrepareRun(a_block_buf);
blockwise_gemm.Run(b_block_buf, c_thread_buf); blockwise_gemm.Run(b_block_buf, c_thread_buf);
block_sync_lds(); block_sync_lds();
...@@ -103,6 +104,10 @@ struct GridwiseGemmPipeline_v2 ...@@ -103,6 +104,10 @@ struct GridwiseGemmPipeline_v2
// global read i + 2 // global read i + 2
b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
block_sync_lds();
blockwise_gemm.PrepareRun(a_block_buf);
++i; ++i;
} while(i < (num_loop - 2)); } while(i < (num_loop - 2));
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment