Commit 67ae9716 authored by Po-Yen, Chen
Browse files

Fix scheduling of ds_read & buffer_load

parent a4996831
...@@ -51,10 +51,14 @@ struct GridwiseGemmPipeline_v2 ...@@ -51,10 +51,14 @@ struct GridwiseGemmPipeline_v2
CThreadBuffer& c_thread_buf, CThreadBuffer& c_thread_buf,
index_t num_loop) index_t num_loop)
{ {
__builtin_amdgcn_sched_barrier(0);
// global read 0 // global read 0
a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
__builtin_amdgcn_sched_barrier(0);
// move to 1 // move to 1
a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step); a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step); b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
...@@ -62,16 +66,22 @@ struct GridwiseGemmPipeline_v2 ...@@ -62,16 +66,22 @@ struct GridwiseGemmPipeline_v2
// Initialize C // Initialize C
c_thread_buf.Clear(); c_thread_buf.Clear();
__builtin_amdgcn_sched_barrier(0);
// LDS write 0 // LDS write 0
a_blockwise_copy.RunWrite(a_block_desc, a_block_buf); a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
// global Read 1 // global Read 1
a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
__builtin_amdgcn_sched_barrier(0);
// LDS write 0 // LDS write 0
b_blockwise_copy.RunWrite(b_block_desc, b_block_buf); b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
// global Read 1 // global Read 1
b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
__builtin_amdgcn_sched_barrier(0);
// main body // main body
if constexpr(HasMainLoop) if constexpr(HasMainLoop)
{ {
...@@ -90,16 +100,22 @@ struct GridwiseGemmPipeline_v2 ...@@ -90,16 +100,22 @@ struct GridwiseGemmPipeline_v2
a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step); a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step); b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
__builtin_amdgcn_sched_barrier(0);
// LDS write i + 1 // LDS write i + 1
a_blockwise_copy.RunWrite(a_block_desc, a_block_buf); a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
// global read i + 2 // global read i + 2
a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
__builtin_amdgcn_sched_barrier(0);
// LDS write i + 1 // LDS write i + 1
b_blockwise_copy.RunWrite(b_block_desc, b_block_buf); b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
// global read i + 2 // global read i + 2
b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf); b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
__builtin_amdgcn_sched_barrier(0);
++i; ++i;
} while(i < (num_loop - 2)); } while(i < (num_loop - 2));
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment