Commit 59e3cb05 authored by Qianfeng Zhang
Browse files

Remove last block_sync_lds() in the loop

parent 3ee41b40
...@@ -426,8 +426,6 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -426,8 +426,6 @@ struct BlockFmhaPipelineQRKSVSAsync
} }
move_tile_window(v_dram_window, {0, kK1}); move_tile_window(v_dram_window, {0, kK1});
__builtin_amdgcn_sched_barrier(0);
if constexpr(NumVLdsBuffers > 1) if constexpr(NumVLdsBuffers > 1)
{ {
v_buf = load_tile(v_dram_window); // load next v_buf v_buf = load_tile(v_dram_window); // load next v_buf
...@@ -623,7 +621,6 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -623,7 +621,6 @@ struct BlockFmhaPipelineQRKSVSAsync
get_slice_tile(v_lds_window, get_slice_tile(v_lds_window,
sequence<((k1_loops - 1) % NumVLdsBuffers) * kN1, 0>{}, sequence<((k1_loops - 1) % NumVLdsBuffers) * kN1, 0>{},
sequence<(((k1_loops - 1) % NumVLdsBuffers) + 1) * kN1, kK1>{})); sequence<(((k1_loops - 1) % NumVLdsBuffers) + 1) * kN1, kK1>{}));
block_sync_lds();
} }
} while(++i_total_loops < num_total_loop); } while(++i_total_loops < num_total_loop);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment