Commit ee324424 authored by Qianfeng Zhang
Browse files

Move the code line of clear_tile(s_acc)

parent 1a6a3715
...@@ -335,12 +335,13 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -335,12 +335,13 @@ struct BlockFmhaPipelineQRKSVSAsync
{ {
store_tile(k_lds_windows[I0], k_tiles[I0]); store_tile(k_lds_windows[I0], k_tiles[I0]);
clear_tile(s_acc); // initialize C
static_for<0, k0_loops - 1, 1>{}([&](auto i_k0) { static_for<0, k0_loops - 1, 1>{}([&](auto i_k0) {
k_tiles[number<i_k0 + 1>{}] = load_tile(k_dram_window); k_tiles[number<i_k0 + 1>{}] = load_tile(k_dram_window);
move_tile_window(k_dram_window, {0, kK0}); move_tile_window(k_dram_window, {0, kK0});
if constexpr(i_k0 == 0)
clear_tile(s_acc);
block_sync_lds(); block_sync_lds();
// execute current unroll of gemm_0 // execute current unroll of gemm_0
gemm_0(s_acc, gemm_0(s_acc,
...@@ -372,8 +373,6 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -372,8 +373,6 @@ struct BlockFmhaPipelineQRKSVSAsync
{ {
store_tile(k_lds_windows[I0], k_tiles[I0]); store_tile(k_lds_windows[I0], k_tiles[I0]);
clear_tile(s_acc); // initialize C
static_for<0, k0_loops, 1>{}([&](auto i_k0) { static_for<0, k0_loops, 1>{}([&](auto i_k0) {
if constexpr(i_k0 < k0_loops - 1) if constexpr(i_k0 < k0_loops - 1)
{ {
...@@ -381,6 +380,9 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -381,6 +380,9 @@ struct BlockFmhaPipelineQRKSVSAsync
move_tile_window(k_dram_window, {0, kK0}); move_tile_window(k_dram_window, {0, kK0});
}; };
if constexpr(i_k0 == 0)
clear_tile(s_acc);
block_sync_lds(); block_sync_lds();
// execute current unroll of gemm_0 // execute current unroll of gemm_0
gemm_0(s_acc, gemm_0(s_acc,
...@@ -443,8 +445,6 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -443,8 +445,6 @@ struct BlockFmhaPipelineQRKSVSAsync
{ {
store_tile(k_lds_windows[I0], k_tiles[I0]); store_tile(k_lds_windows[I0], k_tiles[I0]);
clear_tile(s_acc); // initialize C
static_for<0, k0_loops, 1>{}([&](auto i_k0) { static_for<0, k0_loops, 1>{}([&](auto i_k0) {
if constexpr(i_k0 < k0_loops - 1) if constexpr(i_k0 < k0_loops - 1)
{ {
...@@ -452,6 +452,9 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -452,6 +452,9 @@ struct BlockFmhaPipelineQRKSVSAsync
move_tile_window(k_dram_window, {0, kK0}); move_tile_window(k_dram_window, {0, kK0});
}; };
if constexpr(i_k0 == 0)
clear_tile(s_acc);
block_sync_lds(); block_sync_lds();
// execute current unroll of gemm_0 // execute current unroll of gemm_0
gemm_0(s_acc, gemm_0(s_acc,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment