Commit d55852bc authored by Qianfeng Zhang
Browse files

Tune the lines of code to make them tidier

parent ee324424
...@@ -333,9 +333,10 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -333,9 +333,10 @@ struct BlockFmhaPipelineQRKSVSAsync
{ {
if(num_total_loop > 1) // there are multiple iterations if(num_total_loop > 1) // there are multiple iterations
{ {
store_tile(k_lds_windows[I0], k_tiles[I0]);
static_for<0, k0_loops - 1, 1>{}([&](auto i_k0) { static_for<0, k0_loops - 1, 1>{}([&](auto i_k0) {
store_tile(k_lds_windows[number<i_k0 % NumKLdsBuffers>{}],
k_tiles[number<i_k0>{}]);
k_tiles[number<i_k0 + 1>{}] = load_tile(k_dram_window); k_tiles[number<i_k0 + 1>{}] = load_tile(k_dram_window);
move_tile_window(k_dram_window, {0, kK0}); move_tile_window(k_dram_window, {0, kK0});
...@@ -347,11 +348,11 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -347,11 +348,11 @@ struct BlockFmhaPipelineQRKSVSAsync
gemm_0(s_acc, gemm_0(s_acc,
q_tiles[number<i_k0>{}], q_tiles[number<i_k0>{}],
k_lds_windows[number<i_k0 % NumKLdsBuffers>{}]); k_lds_windows[number<i_k0 % NumKLdsBuffers>{}]);
store_tile(k_lds_windows[number<(i_k0 + 1) % NumKLdsBuffers>{}],
k_tiles[number<i_k0 + 1>{}]);
}); });
store_tile(k_lds_windows[number<(k0_loops - 1) % NumKLdsBuffers>{}],
k_tiles[number<k0_loops - 1>{}]);
move_tile_window(k_dram_window, {kN0, -k0_loops * kK0}); move_tile_window(k_dram_window, {kN0, -k0_loops * kK0});
static_for<0, k0_loops, 1>{}([&](auto i_k0) { static_for<0, k0_loops, 1>{}([&](auto i_k0) {
...@@ -371,9 +372,10 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -371,9 +372,10 @@ struct BlockFmhaPipelineQRKSVSAsync
} }
else // there is only single iteration else // there is only single iteration
{ {
store_tile(k_lds_windows[I0], k_tiles[I0]);
static_for<0, k0_loops, 1>{}([&](auto i_k0) { static_for<0, k0_loops, 1>{}([&](auto i_k0) {
store_tile(k_lds_windows[number<i_k0 % NumKLdsBuffers>{}],
k_tiles[number<i_k0>{}]);
if constexpr(i_k0 < k0_loops - 1) if constexpr(i_k0 < k0_loops - 1)
{ {
k_tiles[number<i_k0 + 1>{}] = load_tile(k_dram_window); k_tiles[number<i_k0 + 1>{}] = load_tile(k_dram_window);
...@@ -388,12 +390,6 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -388,12 +390,6 @@ struct BlockFmhaPipelineQRKSVSAsync
gemm_0(s_acc, gemm_0(s_acc,
q_tiles[number<i_k0>{}], q_tiles[number<i_k0>{}],
k_lds_windows[number<i_k0 % NumKLdsBuffers>{}]); k_lds_windows[number<i_k0 % NumKLdsBuffers>{}]);
if constexpr(i_k0 < k0_loops - 1)
{
store_tile(k_lds_windows[number<(i_k0 + 1) % NumKLdsBuffers>{}],
k_tiles[number<i_k0 + 1>{}]);
};
}); });
// move_tile_window(k_dram_window, {0, -k0_loops * kK0}); // move_tile_window(k_dram_window, {0, -k0_loops * kK0});
...@@ -443,9 +439,10 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -443,9 +439,10 @@ struct BlockFmhaPipelineQRKSVSAsync
} }
else else
{ {
store_tile(k_lds_windows[I0], k_tiles[I0]);
static_for<0, k0_loops, 1>{}([&](auto i_k0) { static_for<0, k0_loops, 1>{}([&](auto i_k0) {
store_tile(k_lds_windows[number<i_k0 % NumKLdsBuffers>{}],
k_tiles[number<i_k0 % 2>{}]);
if constexpr(i_k0 < k0_loops - 1) if constexpr(i_k0 < k0_loops - 1)
{ {
k_tiles[number<(i_k0 + 1) % 2>{}] = load_tile(k_dram_window); k_tiles[number<(i_k0 + 1) % 2>{}] = load_tile(k_dram_window);
...@@ -460,12 +457,6 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -460,12 +457,6 @@ struct BlockFmhaPipelineQRKSVSAsync
gemm_0(s_acc, gemm_0(s_acc,
q_tiles[number<i_k0>{}], q_tiles[number<i_k0>{}],
k_lds_windows[number<i_k0 % NumKLdsBuffers>{}]); k_lds_windows[number<i_k0 % NumKLdsBuffers>{}]);
if constexpr(i_k0 < k0_loops - 1)
{
store_tile(k_lds_windows[number<(i_k0 + 1) % NumKLdsBuffers>{}],
k_tiles[number<(i_k0 + 1) % 2>{}]);
};
}); });
if(i_total_loops < num_total_loop - 1) if(i_total_loops < num_total_loop - 1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment