Commit e472af36 authored by Qianfeng Zhang
Browse files

Tune the location of a scheduler barrier code line

parent d362410d
...@@ -664,9 +664,9 @@ struct BlockFmhaPipelineQRKSVSAsync ...@@ -664,9 +664,9 @@ struct BlockFmhaPipelineQRKSVSAsync
k_tiles[I0] = load_tile(k_dram_window); k_tiles[I0] = load_tile(k_dram_window);
move_tile_window(k_dram_window, {0, kK0}); move_tile_window(k_dram_window, {0, kK0});
}; };
}
__builtin_amdgcn_sched_barrier(0); __builtin_amdgcn_sched_barrier(0);
}
// STAGE 3, KV gemm // STAGE 3, KV gemm
if constexpr(k1_loops > 1) if constexpr(k1_loops > 1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment