Commit f1a49daf authored by danyao12
Browse files

add block_sync_lds in deterministic mode

parent 99ebfeba
......@@ -1789,6 +1789,11 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
auto y_dot_ygrad_thread_buf = make_static_buffer<AddressSpaceEnum::Vgpr, FloatGemmAcc>(
y_dot_ygrad_thread_desc_mblock_mrepeat_mwave_mperxdl.GetElementSpaceSize());
if constexpr(Deterministic)
{
block_sync_lds();
}
// load ygrad
gemm_tile_ygrad_blockwise_copy.Run(ygrad_grid_desc_o0_m_o1,
ygrad_grid_buf,
......
......@@ -1698,6 +1698,11 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
auto y_dot_ygrad_thread_buf = make_static_buffer<AddressSpaceEnum::Vgpr, FloatGemmAcc>(
y_dot_ygrad_thread_desc_mblock_mrepeat_mwave_mperxdl.GetElementSpaceSize());
if constexpr(Deterministic)
{
block_sync_lds();
}
//
// calculate Y dot dY
//
......
......@@ -913,6 +913,11 @@ struct GridwiseBatchedMultiheadAttentionForward_Xdl_CShuffle
0),
tensor_operation::element_wise::PassThrough{}};
if constexpr(Deterministic)
{
block_sync_lds();
}
do
{
auto n_block_data_idx_on_grid =
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment