Commit e6dce0db authored by Qianfeng Zhang
Browse files

Add block_sync_lds() between blockwise reduction again

parent 3ac68bcc
...@@ -478,6 +478,8 @@ struct GridwiseBatchNormBackwardWithBlockwiseWelford ...@@ -478,6 +478,8 @@ struct GridwiseBatchNormBackwardWithBlockwiseWelford
}; };
static_for<0, MThreadSliceSize, 1>{}([&](auto I) { static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
if constexpr(I > 0)
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, dscale_thread_buf(I)); BlockwiseReduce::Reduce(reduce_work_buf, dscale_thread_buf(I));
block_sync_lds(); block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, dbias_thread_buf(I)); BlockwiseReduce::Reduce(reduce_work_buf, dbias_thread_buf(I));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment