"docs/source/en/vscode:/vscode.git/clone" did not exist on "dbcb15c25fee7122c5184eb0e0fcef8e29495227"
Commit 7a190876 authored by Qianfeng Zhang
Browse files

Add block_sync_lds between two consecutive blockwise reductions

parent bb220a7a
......@@ -330,6 +330,9 @@ struct GridwiseReduceSecondHalfBatchNormBackwardFinal
}
static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
if constexpr(I > 0)
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, dscale_thread_buf(I));
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, dbias_thread_buf(I));
......
......@@ -495,6 +495,9 @@ struct GridwiseWelfordSecondHalfReduceFirstHalf
};
static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
if constexpr(I > 0)
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, reduce_dscale_thread_buf(I));
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, reduce_dbias_thread_buf(I));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment