"vscode:/vscode.git/clone" did not exist on "bfa06cf28d6d60e25d3d9c0cd27735ff53dba69f"
Commit 7a190876 authored by Qianfeng Zhang
Browse files

Add block_sync_lds between two consecutive blockwise reductions

parent bb220a7a
...@@ -330,6 +330,9 @@ struct GridwiseReduceSecondHalfBatchNormBackwardFinal ...@@ -330,6 +330,9 @@ struct GridwiseReduceSecondHalfBatchNormBackwardFinal
} }
static_for<0, MThreadSliceSize, 1>{}([&](auto I) { static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
if constexpr(I > 0)
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, dscale_thread_buf(I)); BlockwiseReduce::Reduce(reduce_work_buf, dscale_thread_buf(I));
block_sync_lds(); block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, dbias_thread_buf(I)); BlockwiseReduce::Reduce(reduce_work_buf, dbias_thread_buf(I));
......
...@@ -495,6 +495,9 @@ struct GridwiseWelfordSecondHalfReduceFirstHalf ...@@ -495,6 +495,9 @@ struct GridwiseWelfordSecondHalfReduceFirstHalf
}; };
static_for<0, MThreadSliceSize, 1>{}([&](auto I) { static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
if constexpr(I > 0)
block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, reduce_dscale_thread_buf(I)); BlockwiseReduce::Reduce(reduce_work_buf, reduce_dscale_thread_buf(I));
block_sync_lds(); block_sync_lds();
BlockwiseReduce::Reduce(reduce_work_buf, reduce_dbias_thread_buf(I)); BlockwiseReduce::Reduce(reduce_work_buf, reduce_dbias_thread_buf(I));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment