You need to sign in or sign up before continuing.
Unverified commit a11680cc, authored by Anthony Chang, committed by GitHub
Browse files

fix standalone softmax race condition around blockwise reduction (#323)

parent 7f216620
......@@ -250,8 +250,10 @@ struct GridwiseSoftmax_mk_to_mk
reducedTiles++;
} while(reducedTiles < num_k_block_tile_iteration);
-static_for<0, MThreadSliceSize, 1>{}(
-[&](auto I) { BlockwiseMaxReduce::Reduce(reduce_work_buf, max_value_buf(I)); });
+static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
+BlockwiseMaxReduce::Reduce(reduce_work_buf, max_value_buf(I));
+block_sync_lds();
+});
threadwise_src_load.MoveSrcSliceWindow(in_grid_desc_m_k, in_thread_copy_bwd_step);
......@@ -303,9 +305,10 @@ struct GridwiseSoftmax_mk_to_mk
reducedTiles++;
} while(reducedTiles < num_k_block_tile_iteration);
+block_sync_lds(); // wait for reading being complete before writing to LDS
static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
BlockwiseSumReduce::Reduce(reduce_work_buf, accu_value_buf(I));
-// block_sync_lds();
+block_sync_lds();
});
threadwise_src_load.MoveSrcSliceWindow(in_grid_desc_m_k, in_thread_copy_fwd_step);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment