"git@developer.sourcefind.cn:OpenDAS/ktransformers.git" did not exist on "3c6035aa8ad5c429299630f8c6f673896fbd7b16"
Unverified commit a11680cc, authored by Anthony Chang, committed by GitHub
Browse files

fix standalone softmax race condition around blockwise reduction (#323)

parent 7f216620
...@@ -250,8 +250,10 @@ struct GridwiseSoftmax_mk_to_mk ...@@ -250,8 +250,10 @@ struct GridwiseSoftmax_mk_to_mk
reducedTiles++; reducedTiles++;
} while(reducedTiles < num_k_block_tile_iteration); } while(reducedTiles < num_k_block_tile_iteration);
static_for<0, MThreadSliceSize, 1>{}( static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
[&](auto I) { BlockwiseMaxReduce::Reduce(reduce_work_buf, max_value_buf(I)); }); BlockwiseMaxReduce::Reduce(reduce_work_buf, max_value_buf(I));
block_sync_lds();
});
threadwise_src_load.MoveSrcSliceWindow(in_grid_desc_m_k, in_thread_copy_bwd_step); threadwise_src_load.MoveSrcSliceWindow(in_grid_desc_m_k, in_thread_copy_bwd_step);
...@@ -303,9 +305,10 @@ struct GridwiseSoftmax_mk_to_mk ...@@ -303,9 +305,10 @@ struct GridwiseSoftmax_mk_to_mk
reducedTiles++; reducedTiles++;
} while(reducedTiles < num_k_block_tile_iteration); } while(reducedTiles < num_k_block_tile_iteration);
block_sync_lds(); // wait for reading being complete before writing to LDS
static_for<0, MThreadSliceSize, 1>{}([&](auto I) { static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
BlockwiseSumReduce::Reduce(reduce_work_buf, accu_value_buf(I)); BlockwiseSumReduce::Reduce(reduce_work_buf, accu_value_buf(I));
// block_sync_lds(); block_sync_lds();
}); });
threadwise_src_load.MoveSrcSliceWindow(in_grid_desc_m_k, in_thread_copy_fwd_step); threadwise_src_load.MoveSrcSliceWindow(in_grid_desc_m_k, in_thread_copy_fwd_step);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment