Commit e32db0e9 authored by carlushuang
Browse files

Remove scratch usage by manually resetting the coordinate

parent d51da77a
...@@ -668,7 +668,9 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk ...@@ -668,7 +668,9 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
using Accumulation = ck::detail:: using Accumulation = ck::detail::
AccumulateWithNanCheck<false /*PropagateNan*/, reduce::Add, FloatAcc>; AccumulateWithNanCheck<false /*PropagateNan*/, reduce::Add, FloatAcc>;
static_for<0, MReduceIters, 1>{}([&](auto i_m_reduce) { // static_for<0, MReduceIters, 1>{}([&](auto i_m_reduce) {
for(int i_m = 0; i_m < MReduceIters; i_m++)
{
static_for<0, NReduceIters, 1>{}([&](auto i_n_reduce) { static_for<0, NReduceIters, 1>{}([&](auto i_n_reduce) {
acc_buf.Clear(); acc_buf.Clear();
for(auto i = tile_acc_offset_start; i < tile_acc_offset_end; i++) for(auto i = tile_acc_offset_start; i < tile_acc_offset_end; i++)
...@@ -721,14 +723,15 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk ...@@ -721,14 +723,15 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
} }
} }
}); });
if constexpr(i_m_reduce != MReduceIters - 1) // if constexpr(i_m_reduce != MReduceIters - 1)
{ {
acc_load.MoveSrcSliceWindow(c_partial_acc_block_m_n, acc_load.MoveSrcSliceWindow(c_partial_acc_block_m_n,
partial_acc_load_step_m); partial_acc_load_step_m);
acc_store.MoveDstSliceWindow(c_grid_desc_mblock_mperblock_nblock_nperblock, acc_store.MoveDstSliceWindow(c_grid_desc_mblock_mperblock_nblock_nperblock,
partial_acc_store_step_m); partial_acc_store_step_m);
} }
}); }
//});
return; return;
} }
} }
...@@ -1004,8 +1007,8 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk ...@@ -1004,8 +1007,8 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
Sequence<0, 1, 2, 3>, // typename DimAccessOrder, Sequence<0, 1, 2, 3>, // typename DimAccessOrder,
3, // index_t VectorDim, 3, // index_t VectorDim,
CBlockTransferScalarPerVector_NWaveNPerXDL, // index_t ScalarPerVector, CBlockTransferScalarPerVector_NWaveNPerXDL, // index_t ScalarPerVector,
true, // bool ThreadTransferSrcResetCoordinateAfterRun, false, // bool ThreadTransferSrcResetCoordinateAfterRun,
true> // bool ThreadTransferDstResetCoordinateAfterRun false> // bool ThreadTransferDstResetCoordinateAfterRun
{c_block_desc_mblock_mpershuffle_nblock_npershuffle, {c_block_desc_mblock_mpershuffle_nblock_npershuffle,
make_multi_index(0, 0, 0, 0), make_multi_index(0, 0, 0, 0),
c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle, c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle,
...@@ -1062,6 +1065,10 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk ...@@ -1062,6 +1065,10 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
StreamKReductionStrategy::Reduction) StreamKReductionStrategy::Reduction)
{ {
// constexpr offset // constexpr offset
c_block_copy_lds_to_partial_acc.SetSrcSliceOrigin(
c_block_desc_mblock_mpershuffle_nblock_npershuffle,
make_tuple(0, 0, 0, 0));
c_block_copy_lds_to_partial_acc.SetDstSliceOrigin( c_block_copy_lds_to_partial_acc.SetDstSliceOrigin(
c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle, c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle,
make_tuple(mxdlperwave.value, 0, nxdlperwave.value, 0)); make_tuple(mxdlperwave.value, 0, nxdlperwave.value, 0));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment