"docs/source/en/vscode:/vscode.git/clone" did not exist on "b0966f5801f4ffb9f008c915a3db64032dcd1edd"
Commit 982c85a3 authored by rocking
Browse files

Do not use snakeCurved; it makes determining the padding for welford difficult.

parent 8749678a
...@@ -698,7 +698,8 @@ struct GridwiseGemmMultipleDWelfordFirstHalf_xdl_cshuffle ...@@ -698,7 +698,8 @@ struct GridwiseGemmMultipleDWelfordFirstHalf_xdl_cshuffle
M2, M2,
1, 1,
M4, M4,
1>>{}; 1>,
false>{};
// space filling curve for shuffled blockwise C in global mem // space filling curve for shuffled blockwise C in global mem
constexpr auto sfc_der_global = constexpr auto sfc_der_global =
...@@ -707,7 +708,8 @@ struct GridwiseGemmMultipleDWelfordFirstHalf_xdl_cshuffle ...@@ -707,7 +708,8 @@ struct GridwiseGemmMultipleDWelfordFirstHalf_xdl_cshuffle
Sequence<1, Sequence<1,
CShuffleMXdlPerWavePerShuffle * MWave * MPerXdl, CShuffleMXdlPerWavePerShuffle * MWave * MPerXdl,
1, 1,
CShuffleNXdlPerWavePerShuffle * NWave * NPerXdl>>{}; CShuffleNXdlPerWavePerShuffle * NWave * NPerXdl>,
false>{};
// LDS c_reduce_block_desc_mperblock_nperblock // LDS c_reduce_block_desc_mperblock_nperblock
constexpr auto c_reduce_block_desc_mperblock_nperblock = transform_tensor_descriptor( constexpr auto c_reduce_block_desc_mperblock_nperblock = transform_tensor_descriptor(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment