Commit 08bf57b0 authored by Chao Liu
Browse files

bug fix: BlockwiseGenericTensorSliceCopy_v2::MoveDstSlicingWindow

parent 86ceded9
...@@ -125,8 +125,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded ...@@ -125,8 +125,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed( constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed(
Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{}); Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{});
// blockwise copy // blockwise input copy
// input: format is [C, Hi, Wi, N] // format is [C, Hi, Wi, N]
auto blockwise_in_copy = auto blockwise_in_copy =
BlockwiseGenericTensorSliceCopy_v2<BlockSize, BlockwiseGenericTensorSliceCopy_v2<BlockSize,
decltype(in_c_h_w_n_global_desc), decltype(in_c_h_w_n_global_desc),
......
...@@ -417,6 +417,8 @@ struct BlockwiseGenericTensorSliceCopy_v1 ...@@ -417,6 +417,8 @@ struct BlockwiseGenericTensorSliceCopy_v1
} }
}; };
// Slice a (normal or merged) tensor, and copy it into another (normal or merged) tensor
// memory layout (ordering of dimensions) can be different between src and dst.
template <index_t BlockSize, template <index_t BlockSize,
class SrcDesc, class SrcDesc,
class DstDesc, class DstDesc,
...@@ -510,7 +512,7 @@ struct BlockwiseGenericTensorSliceCopy_v2 ...@@ -510,7 +512,7 @@ struct BlockwiseGenericTensorSliceCopy_v2
MoveDstSlicingWindow(T step_sizes, MoveDstSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction) integral_constant<bool, PositiveDirection> positive_direction)
{ {
mThreadwiseLoad.MoveDstSlicingWindow(step_sizes, positive_direction); mThreadwiseStore.MoveDstSlicingWindow(step_sizes, positive_direction);
} }
private: private:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment