"git@developer.sourcefind.cn:orangecat/ollama.git" did not exist on "27bcce6d9fb1e002ef4393ff48d6cadb5b29da41"
Commit 08bf57b0 authored by Chao Liu
Browse files

bug fix: BlockwiseGenericTensorSliceCopy_v2::MoveDstSlicingWindow

parent 86ceded9
......@@ -125,8 +125,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed(
Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{});
-// blockwise copy
-// input: format is [C, Hi, Wi, N]
+// blockwise input copy
+// format is [C, Hi, Wi, N]
auto blockwise_in_copy =
BlockwiseGenericTensorSliceCopy_v2<BlockSize,
decltype(in_c_h_w_n_global_desc),
......
......@@ -417,6 +417,8 @@ struct BlockwiseGenericTensorSliceCopy_v1
}
};
+// Slice a (normal or merged) tensor, and copy it into another (normal or merged) tensor
+// memory layout (ordering of dimensions) can be different between src and dst.
template <index_t BlockSize,
class SrcDesc,
class DstDesc,
......@@ -510,7 +512,7 @@ struct BlockwiseGenericTensorSliceCopy_v2
MoveDstSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
{
-mThreadwiseLoad.MoveDstSlicingWindow(step_sizes, positive_direction);
+mThreadwiseStore.MoveDstSlicingWindow(step_sizes, positive_direction);
}
private:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment