"...test_cli/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "4c916d344d0878129deb8a701d0ba9ff6cf5a4a2"
Commit 3abe105f authored by Chao Liu

make DynamicTensorDescriptor constexpr

parent 2feca7e0
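
Note: as far as this diff shows, what actually makes DynamicTensorDescriptor constexpr-friendly is dropping the visible_lengths_ member (a ContainerElementPicker that stored a reference back into the same object's hidden_lengths_) and computing visible lengths on demand instead. A minimal sketch of the resulting shape of the class, using simplified stand-in types rather than the real ck code:

// Sketch only: simplified stand-in for DynamicTensorDescriptor (not the real ck
// class). All state is held by value, so objects can be created and queried in
// constant expressions.
#include <array>
#include <cstddef>

template <std::size_t NDim>
struct DescriptorSketch
{
    std::array<std::size_t, NDim> hidden_lengths_;
    std::size_t element_space_size_;

    constexpr std::size_t GetLength(std::size_t i) const { return hidden_lengths_[i]; }
    constexpr std::size_t GetElementSpaceSize() const { return element_space_size_; }
};

// Construction and queries are usable at compile time because no member refers
// back into the object itself.
constexpr DescriptorSketch<2> desc{{4, 8}, 32};
static_assert(desc.GetLength(1) == 8 && desc.GetElementSpaceSize() == 32, "constexpr queries");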
@@ -139,7 +139,9 @@ struct DynamicGridwiseCol2Im_gemmkgemmn_nchw
             InMemoryDataOperation::AtomicAdd,
             1,
             1>(
+            col_gemmk_gemmn_global_desc,
             make_multi_index(0, gemmn_block_data_on_global),
+            img_gemmk_gemmn_global_desc,
             make_multi_index(0, gemmn_block_data_on_global));

         auto col_gemmk_gemmn_coord =

@@ -83,8 +83,7 @@ struct DynamicTensorDescriptor
     __host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms,
                                                                    index_t element_space_size)
         : transforms_{transforms},
-          hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)},
-          visible_lengths_{hidden_lengths_}
+          hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)}
     {
         static_assert(Transforms::Size() == ntransform_ &&
                           LowerDimensionIdss::Size() == ntransform_ &&

@@ -107,10 +106,14 @@ struct DynamicTensorDescriptor
     template <index_t IDim>
     __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
     {
-        return visible_lengths_[Number<IDim>{}];
+        return hidden_lengths_[VisibleDimensionIds::At(Number<IDim>{})];
     }

-    __host__ __device__ constexpr const auto& GetLengths() const { return visible_lengths_; }
+    __host__ __device__ constexpr auto GetLengths() const
+    {
+        return unpack([&](auto... is) constexpr { return make_multi_index(GetLength(is)...); },
+                      VisibleDimensionIds{});
+    }

     // maybe this result should be saved as a member variable
     __host__ __device__ constexpr index_t GetElementSize() const

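Note: GetLengths() now rebuilds the visible lengths by value from hidden_lengths_ (via unpack and make_multi_index) instead of returning a reference to the removed visible_lengths_ picker. A standalone analogue of that idea, using std::index_sequence in place of ck's Sequence/unpack machinery:

// Sketch only: VisibleIds plays the role of VisibleDimensionIds, "hidden" plays
// the role of hidden_lengths_; the result is a fresh by-value array.
#include <array>
#include <cstddef>
#include <utility>

template <std::size_t... VisibleIds, std::size_t N>
constexpr auto get_visible_lengths(const std::array<std::size_t, N>& hidden,
                                   std::index_sequence<VisibleIds...>)
{
    return std::array<std::size_t, sizeof...(VisibleIds)>{hidden[VisibleIds]...};
}

constexpr std::array<std::size_t, 4> hidden{7, 3, 5, 2};
// pick hidden dims 1 and 3 as the visible lengths
constexpr auto visible = get_visible_lengths(hidden, std::index_sequence<1, 3>{});
static_assert(visible[0] == 3 && visible[1] == 2, "by-value visible lengths");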
@@ -178,8 +181,6 @@ struct DynamicTensorDescriptor
     // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
     // variable lengths_) to save space on stack?
     const HiddenIndex hidden_lengths_;
-    // visible_lenths_ contains a reference to hidden_lengths_
-    const ContainerElementPicker<const HiddenIndex, VisibleDimensionIds> visible_lengths_;
 };

 template <index_t NDimHidden, typename VisibleDimensionIds>

@@ -303,10 +304,11 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
     // new visible dimension's hidden ids
     constexpr auto unordered_new_visible_dim_hidden_ids =
-        unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);
+        unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);

-    constexpr auto new_visible_dim_unordered2ordered = unpack(
-        [](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});
+    constexpr auto new_visible_dim_unordered2ordered =
+        unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
+               NewUpperDimensionNewVisibleIdss{});

     constexpr auto new_visible_dim_hidden_ids =
         unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);

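Note: the lambdas passed to unpack are now marked constexpr. In C++17 a closure's call operator is implicitly constexpr when it can be, so the explicit keyword mainly documents and enforces that the merged sequences may initialize constexpr variables. A minimal illustration (not ck code):

constexpr auto sum = [](auto... xs) constexpr { return (xs + ... + 0); };
static_assert(sum(1, 2, 3) == 6, "closure call evaluated at compile time");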
@@ -395,8 +397,8 @@ make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_d
         // 1) Need to do this transform
         // 2) all components of lower index diff will assume to be non-zero and need to be
         //    computed
-        const bool idx_diff_up_has_non_zero =
-            container_reduce(non_zero_diff_pick_up, [](auto a, auto b) { return a or b; }, false);
+        const bool idx_diff_up_has_non_zero = container_reduce(
+            non_zero_diff_pick_up, [](auto a, auto b) constexpr { return a or b; }, false);

         do_transforms(itran) = idx_diff_up_has_non_zero;

@@ -41,13 +41,13 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
                                            const BlockDstDesc& block_dst_desc,
                                            const Index& dst_block_slice_origin)
     {
-        static_assert(nDim == BlockSrcDesc::GetNumOfDimension() &&
-                          nDim == BlockDstDesc::GetNumOfDimension() &&
-                          nDim == BlockSliceLengths::Size() && nDim == ThreadSliceLengths::Size() &&
-                          nDim == ThreadClusterLengths::Size() &&
-                          nDim == ThreadClusterArrangeOrder::Size() &&
-                          nDim == SrcDimAccessOrder::Size() && nDim == DstDimAccessOrder::Size(),
-                      "wrong! nDim not consistent");
+        static_assert(
+            nDim == remove_reference_t<remove_cv_t<BlockSrcDesc>>::GetNumOfDimension() &&
+                nDim == remove_reference_t<remove_cv_t<BlockDstDesc>>::GetNumOfDimension() &&
+                nDim == BlockSliceLengths::Size() && nDim == ThreadSliceLengths::Size() &&
+                nDim == ThreadClusterLengths::Size() && nDim == ThreadClusterArrangeOrder::Size() &&
+                nDim == SrcDimAccessOrder::Size() && nDim == DstDimAccessOrder::Size(),
+            "wrong! nDim not consistent");

         static_assert(
             is_same<BlockSliceLengths, decltype(ThreadSliceLengths{} * ThreadClusterLengths{})>{},

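Note: the static_assert now strips reference and cv qualifiers before calling GetNumOfDimension(), presumably so the block descriptor template parameters may be deduced as (const) references to descriptor types. A self-contained sketch of the idea using the standard traits (the diff uses ck's remove_reference_t/remove_cv_t, which mirror std):

#include <type_traits>

struct Desc2D
{
    static constexpr int GetNumOfDimension() { return 2; }
};

// When a template parameter is deduced as "const Desc2D&", the static member
// has to be reached through the underlying class type, so the reference and
// cv qualifiers are stripped first.
template <typename MaybeRefDesc>
constexpr int num_dims()
{
    using Raw = std::remove_cv_t<std::remove_reference_t<MaybeRefDesc>>;
    return Raw::GetNumOfDimension();
}

static_assert(num_dims<const Desc2D&>() == 2, "works through cv/ref qualifiers");
static_assert(num_dims<Desc2D>() == 2, "and for the plain type");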
@@ -156,7 +156,8 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
     ThreadwiseRead threadwise_read_;
     ThreadwiseWrite threadwise_write_;

-    static constexpr index_t thread_buffer_element_size_ = thread_buffer_desc_.GetElementSpace();
+    static constexpr index_t thread_buffer_element_size_ =
+        thread_buffer_desc_.GetElementSpaceSize();

     BlockSrcData p_thread_buffer_[thread_buffer_element_size_];
 };

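Note: beyond the rename to GetElementSpaceSize(), this line shows one reason the descriptor needs to be constexpr: thread_buffer_desc_ (presumably a static constexpr member) supplies, at compile time, the size of a plain C array member. A simplified stand-in showing the same pattern (not the real ck types):

#include <cstddef>

struct DescSketch
{
    std::size_t n_;
    constexpr std::size_t GetElementSpaceSize() const { return n_; }
};

struct TransferSketch
{
    static constexpr DescSketch thread_buffer_desc_{16};
    static constexpr std::size_t thread_buffer_element_size_ =
        thread_buffer_desc_.GetElementSpaceSize();

    // Sized at compile time from the constexpr descriptor above.
    float p_thread_buffer_[thread_buffer_element_size_];
};

static_assert(sizeof(TransferSketch) == 16 * sizeof(float), "buffer sized from a constexpr descriptor");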
@@ -126,16 +126,20 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1
     using SrcCoordStep = decltype(make_dynamic_tensor_coordinate_step(SrcDesc{}, Index{}));
     using DstCoordStep = decltype(make_dynamic_tensor_coordinate_step(DstDesc{}, Index{}));

-    __device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v1() = default;
-
     __device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v1(const SrcDesc& src_desc,
                                                                  const Index& src_slice_origin,
                                                                  const DstDesc& dst_desc,
                                                                  const Index& dst_slice_origin)
         : src_desc_(src_desc),
-          src_slice_origin_(src_slice_origin),
+          src_slice_origin_(make_dynamic_tensor_coordinate(src_desc, src_slice_origin)),
           dst_desc_(dst_desc),
-          dst_slice_origin_(dst_slice_origin)
+          dst_slice_origin_(make_dynamic_tensor_coordinate(dst_desc, dst_slice_origin))
+    {
+    }
+
+    __device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v1()
+        : ThreadwiseDynamicTensorSliceTransfer_v1(
+              SrcDesc{}, make_zero_multi_index<nDim>(), DstDesc{}, make_zero_multi_index<nDim>())
     {
     }

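Note: instead of "= default", the default constructor now delegates to the main constructor with zero slice origins, so both origins always go through make_dynamic_tensor_coordinate. The delegating-constructor pattern in isolation (simplified stand-in types, not the real transfer class):

#include <array>
#include <cstddef>

struct SliceTransferSketch
{
    std::array<std::size_t, 2> src_origin_;
    std::array<std::size_t, 2> dst_origin_;

    constexpr SliceTransferSketch(const std::array<std::size_t, 2>& src,
                                  const std::array<std::size_t, 2>& dst)
        : src_origin_(src), dst_origin_(dst)
    {
    }

    // Delegate instead of "= default" so the zero origins take the same
    // construction path as user-supplied ones.
    constexpr SliceTransferSketch() : SliceTransferSketch({0, 0}, {0, 0}) {}
};

constexpr SliceTransferSketch t{};
static_assert(t.src_origin_[0] == 0 && t.dst_origin_[1] == 0, "default state is the zero origin");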
@@ -71,6 +71,47 @@ struct ContainerElementPicker
     Arr& mArray;
 };

+// Arr: Array or StaticallyIndexedArray
+// Picks: Sequence<...>
+template <typename Arr, typename Picks>
+struct ConstantContainerElementPicker
+{
+    using type = ConstantContainerElementPicker;
+
+#if 0
+    using data_type = typename Arr::data_type;
+#endif
+
+    __host__ __device__ constexpr ConstantContainerElementPicker() = delete;
+
+    __host__ __device__ explicit constexpr ConstantContainerElementPicker(const Arr& array)
+        : mArray{array}
+    {
+        constexpr index_t imax = reduce_on_sequence(Picks{}, math::maxer<index_t>{}, Number<0>{});
+
+        static_assert(imax < Arr::Size(), "wrong! exceeding # array element");
+    }
+
+    __host__ __device__ static constexpr auto Size() { return Picks::Size(); }
+
+    template <index_t I>
+    __host__ __device__ constexpr const auto& At(Number<I> i) const
+    {
+        static_assert(I < Size(), "wrong!");
+        constexpr auto IP = Picks{}[i];
+        return mArray[IP];
+    }
+
+    template <index_t I>
+    __host__ __device__ constexpr const auto& operator[](Number<I> i) const
+    {
+        return At(i);
+    }
+
+    private:
+    const Arr& mArray;
+};
+
 template <typename Arr, typename Picks, typename X>
 __host__ __device__ constexpr auto operator+=(ContainerElementPicker<Arr, Picks>& y, const X& x)
 {

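Note: ConstantContainerElementPicker is the read-only counterpart of ContainerElementPicker, added so that pick_container_element (overload added below) also works on const containers. A simplified analogue of how such a picker behaves, with std::array standing in for ck's Array and a small Picks type standing in for Sequence<...>:

#include <array>
#include <cstddef>

template <std::size_t... Is>
struct Picks
{
    static constexpr std::size_t ids[] = {Is...};
    static constexpr std::size_t Size() { return sizeof...(Is); }
};

template <typename Arr, typename P>
struct ConstPickerSketch
{
    const Arr& arr_;

    explicit constexpr ConstPickerSketch(const Arr& a) : arr_{a} {}

    static constexpr std::size_t Size() { return P::Size(); }

    // Read-only access: element i of the view is element P::ids[i] of the array.
    constexpr const auto& operator[](std::size_t i) const { return arr_[P::ids[i]]; }
};

int main()
{
    const std::array<int, 4> a{10, 20, 30, 40};
    const ConstPickerSketch<std::array<int, 4>, Picks<0, 2>> view{a};
    return view[1] == 30 ? 0 : 1; // view[1] is element 2 of the underlying const array
}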
@@ -103,5 +144,11 @@ __host__ __device__ constexpr auto pick_container_element(Arr& a, Picks)
     return ContainerElementPicker<Arr, Picks>(a);
 }

+template <typename Arr, typename Picks>
+__host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks)
+{
+    return ConstantContainerElementPicker<Arr, Picks>(a);
+}
+
 } // namespace ck
 #endif

@@ -545,7 +545,7 @@ int main(int argc, char* argv[])
                                            LeftPads{},
                                            RightPads{},
                                            nrepeat);
-#elif 0
+#elif 1
     device_dynamic_col2im_gemmkgemmn_nchw(col_eb_desc,
                                           col_eb,
                                           img_nchw_desc,

@@ -583,8 +583,7 @@ int main(int argc, char* argv[])
     DynamicTensorDescriptor<decltype(transforms),
                             decltype(low_dim_hidden_idss),
                             decltype(up_dim_hidden_idss),
-                            decltype(
-                                visible_dim_hidden_ids)>{}; //{transforms, element_space_size};
+                            decltype(visible_dim_hidden_ids)>{transforms, element_space_size};
 #endif

     if(do_verification)