"profiler/vscode:/vscode.git/clone" did not exist on "d305c07935076d774f6ed6caf319a223b8ab4e9d"
Commit 3abe105f authored by Chao Liu's avatar Chao Liu
Browse files

make DynamicTensorDescriptor constexpr

parent 2feca7e0
......@@ -139,7 +139,9 @@ struct DynamicGridwiseCol2Im_gemmkgemmn_nchw
InMemoryDataOperation::AtomicAdd,
1,
1>(
col_gemmk_gemmn_global_desc,
make_multi_index(0, gemmn_block_data_on_global),
img_gemmk_gemmn_global_desc,
make_multi_index(0, gemmn_block_data_on_global));
auto col_gemmk_gemmn_coord =
......
......@@ -83,8 +83,7 @@ struct DynamicTensorDescriptor
__host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms,
index_t element_space_size)
: transforms_{transforms},
hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)},
visible_lengths_{hidden_lengths_}
hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)}
{
static_assert(Transforms::Size() == ntransform_ &&
LowerDimensionIdss::Size() == ntransform_ &&
......@@ -107,10 +106,14 @@ struct DynamicTensorDescriptor
template <index_t IDim>
__host__ __device__ constexpr index_t GetLength(Number<IDim>) const
{
return visible_lengths_[Number<IDim>{}];
return hidden_lengths_[VisibleDimensionIds::At(Number<IDim>{})];
}
__host__ __device__ constexpr const auto& GetLengths() const { return visible_lengths_; }
__host__ __device__ constexpr auto GetLengths() const
{
return unpack([&](auto... is) constexpr { return make_multi_index(GetLength(is)...); },
VisibleDimensionIds{});
}
// maybe this result should be saved as a member variable
__host__ __device__ constexpr index_t GetElementSize() const
......@@ -178,8 +181,6 @@ struct DynamicTensorDescriptor
// TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
// variable lengths_) to save space on stack?
const HiddenIndex hidden_lengths_;
// visible_lengths_ contains a reference to hidden_lengths_
const ContainerElementPicker<const HiddenIndex, VisibleDimensionIds> visible_lengths_;
};
template <index_t NDimHidden, typename VisibleDimensionIds>
......@@ -303,10 +304,11 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
// new visible dimension's hidden ids
constexpr auto unordered_new_visible_dim_hidden_ids =
unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);
unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
constexpr auto new_visible_dim_unordered2ordered = unpack(
[](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});
constexpr auto new_visible_dim_unordered2ordered =
unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
NewUpperDimensionNewVisibleIdss{});
constexpr auto new_visible_dim_hidden_ids =
unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
......@@ -395,8 +397,8 @@ make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_d
// 1) Need to do this transform
// 2) all components of the lower index diff will be assumed to be non-zero and need to be
// computed
const bool idx_diff_up_has_non_zero =
container_reduce(non_zero_diff_pick_up, [](auto a, auto b) { return a or b; }, false);
const bool idx_diff_up_has_non_zero = container_reduce(
non_zero_diff_pick_up, [](auto a, auto b) constexpr { return a or b; }, false);
do_transforms(itran) = idx_diff_up_has_non_zero;
......
......@@ -41,11 +41,11 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
const BlockDstDesc& block_dst_desc,
const Index& dst_block_slice_origin)
{
static_assert(nDim == BlockSrcDesc::GetNumOfDimension() &&
nDim == BlockDstDesc::GetNumOfDimension() &&
static_assert(
nDim == remove_reference_t<remove_cv_t<BlockSrcDesc>>::GetNumOfDimension() &&
nDim == remove_reference_t<remove_cv_t<BlockDstDesc>>::GetNumOfDimension() &&
nDim == BlockSliceLengths::Size() && nDim == ThreadSliceLengths::Size() &&
nDim == ThreadClusterLengths::Size() &&
nDim == ThreadClusterArrangeOrder::Size() &&
nDim == ThreadClusterLengths::Size() && nDim == ThreadClusterArrangeOrder::Size() &&
nDim == SrcDimAccessOrder::Size() && nDim == DstDimAccessOrder::Size(),
"wrong! nDim not consistent");
......@@ -156,7 +156,8 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
ThreadwiseRead threadwise_read_;
ThreadwiseWrite threadwise_write_;
static constexpr index_t thread_buffer_element_size_ = thread_buffer_desc_.GetElementSpace();
static constexpr index_t thread_buffer_element_size_ =
thread_buffer_desc_.GetElementSpaceSize();
BlockSrcData p_thread_buffer_[thread_buffer_element_size_];
};
......
......@@ -126,16 +126,20 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1
using SrcCoordStep = decltype(make_dynamic_tensor_coordinate_step(SrcDesc{}, Index{}));
using DstCoordStep = decltype(make_dynamic_tensor_coordinate_step(DstDesc{}, Index{}));
__device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v1() = default;
__device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v1(const SrcDesc& src_desc,
const Index& src_slice_origin,
const DstDesc& dst_desc,
const Index& dst_slice_origin)
: src_desc_(src_desc),
src_slice_origin_(src_slice_origin),
src_slice_origin_(make_dynamic_tensor_coordinate(src_desc, src_slice_origin)),
dst_desc_(dst_desc),
dst_slice_origin_(dst_slice_origin)
dst_slice_origin_(make_dynamic_tensor_coordinate(dst_desc, dst_slice_origin))
{
}
__device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v1()
: ThreadwiseDynamicTensorSliceTransfer_v1(
SrcDesc{}, make_zero_multi_index<nDim>(), DstDesc{}, make_zero_multi_index<nDim>())
{
}
......
......@@ -71,6 +71,47 @@ struct ContainerElementPicker
Arr& mArray;
};
// Read-only view onto a subset of a container's elements.
//   Arr:   Array or StaticallyIndexedArray
//   Picks: Sequence<...>; element I of the view is element Picks[I] of the container
// The picker keeps a const reference to the container (no copy is made), so the
// container has to outlive the picker.
template <typename Arr, typename Picks>
struct ConstantContainerElementPicker
{
    using type = ConstantContainerElementPicker;
#if 0
    using data_type = typename Arr::data_type;
#endif

    // a picker is always bound to a container, hence no default construction
    __host__ __device__ constexpr ConstantContainerElementPicker() = delete;

    // Binds the view to "array"; rejects at compile time a Picks whose reduction with
    // math::maxer (seeded with 0) is not smaller than Arr::Size().
    __host__ __device__ explicit constexpr ConstantContainerElementPicker(const Arr& array)
        : mArray{array}
    {
        constexpr index_t largest_pick =
            reduce_on_sequence(Picks{}, math::maxer<index_t>{}, Number<0>{});

        static_assert(largest_pick < Arr::Size(), "wrong! exceeding # array element");
    }

    // number of elements visible through this view
    __host__ __device__ static constexpr auto Size() { return Picks::Size(); }

    // Const access to the I-th picked element, i.e. mArray[Picks[I]].
    template <index_t I>
    __host__ __device__ constexpr const auto& At(Number<I> i) const
    {
        static_assert(I < Size(), "wrong!");

        // translate the view index into an index of the underlying container
        constexpr auto IP = Picks{}[i];

        return mArray[IP];
    }

    // same as At()
    template <index_t I>
    __host__ __device__ constexpr const auto& operator[](Number<I> i) const
    {
        return this->At(i);
    }

    private:
    // underlying container, held by const reference
    const Arr& mArray;
};
template <typename Arr, typename Picks, typename X>
__host__ __device__ constexpr auto operator+=(ContainerElementPicker<Arr, Picks>& y, const X& x)
{
......@@ -103,5 +144,11 @@ __host__ __device__ constexpr auto pick_container_element(Arr& a, Picks)
return ContainerElementPicker<Arr, Picks>(a);
}
// Overload for a const container: returns a ConstantContainerElementPicker that refers
// to "a" and exposes the elements selected by Picks. The second argument is only used
// to deduce Picks; its value is ignored.
template <typename Arr, typename Picks>
__host__ __device__ constexpr auto pick_container_element(const Arr& a, Picks)
{
    using ConstPicker = ConstantContainerElementPicker<Arr, Picks>;

    return ConstPicker(a);
}
} // namespace ck
#endif
......@@ -545,7 +545,7 @@ int main(int argc, char* argv[])
LeftPads{},
RightPads{},
nrepeat);
#elif 0
#elif 1
device_dynamic_col2im_gemmkgemmn_nchw(col_eb_desc,
col_eb,
img_nchw_desc,
......@@ -583,8 +583,7 @@ int main(int argc, char* argv[])
DynamicTensorDescriptor<decltype(transforms),
decltype(low_dim_hidden_idss),
decltype(up_dim_hidden_idss),
decltype(
visible_dim_hidden_ids)>{}; //{transforms, element_space_size};
decltype(visible_dim_hidden_ids)>{transforms, element_space_size};
#endif
if(do_verification)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment