Commit 0c24e527 authored by Chao Liu
Browse files

remove use of reference from dynamic tensor descriptor and coordinate

parent 2cb05d6d
...@@ -111,11 +111,9 @@ struct DynamicTensorDescriptor ...@@ -111,11 +111,9 @@ struct DynamicTensorDescriptor
__host__ __device__ constexpr auto GetLengths() const __host__ __device__ constexpr auto GetLengths() const
{ {
return unpack([&](auto... is) constexpr { return make_multi_index(GetLength(is)...); }, return get_container_subset(hidden_lengths_, VisibleDimensionIds{});
VisibleDimensionIds{});
} }
// maybe this result should be saved as a member variable
__host__ __device__ constexpr index_t GetElementSize() const __host__ __device__ constexpr index_t GetElementSize() const
{ {
return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1}); return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
...@@ -191,11 +189,11 @@ struct DynamicTensorCoordinate ...@@ -191,11 +189,11 @@ struct DynamicTensorCoordinate
public: public:
__host__ __device__ explicit constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden) __host__ __device__ explicit constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden)
: idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_} : idx_hidden_{idx_hidden}
{ {
} }
__host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); } __host__ __device__ constexpr auto GetIndex() const { return GetVisibleIndex(); }
__host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; } __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }
...@@ -204,14 +202,13 @@ struct DynamicTensorCoordinate ...@@ -204,14 +202,13 @@ struct DynamicTensorCoordinate
__host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; } __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }
__host__ __device__ constexpr const auto& GetVisibleIndex() const { return idx_visible_; } __host__ __device__ constexpr auto GetVisibleIndex() const
{
__host__ __device__ auto& GetVisibleIndex() { return idx_visible_; } return get_container_subset(idx_hidden_, VisibleDimensionIds{});
}
// TODO make these private // TODO make these private
HiddenIndex idx_hidden_; HiddenIndex idx_hidden_;
// idx_visible_ contains a reference to idx_hidden_
ContainerElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;
}; };
template <index_t NTransform, index_t NDimVisible> template <index_t NTransform, index_t NDimVisible>
...@@ -516,12 +513,12 @@ __host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& ...@@ -516,12 +513,12 @@ __host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc&
} }
template <typename TensorDesc> template <typename TensorDesc>
using DynamicTensorCoordinate_t = decltype( using DynamicTensorCoordinate_t = decltype(make_dynamic_tensor_coordinate(
make_dynamic_tensor_coordinate(TensorDesc{}, MultiIndex<TensorDesc::GetNumOfDimension()>{})); TensorDesc{}, MultiIndex<remove_cv_t<remove_reference_t<TensorDesc>>::GetNumOfDimension()>{}));
template <typename TensorDesc> template <typename TensorDesc>
using DynamicTensorCoordinateStep_t = decltype(make_dynamic_tensor_coordinate_step( using DynamicTensorCoordinateStep_t = decltype(make_dynamic_tensor_coordinate_step(
TensorDesc{}, MultiIndex<TensorDesc::GetNumOfDimension()>{})); TensorDesc{}, MultiIndex<remove_cv_t<remove_reference_t<TensorDesc>>::GetNumOfDimension()>{}));
} // namespace ck } // namespace ck
#endif #endif
...@@ -72,7 +72,7 @@ struct BlockwiseDynamicTensorSliceTransfer_v1 ...@@ -72,7 +72,7 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
} }
} }
__device__ void RunLoad(const BlockSrcData* p_block_src) __device__ void RunRead(const BlockSrcData* p_block_src)
{ {
if(BlockSize == thread_cluster_desc_.GetElementSize() or if(BlockSize == thread_cluster_desc_.GetElementSize() or
get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize()) get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
...@@ -81,7 +81,7 @@ struct BlockwiseDynamicTensorSliceTransfer_v1 ...@@ -81,7 +81,7 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
} }
} }
__device__ void RunStore(BlockDstData* p_block_dst) __device__ void RunWrite(BlockDstData* p_block_dst)
{ {
if(BlockSize == thread_cluster_desc_.GetElementSize() or if(BlockSize == thread_cluster_desc_.GetElementSize() or
get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize()) get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
...@@ -90,15 +90,15 @@ struct BlockwiseDynamicTensorSliceTransfer_v1 ...@@ -90,15 +90,15 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
} }
} }
__device__ void Run(const BlockSrcData* p_block_src, BlockDstData* p_block_dst) const __device__ void Run(const BlockSrcData* p_block_src, BlockDstData* p_block_dst)
{ {
if(BlockSize == thread_cluster_desc_.GetElementSize() or if(BlockSize == thread_cluster_desc_.GetElementSize() or
get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize()) get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
{ {
RunLoad(p_block_src, p_thread_buffer_); threadwise_read_.Run(p_block_src, p_thread_buffer_);
// if there is type conversion, it's done during store // if there is type conversion, it's done during write
RunStore(p_thread_buffer_, p_block_dst); threadwise_write_.Run(p_thread_buffer_, p_block_dst);
} }
} }
......
...@@ -533,7 +533,7 @@ int main(int argc, char* argv[]) ...@@ -533,7 +533,7 @@ int main(int argc, char* argv[])
#endif #endif
} }
#if 0 #if 1
device_col2im_eb_nchw(col_eb_desc, device_col2im_eb_nchw(col_eb_desc,
col_eb, col_eb,
img_nchw_desc, img_nchw_desc,
...@@ -559,33 +559,6 @@ int main(int argc, char* argv[]) ...@@ -559,33 +559,6 @@ int main(int argc, char* argv[])
nrepeat); nrepeat);
#endif #endif
#if 0
constexpr auto lengths = to_multi_index(Sequence<1,2>{});
constexpr auto thread_buffer_desc_ =
make_dynamic_native_tensor_descriptor_packed<2>(lengths);
#elif 1
constexpr auto lengths = to_multi_index(Sequence<1, 2>{});
constexpr index_t NDim = 2;
constexpr auto transforms = make_tuple(DynamicUnMerge<NDim>{lengths});
constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
constexpr auto up_dim_hidden_idss =
make_tuple(typename arithmetic_sequence_gen<1, NDim + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids =
typename arithmetic_sequence_gen<1, NDim + 1, 1>::type{};
constexpr index_t element_space_size =
container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});
constexpr auto desc =
DynamicTensorDescriptor<decltype(transforms),
decltype(low_dim_hidden_idss),
decltype(up_dim_hidden_idss),
decltype(visible_dim_hidden_ids)>{transforms, element_space_size};
#endif
if(do_verification) if(do_verification)
{ {
host_col2im(col_eb, host_col2im(col_eb,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment