Remove use of references from the dynamic tensor descriptor and coordinate

0c24e527 · Chao Liu · 2cb05d6d · 0c24e527 · 0c24e527 · 0c24e527
Commit 0c24e527 authored Oct 11, 2020 by Chao Liu
3 changed files
--- a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
+++ b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
@@ -111,11 +111,9 @@ struct DynamicTensorDescriptor
    __host__ __device__ constexpr auto GetLengths() const
    {
-        return unpack([&](auto... is) constexpr { return make_multi_index(GetLength(is)...); },
+        return get_container_subset(hidden_lengths_, VisibleDimensionIds{});
-                      VisibleDimensionIds{});
    }
-    // maybe this result should be saved as a member variable
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
@@ -191,11 +189,11 @@ struct DynamicTensorCoordinate
    public:
    __host__ __device__ explicit constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden)
-        : idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_}
+        : idx_hidden_{idx_hidden}
    {
    }
-    __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }
+    __host__ __device__ constexpr auto GetIndex() const { return GetVisibleIndex(); }
    __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }
@@ -204,14 +202,13 @@ struct DynamicTensorCoordinate
    __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }
-    __host__ __device__ constexpr const auto& GetVisibleIndex() const { return idx_visible_; }
+    __host__ __device__ constexpr auto GetVisibleIndex() const
+    {
-    __host__ __device__ auto& GetVisibleIndex() { return idx_visible_; }
+        return get_container_subset(idx_hidden_, VisibleDimensionIds{});
+    }
    // TODO make these private
    HiddenIndex idx_hidden_;
-    // idx_visible_ contains a reference to idx_hidden_
-    ContainerElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;
 };
 template <index_t NTransform, index_t NDimVisible>
@@ -516,12 +513,12 @@ __host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc&
 }
 template <typename TensorDesc>
-using DynamicTensorCoordinate_t = decltype(
+using DynamicTensorCoordinate_t = decltype(make_dynamic_tensor_coordinate(
-    make_dynamic_tensor_coordinate(TensorDesc{}, MultiIndex<TensorDesc::GetNumOfDimension()>{}));
+    TensorDesc{}, MultiIndex<remove_cv_t<remove_reference_t<TensorDesc>>::GetNumOfDimension()>{}));
 template <typename TensorDesc>
 using DynamicTensorCoordinateStep_t = decltype(make_dynamic_tensor_coordinate_step(
-    TensorDesc{}, MultiIndex<TensorDesc::GetNumOfDimension()>{}));
+    TensorDesc{}, MultiIndex<remove_cv_t<remove_reference_t<TensorDesc>>::GetNumOfDimension()>{}));
 } // namespace ck
 #endif
--- a/composable_kernel/include/tensor_operation/blockwise_dynamic_tensor_slice_transfer.hpp
+++ b/composable_kernel/include/tensor_operation/blockwise_dynamic_tensor_slice_transfer.hpp
@@ -72,7 +72,7 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
        }
    }
-    __device__ void RunLoad(const BlockSrcData* p_block_src)
+    __device__ void RunRead(const BlockSrcData* p_block_src)
    {
        if(BlockSize == thread_cluster_desc_.GetElementSize() or
           get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
@@ -81,7 +81,7 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
        }
    }
-    __device__ void RunStore(BlockDstData* p_block_dst)
+    __device__ void RunWrite(BlockDstData* p_block_dst)
    {
        if(BlockSize == thread_cluster_desc_.GetElementSize() or
           get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
@@ -90,15 +90,15 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
        }
    }
-    __device__ void Run(const BlockSrcData* p_block_src, BlockDstData* p_block_dst) const
+    __device__ void Run(const BlockSrcData* p_block_src, BlockDstData* p_block_dst)
    {
        if(BlockSize == thread_cluster_desc_.GetElementSize() or
           get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
        {
-            RunLoad(p_block_src, p_thread_buffer_);
+            threadwise_read_.Run(p_block_src, p_thread_buffer_);
-            // if there is type conversion, it's done during store
+            // if there is type conversion, it's done during write
-            RunStore(p_thread_buffer_, p_block_dst);
+            threadwise_write_.Run(p_thread_buffer_, p_block_dst);
        }
    }

--- a/driver/src/col2im_driver.cpp
+++ b/driver/src/col2im_driver.cpp
@@ -533,7 +533,7 @@ int main(int argc, char* argv[])
 #endif
    }
-#if 0
+#if 1
    device_col2im_eb_nchw(col_eb_desc,
                          col_eb,
                          img_nchw_desc,
@@ -559,33 +559,6 @@ int main(int argc, char* argv[])
                                          nrepeat);
 #endif
-#if 0
-    constexpr auto lengths = to_multi_index(Sequence<1,2>{});
-    constexpr auto thread_buffer_desc_ =
-        make_dynamic_native_tensor_descriptor_packed<2>(lengths);
-#elif 1
-    constexpr auto lengths = to_multi_index(Sequence<1, 2>{});
-    constexpr index_t NDim = 2;
-    constexpr auto transforms = make_tuple(DynamicUnMerge<NDim>{lengths});
-    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
-    constexpr auto up_dim_hidden_idss =
-        make_tuple(typename arithmetic_sequence_gen<1, NDim + 1, 1>::type{});
-    constexpr auto visible_dim_hidden_ids =
-        typename arithmetic_sequence_gen<1, NDim + 1, 1>::type{};
-    constexpr index_t element_space_size =
-        container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});
-    constexpr auto desc =
-        DynamicTensorDescriptor<decltype(transforms),
-                                decltype(low_dim_hidden_idss),
-                                decltype(up_dim_hidden_idss),
-                                decltype(visible_dim_hidden_ids)>{transforms, element_space_size};
-#endif
    if(do_verification)
    {
        host_col2im(col_eb,