remove deprecated tensor descriptor

6fc49f91 · Chao Liu · 506a823a · 6fc49f91 · 506a823a · 506a823a
Commit 6fc49f91 authored May 30, 2020 by Chao Liu
20 changed files
--- a/composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp
+++ b/composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp
@@ -2,7 +2,6 @@
 #define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP

 #include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
 #include "tensor_descriptor.hpp"

 namespace ck {
@@ -58,18 +57,6 @@ __host__ __device__ constexpr auto
    return ConstantMatrixDescriptor<NRow, NCol, RowStride>{};
 }

-template <typename... Ts>
-__host__ __device__ constexpr auto
-    make_ConstantMatrixDescriptor(ConstantTensorDescriptor_deprecated<Ts...>)
-{
-    using TDesc = ConstantTensorDescriptor_deprecated<Ts...>;
-    static_assert(TDesc::GetNumOfDimension() == 2, "wrong");
-    static_assert(TDesc::GetStrides()[1] == 1, "wrong");
-    return ConstantMatrixDescriptor<TDesc::GetLengths()[0],
-                                    TDesc::GetLengths()[1],
-                                    TDesc::GetStrides()[0]>{};
-}
-
 template <typename... Ts>
 __host__ __device__ constexpr auto make_ConstantMatrixDescriptor(NativeTensorDescriptor<Ts...>)
 {

--- a/composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor_deprecated.hpp
+++ b/composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor_deprecated.hpp
-#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_DEPRECATED_HPP
-#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_DEPRECATED_HPP
-
-#include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
-
-namespace ck {
-
-// OriginalTensorDesc : ConstantTensorDescriptor_deprecated<...>
-//     it's the tensor whose dimensions are to be merged
-// OriginalDimMergeSeqs : Sequence<...>...
-//     each is a sequence of original dimensions (of OriginalTensorDesc) to be merged
-template <class OriginalTensorDesc, class... OriginalDimMergeSeqs>
-struct ConstantMergedTensorDescriptor_deprecated
-{
-    using Type = ConstantMergedTensorDescriptor_deprecated;
-
-    static constexpr auto mOriginalDimMergeSeqs = std::tuple<OriginalDimMergeSeqs...>{};
-
-    static constexpr index_t nDim         = sizeof...(OriginalDimMergeSeqs);
-    static constexpr index_t nOriginalDim = OriginalTensorDesc::GetNumOfDimension();
-
-    __host__ __device__ constexpr ConstantMergedTensorDescriptor_deprecated()
-    {
-        static_assert(nDim <= nOriginalDim, "wrong!");
-
-        // TODO: check each of OriginalDimMergeSeqs contains at least 1, and at most
-        // OriginalTensorDesc::nDim number of dimensions
-
-        // TODO: check OriginalDimMergeSeqs contains all original dimensions
-
-        // TODO: check there is no duplication in OriginalDimMergeSeqs
-    }
-
-    __host__ __device__ static constexpr auto GetOriginalTensorDescriptor()
-    {
-        return OriginalTensorDesc{};
-    }
-
-    __host__ __device__ static constexpr auto GetNumOfDimension() { return Number<nDim>{}; }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetContainedOriginalDimensions(Number<IDim>)
-    {
-        return std::get<IDim>(mOriginalDimMergeSeqs);
-    }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr bool ContainMultipleOriginalDimensions(Number<IDim>)
-    {
-        return (std::get<IDim>(mOriginalDimMergeSeqs).GetSize() > 1);
-    }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetLength(Number<IDim>)
-    {
-        constexpr auto original_dims_partial = std::get<IDim>(mOriginalDimMergeSeqs);
-
-        return OriginalTensorDesc::Extract(original_dims_partial).GetElementSize();
-    }
-
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetStride(Number<IDim>)
-    {
-        static_assert(!ContainMultipleOriginalDimensions(Number<IDim>{}),
-                      "wrong! stride of a merged dimension is undefined");
-
-        constexpr auto idim_original = std::get<IDim>(mOriginalDimMergeSeqs).Back();
-
-        return OriginalTensorDesc::GetStride(Number<idim_original>{});
-    }
-
-    // this is a hack to return the stride of the last original dimension of a merged dimension
-    // TODO: refactor this once the concept of "dimension" is used
-    template <index_t IDim>
-    __host__ __device__ static constexpr auto GetLastOriginalDimensionStride(Number<IDim>)
-    {
-        constexpr auto idim_last_original = std::get<IDim>(mOriginalDimMergeSeqs).Back();
-
-        return OriginalTensorDesc::GetStride(Number<idim_last_original>{});
-    }
-
-    __host__ __device__ static constexpr auto GetLengths()
-    {
-        return Sequence<OriginalTensorDesc::Extract(OriginalDimMergeSeqs{}).GetElementSize()...>{};
-    }
-
-    __host__ __device__ static constexpr auto GetElementSize()
-    {
-        return OriginalTensorDesc::GetElementSize();
-    }
-
-    template <class OriginalDimsPartial>
-    struct lambda_1_GetOriginalMultiIndexFromMultiIndex
-    {
-        const Array<index_t, OriginalDimsPartial::GetSize()>& original_multi_id_partial;
-        Array<index_t, nOriginalDim>& original_multi_id;
-
-        __host__ __device__ constexpr lambda_1_GetOriginalMultiIndexFromMultiIndex(
-            const Array<index_t, OriginalDimsPartial::GetSize()>& original_multi_id_partial_,
-            Array<index_t, nOriginalDim>& original_multi_id_)
-            : original_multi_id_partial(original_multi_id_partial_),
-              original_multi_id(original_multi_id_)
-        {
-        }
-
-        template <index_t I>
-        __host__ __device__ constexpr void operator()(Number<I>) const
-        {
-            constexpr index_t idim_original = OriginalDimsPartial::Get(Number<I>{});
-
-            index_t itmp = original_multi_id_partial[I];
-
-            original_multi_id(idim_original) = itmp;
-        }
-    };
-
-    struct lambda_0_GetOriginalMultiIndexFromMultiIndex
-    {
-        const Array<index_t, nDim>& multi_id;
-        Array<index_t, nOriginalDim>& original_multi_id;
-
-        __host__ __device__ constexpr lambda_0_GetOriginalMultiIndexFromMultiIndex(
-            const Array<index_t, nDim>& multi_id_, Array<index_t, nOriginalDim>& original_multi_id_)
-            : multi_id(multi_id_), original_multi_id(original_multi_id_)
-        {
-        }
-
-        template <index_t IDim>
-        __host__ __device__ constexpr void operator()(Number<IDim>) const
-        {
-            constexpr auto original_dims_partial = std::get<IDim>(Type::mOriginalDimMergeSeqs);
-
-            // get partial original-multi-id corresponding to this merged dimension
-            const auto original_multi_id_partial =
-                OriginalTensorDesc::Extract(original_dims_partial)
-                    .GetMultiIndexFrom1dIndex(multi_id[IDim]);
-
-            static_for<0, original_dims_partial.GetSize(), 1>{}(
-                lambda_1_GetOriginalMultiIndexFromMultiIndex<decltype(original_dims_partial)>(
-                    original_multi_id_partial, original_multi_id));
-        }
-    };
-
-    // return type is Array<...>
-    __host__ __device__ static constexpr auto
-    GetOriginalMultiIndexFromMultiIndex(Array<index_t, nDim> multi_id)
-    {
-        Array<index_t, nOriginalDim> original_multi_id;
-
-        static_for<0, nDim, 1>{}(
-            lambda_0_GetOriginalMultiIndexFromMultiIndex(multi_id, original_multi_id));
-
-        return original_multi_id;
-    }
-
-    template <index_t... Is>
-    __host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(Sequence<Is...>)
-    {
-        constexpr auto multi_id = sequence2array(Sequence<Is...>{});
-
-        constexpr auto original_multi_id = GetOriginalMultiIndexFromMultiIndex(multi_id);
-
-        return OriginalTensorDesc::GetOffsetFromMultiIndex(original_multi_id);
-    }
-
-    __host__ __device__ static constexpr index_t
-    GetOffsetFromMultiIndex(Array<index_t, nDim> multi_id)
-    {
-        auto original_multi_id = GetOriginalMultiIndexFromMultiIndex(multi_id);
-
-        return OriginalTensorDesc::GetOffsetFromMultiIndex(original_multi_id);
-    }
-
-    template <class... Is>
-    __host__ __device__ static constexpr index_t GetOffsetFromMultiIndex(Is... is)
-    {
-        return GetOffsetFromMultiIndex(Array<index_t, nDim>{is...});
-    }
-
-    __host__ __device__ static constexpr Array<index_t, nDim> GetMultiIndexFrom1dIndex(index_t id)
-    {
-        constexpr auto packed_desc = make_ConstantTensorDescriptor_packed(GetLengths());
-
-        return packed_desc.GetMultiIndexFrom1dIndex(id);
-    }
-
-    __host__ __device__ static constexpr auto Pack()
-    {
-        constexpr auto lengths = GetLengths();
-        constexpr auto strides = calculate_tensor_strides_packed(lengths);
-        return ConstantTensorDescriptor_deprecated<decltype(lengths), decltype(strides)>{};
-    }
-};
-
-template <class OriginalTensorDesc, class... OriginalDimMergeSeqs>
-__host__ __device__ constexpr auto make_ConstantMergedTensorDescriptor(OriginalTensorDesc,
-                                                                       OriginalDimMergeSeqs...)
-{
-    return ConstantMergedTensorDescriptor_deprecated<OriginalTensorDesc, OriginalDimMergeSeqs...>{};
-}
-
-template <class TDesc>
-__host__ __device__ void print_ConstantMergedTensorDescriptor(const char* s, TDesc)
-{
-    print_ConstantTensorDescriptor(s, TDesc::GetOriginalTensorDescriptor());
-}
-
-} // namespace ck
-#endif
--- a/composable_kernel/include/tensor_description/ConstantTensorDescriptor_deprecated.hpp
+++ b/composable_kernel/include/tensor_description/ConstantTensorDescriptor_deprecated.hpp
--- a/composable_kernel/include/tensor_description/tensor_coordinate_deprecated.hpp
+++ b/composable_kernel/include/tensor_description/tensor_coordinate_deprecated.hpp
-#ifndef CK_TENSOR_COORDINATE_DEPRECATED_HPP
-#define CK_TENSOR_COORDINATE_DEPRECATED_HPP
-
-#include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
-#include "ConstantMergedTensorDescriptor_deprecated.hpp"
-
-namespace ck {
-
-// TensorDesc is ConstantTensorDescriptor_deprecated
-template <class TensorDesc>
-struct NormalTensorCoordinate_deprecated
-{
-    using type             = NormalTensorCoordinate_deprecated;
-    using tensor_desc_type = TensorDesc;
-
-    static constexpr index_t nDim = tensor_desc_type::GetNumOfDimension();
-
-    __host__
-        __device__ constexpr NormalTensorCoordinate_deprecated(Array<index_t, nDim> tensor_index)
-        : mOffset{tensor_desc_type::GetOffsetFromMultiIndex(tensor_index)}
-    {
-    }
-
-    template <class... Xs>
-    __host__ __device__ constexpr NormalTensorCoordinate_deprecated(Xs... xs)
-        : NormalTensorCoordinate_deprecated(Array<index_t, nDim>{xs...})
-    {
-    }
-
-    template <index_t... Xs>
-    __host__ __device__ constexpr NormalTensorCoordinate_deprecated(Sequence<Xs...>)
-        : NormalTensorCoordinate_deprecated(Array<index_t, nDim>{Xs...})
-    {
-    }
-
-    __host__ __device__ constexpr index_t GetOffset() const { return mOffset; }
-
-    // T is Array or Sequence
-    template <class T>
-    __host__ __device__ type operator+=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        mOffset += tensor_desc_type::GetOffsetFromMultiIndex(step_sizes);
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ type operator-=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        mOffset -= tensor_desc_type::GetOffsetFromMultiIndex(step_sizes);
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator+(T step_sizes) const
-    {
-        type coord = *this;
-        coord += step_sizes;
-        return coord;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator-(T step_sizes) const
-    {
-        type coord = *this;
-        coord -= step_sizes;
-        return coord;
-    }
-
-    // Reposition the point of origin and return the compensated offset.
-    // This is a hack to reduce index calculation while looping over
-    // a tensor whose origin is this TensorCoordinate. It does so by handing
-    // back the run-time offset held by this TensorCoordinate (an offset relative to
-    // the pointer to the tensor data), so the caller can add the offset to the
-    // run-time pointer to the data. Only 1 run-time variable (the updated pointer)
-    // is then needed, instead of 2 run-time variables (the old pointer and this offset).
-    // TODO: after introducing the concept of "run-time tensor view", which contains the
-    // run-time pointer to the data, always keep track of the pointer, instead of both
-    // the offset and the pointer. This also brings the additional benefit that we don't
-    // need to worry that the offset might underflow (because the offset is an unsigned
-    // integer) when updating it.
-    __host__ __device__ constexpr index_t RepositionOrigin()
-    {
-        index_t offset_diff = mOffset;
-        mOffset             = 0;
-        return offset_diff;
-    }
-
-    private:
-    index_t mOffset;
-};
-
-// TensorDesc is ConstantMergedTensorDescriptor_deprecated
-template <class TensorDesc>
-struct MergedTensorCoordinate_deprecated
-{
-    using type             = MergedTensorCoordinate_deprecated;
-    using tensor_desc_type = TensorDesc;
-
-    static constexpr index_t nDim = tensor_desc_type::GetNumOfDimension();
-    static constexpr index_t nOriginalDim =
-        tensor_desc_type::GetOriginalTensorDescriptor().GetNumOfDimension();
-
-    __host__
-        __device__ constexpr MergedTensorCoordinate_deprecated(Array<index_t, nDim> tensor_index)
-        : mOriginalIndex{tensor_desc_type::GetOriginalMultiIndexFromMultiIndex(tensor_index)}
-    {
-        // partial offset on each dimension
-        static_for<0, nDim, 1>{}([&](auto idim) {
-            constexpr auto partial_original_dims =
-                tensor_desc_type::GetContainedOriginalDimensions(idim);
-
-            constexpr auto partial_original_desc =
-                tensor_desc_type::GetOriginalTensorDescriptor().Extract(partial_original_dims);
-
-            mPartialOffsets(idim) = partial_original_desc.GetOffsetFromMultiIndex(
-                extract_array(mOriginalIndex, partial_original_dims));
-        });
-
-        // complete offset
-        mOffset =
-            accumulate_on_array(mPartialOffsets, math::plus<index_t>{}, static_cast<index_t>(0));
-    }
-
-    template <class... Xs>
-    __host__ __device__ constexpr MergedTensorCoordinate_deprecated(Xs... xs)
-        : MergedTensorCoordinate_deprecated(Array<index_t, nDim>{xs...})
-    {
-    }
-
-    __host__ __device__ constexpr index_t GetOffset() const { return mOffset; }
-
-    template <class IDim, class T, bool PositiveDirection>
-    __host__ __device__ void
-    MoveOnDimension(IDim idim_, T step_size, integral_constant<bool, PositiveDirection>)
-    {
-        constexpr auto idim = idim_;
-
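-        // if step_size is known at compile time and is zero, there is nothing to do.
-        // NOTE(review): the `return` below only leaves the innermost lambda; it does not
-        // return from MoveOnDimension, so the rest of this function still runs.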
-        static_if<is_static<T>::value>{}(
-            [&](auto) { static_if<T{} == 0>{}([&](auto) { return; }); });
-
-        // update original index
-        static_if<tensor_desc_type::ContainMultipleOriginalDimensions(idim)>{}([&](auto) {
-            constexpr auto partial_original_dims =
-                tensor_desc_type::GetContainedOriginalDimensions(idim);
-
-            constexpr index_t ndim_partial_original = partial_original_dims.GetSize();
-
-            constexpr auto partial_original_desc =
-                tensor_desc_type::GetOriginalTensorDescriptor().Extract(partial_original_dims);
-
-            const auto partial_original_step_sizes =
-                partial_original_desc.GetMultiIndexFrom1dIndex(step_size);
-
-            // update partial original multi-id
-            auto partial_original_id = extract_array(mOriginalIndex, partial_original_dims);
-
-            static_if<PositiveDirection>{}([&](auto) {
-                partial_original_id += partial_original_step_sizes;
-
-                bool carry = false;
-
-                // do carry check in reversed order, starting from lowest dimension
-                // don't check the highest dimension
-                static_for<0, ndim_partial_original - 1, 1>{}([&](auto IReverse) {
-                    constexpr index_t i = ndim_partial_original - 1 - IReverse;
-
-                    if(carry)
-                    {
-                        ++partial_original_id(i);
-                    }
-
-                    carry = false;
-
-                    if(partial_original_id[i] >= partial_original_desc.GetLength(i))
-                    {
-                        partial_original_id(i) -= partial_original_desc.GetLength(i);
-                        carry = true;
-                    }
-                });
-
-                // highest dimension
-                if(carry)
-                {
-                    ++partial_original_id(0);
-                }
-            }).Else([&](auto) {
-                // shift up the multi-id to avoid unsigned integer underflow during intermediate
-                // calculations. After the shift, partial_original_id[...] should be >= 1
-                partial_original_id +=
-                    partial_original_desc.GetLengths() - partial_original_step_sizes;
-
-                bool borrow = false;
-
-                // do borrow check in reversed order, starting from lowest dimension
-                // don't check the highest dimension
-                static_for<0, ndim_partial_original - 1, 1>{}([&](auto IReverse) {
-                    constexpr index_t i = ndim_partial_original - 1 - IReverse;
-
-                    if(borrow)
-                    {
-                        --partial_original_id(i);
-                    }
-
-                    borrow = false;
-
-                    if(partial_original_id[i] < partial_original_desc.GetLength(i))
-                    {
-                        partial_original_id(i) += partial_original_desc.GetLength(i);
-                        borrow = true;
-                    }
-                });
-
-                // highest dimension
-                if(borrow)
-                {
-                    --partial_original_id(0);
-                }
-
-                // shift the multi-id back down
-                // here, partial_original_id[...] should be >= partial_original_desc.GetLengths()[...]
-                partial_original_id = partial_original_id - partial_original_desc.GetLengths();
-            });
-
-            // update "mOriginalIndex"
-            static_for<0, ndim_partial_original, 1>{}([&](auto I) {
-                constexpr auto idim_original = partial_original_dims[I];
-
-                mOriginalIndex(idim_original) = partial_original_id[I];
-            });
-
-            // calculate new partial offset on this merged dimension
-            const index_t old_partial_offset = mPartialOffsets[idim];
-
-            mPartialOffsets(idim) =
-                partial_original_desc.GetOffsetFromMultiIndex(partial_original_id);
-
-            // update "mOffset"; do "+" before "-" to avoid underflow
-            mOffset = (mOffset + mPartialOffsets[idim]) - old_partial_offset;
-        }).Else([&](auto fwd) {
-            static_if<PositiveDirection>{}([&](auto) {
-                mOffset += step_size * fwd(tensor_desc_type{}).GetStride(idim);
-            }).Else([&](auto) { mOffset -= step_size * fwd(tensor_desc_type{}).GetStride(idim); });
-        });
-    }
-
-    // T is Array or Sequence
-    template <class T>
-    __host__ __device__ type operator+=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        static_for<0, nDim, 1>{}([&](auto idim) {
-            // compiler should remove dead code path, because step_sizes is known at
-            // compile time
-            if(step_sizes[idim] != 0)
-            {
-                this->MoveOnDimension(idim, step_sizes[idim], integral_constant<bool, true>{});
-            }
-        });
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ type operator-=(T step_sizes)
-    {
-        static_assert(is_same<typename T::data_type, index_t>{} && T::GetSize() == nDim, "wrong!");
-
-        static_for<0, nDim, 1>{}([&](auto idim) {
-            // compiler should remove dead code path, because step_sizes is known at
-            // compile time
-            if(step_sizes[idim] != 0)
-            {
-                this->MoveOnDimension(idim, step_sizes[idim], integral_constant<bool, false>{});
-            }
-        });
-
-        return *this;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator+(T step_sizes) const
-    {
-        type coord = *this;
-        coord += step_sizes;
-        return coord;
-    }
-
-    template <class T>
-    __host__ __device__ constexpr type operator-(T step_sizes) const
-    {
-        type coord = *this;
-        coord -= step_sizes;
-        return coord;
-    }
-
-    __host__ __device__ static constexpr index_t RepositionOrigin() { return 0; }
-
-    private:
-    // Allocate register memory for all merged dimensions and normal dimensions.
-    // However, only those merged dimensions whose index will be involved in arithmetic
-    // after the construction of this TensorCoordinate (e.g. when the user moves a slicing
-    // window on the merged dimension) will use this register memory.
-    // Let's hope the compiler will optimize away the register memory allocated for normal
-    // dimensions, and for those merged dimensions that would never be involved in index
-    // arithmetic after construction of the TensorCoordinate.
-    // TODO: refactor TensorCoordinate after introducing the concept of "dimensions",
-    // and simplify the implementation of ConstantMergedTensorDescriptor_deprecated, so we
-    // don't need to count on the compiler to optimize away that register memory for us
-    Array<index_t, nOriginalDim> mOriginalIndex;
-    Array<index_t, nDim> mPartialOffsets;
-
-    // complete offset
-    index_t mOffset;
-};
-
-template <class TensorDesc>
-struct TensorCoordinate_deprecated
-{
-    private:
-    template <class... Ts>
-    __host__ __device__ static constexpr auto
-        MakeDummyTensorCoordinate(ConstantTensorDescriptor_deprecated<Ts...>)
-    {
-        return NormalTensorCoordinate_deprecated<ConstantTensorDescriptor_deprecated<Ts...>>();
-    }
-
-    template <class... Ts>
-    __host__ __device__ static constexpr auto
-        MakeDummyTensorCoordinate(ConstantMergedTensorDescriptor_deprecated<Ts...>)
-    {
-        return MergedTensorCoordinate_deprecated<
-            ConstantMergedTensorDescriptor_deprecated<Ts...>>();
-    }
-
-    public:
-    using type = decltype(MakeDummyTensorCoordinate(TensorDesc{}));
-};
-
-} // namespace ck
-#endif
--- a/composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy_deprecated.hpp
+++ b/composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy_deprecated.hpp
--- a/composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_deprecated.hpp
+++ b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_deprecated.hpp
--- a/composable_kernel/include/utility/math.hpp
+++ b/composable_kernel/include/utility/math.hpp
@@ -3,6 +3,7 @@

 #include "config.hpp"
 #include "integral_constant.hpp"
+#include "number.hpp"
 #include "type.hpp"

 namespace ck {

--- a/driver/CMakeLists.txt
+++ b/driver/CMakeLists.txt
 set(TENSOR_SOURCE 
-    src/tensor.cpp;
+    src/host_tensor.cpp;
    src/device.cpp;
 )

@@ -25,8 +25,6 @@ elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
 endif()

 add_executable(conv_driver ${CONV_SOURCE}) 
-add_executable(col2im_driver ${COL2IM_SOURCE}) 
 add_executable(conv_bwd_data_driver ${CONV_BWD_DATA_SOURCE}) 
 target_link_libraries(conv_driver PRIVATE host)
-target_link_libraries(col2im_driver PRIVATE host)
 target_link_libraries(conv_bwd_data_driver PRIVATE host)
--- a/driver/include/conv_common.hpp
+++ b/driver/include/conv_common.hpp
 #ifndef CONV_COMMON_HPP
 #define CONV_COMMON_HPP

-#include "ConstantTensorDescriptor_deprecated.hpp"
 #include "tensor_descriptor.hpp"

-template <class InDesc,
-          class WeiDesc,
-          class ConvStrides,
-          class ConvDilations,
-          class LowerPads,
-          class UpperPads>
-constexpr auto get_convolution_output_default_4d_tensor_descriptor_deprecated(
-    InDesc, WeiDesc, ConvStrides, ConvDilations, LowerPads, UpperPads)
-{
-    using namespace ck;
-
-    constexpr auto in_desc  = InDesc{};
-    constexpr auto wei_desc = WeiDesc{};
-
-    constexpr auto I0 = Number<0>{};
-    constexpr auto I1 = Number<1>{};
-    constexpr auto I2 = Number<2>{};
-    constexpr auto I3 = Number<3>{};
-
-    static_assert(in_desc.GetNumOfDimension() == 4, "input nDim is not 4");
-    static_assert(wei_desc.GetNumOfDimension() == 4, "weight nDim is not 4");
-    static_assert(in_desc.GetLength(I1) == wei_desc.GetLength(I1),
-                  "input & weight dimension not consistent");
-
-    constexpr index_t N  = in_desc.GetLength(I0);
-    constexpr index_t Hi = in_desc.GetLength(I2);
-    constexpr index_t Wi = in_desc.GetLength(I3);
-
-    constexpr index_t K = wei_desc.GetLength(I0);
-    constexpr index_t Y = wei_desc.GetLength(I2);
-    constexpr index_t X = wei_desc.GetLength(I3);
-
-    constexpr index_t HPadLow = LowerPads{}.Get(I0);
-    constexpr index_t WPadLow = LowerPads{}.Get(I1);
-
-    constexpr index_t HPadUp = UpperPads{}.Get(I0);
-    constexpr index_t WPadUp = UpperPads{}.Get(I1);
-
-    constexpr index_t YEff = (Y - 1) * ConvDilations{}[0] + 1;
-    constexpr index_t XEff = (X - 1) * ConvDilations{}[1] + 1;
-
-    constexpr index_t Ho = (Hi + HPadLow + HPadUp - YEff) / ConvStrides{}[0] + 1;
-    constexpr index_t Wo = (Wi + WPadLow + WPadUp - XEff) / ConvStrides{}[1] + 1;
-
-    return make_ConstantTensorDescriptor_packed(Sequence<N, K, Ho, Wo>{});
-}
-
 template <class InDesc,
          class WeiDesc,
          class ConvStrides,

--- a/driver/include/device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp"


--- a/driver/include/device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw.hpp
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw_lds_double_buffer.hpp"


--- a/driver/include/device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp"


--- a/driver/include/device_convolution_backward_data_implicit_gemm_v3r1_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_backward_data_implicit_gemm_v3r1_nchw_kcyx_nkhw.hpp
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v3r1_nchw_kcyx_nkhw.hpp"


--- a/driver/include/device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"


--- a/driver/include/device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
 #pragma once
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp"


--- a/driver/include/device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
 #include <unistd.h>
 #include "device.hpp"
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "gridwise_operation_wrapper.hpp"
 #include "gridwise_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"


--- a/driver/include/device_tensor.hpp
+++ b/driver/include/device_tensor.hpp
 #pragma once
-#include "tensor.hpp"
+#include "host_tensor.hpp"
 #include "common_header.hpp"
-#include "ConstantTensorDescriptor_deprecated.hpp"
 #include "tensor_descriptor.hpp"

-template <typename ConstTensorDesc, std::size_t... Is>
-auto make_TensorDescriptor_impl(ConstTensorDesc, std::integer_sequence<std::size_t, Is...>)
+template <typename TensorDesc, std::size_t... Is>
+auto make_HostTensorDescriptor_impl(TensorDesc, std::integer_sequence<std::size_t, Is...>)
 {
-    std::initializer_list<std::size_t> lengths = {ConstTensorDesc::GetLengths()[Is]...};
-    std::initializer_list<std::size_t> strides = {ConstTensorDesc::GetStrides()[Is]...};
+    std::initializer_list<std::size_t> lengths = {TensorDesc::GetLengths()[Is]...};
+    std::initializer_list<std::size_t> strides = {TensorDesc::GetStrides()[Is]...};

-    return TensorDescriptor(lengths, strides);
+    return HostTensorDescriptor(lengths, strides);
 }

-template <typename ConstTensorDesc>
-auto make_TensorDescriptor(ConstTensorDesc)
+template <typename TensorDesc>
+auto make_HostTensorDescriptor(TensorDesc)
 {
-    return make_TensorDescriptor_impl(
-        ConstTensorDesc{},
-        std::make_integer_sequence<std::size_t, ConstTensorDesc::GetNumOfDimension()>{});
+    return make_HostTensorDescriptor_impl(
+        TensorDesc{}, std::make_integer_sequence<std::size_t, TensorDesc::GetNumOfDimension()>{});
 }

-template <typename ConstTensorDesc>
-void ostream_ConstantTensorDescriptor(ConstTensorDesc, std::ostream& os = std::cout)
+template <typename TensorDesc>
+void ostream_tensor_descriptor(TensorDesc, std::ostream& os = std::cout)
 {
-    ostream_TensorDescriptor(make_TensorDescriptor(ConstTensorDesc{}), os);
+    ostream_HostTensorDescriptor(make_HostTensorDescriptor(TensorDesc{}), os);
 }
--- a/driver/include/host_conv.hpp
+++ b/driver/include/host_conv.hpp
 #pragma once
-#include "tensor.hpp"
+#include "host_tensor.hpp"

 template <class TIn,
          class TWei,

--- a/driver/include/host_conv_bwd_data.hpp
+++ b/driver/include/host_conv_bwd_data.hpp
 #pragma once
-#include "tensor.hpp"
+#include "host_tensor.hpp"

 template <typename TIn,
          typename TWei,

--- a/driver/include/tensor.hpp
+++ b/driver/include/tensor.hpp
-#ifndef TENSOR_HPP
-#define TENSOR_HPP
+#ifndef HOST_TENSOR_HPP
+#define HOST_TENSOR_HPP

 #include <thread>
 #include <vector>
@@ -65,26 +65,26 @@ auto construct_f_unpack_args(F, T args)
    return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
 }

-struct TensorDescriptor
+struct HostTensorDescriptor
 {
-    TensorDescriptor() = delete;
+    HostTensorDescriptor() = delete;

    template <typename X>
-    TensorDescriptor(std::vector<X> lens);
+    HostTensorDescriptor(std::vector<X> lens);

    template <typename X, typename Y>
-    TensorDescriptor(std::vector<X> lens, std::vector<Y> strides);
+    HostTensorDescriptor(std::vector<X> lens, std::vector<Y> strides);

    void CalculateStrides();

    template <class Range>
-    TensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
+    HostTensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
    {
        this->CalculateStrides();
    }

    template <class Range1, class Range2>
-    TensorDescriptor(const Range1& lens, const Range2& strides)
+    HostTensorDescriptor(const Range1& lens, const Range2& strides)
        : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
    {
    }
@@ -205,7 +205,7 @@ struct Tensor
    {
    }

-    Tensor(const TensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}
+    Tensor(const HostTensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}

    template <class G>
    void GenerateTensorValue(G g, std::size_t num_thread = 1)
@@ -267,11 +267,11 @@ struct Tensor

    typename std::vector<T>::const_iterator end() const { return mData.end(); }

-    TensorDescriptor mDesc;
+    HostTensorDescriptor mDesc;
    std::vector<T> mData;
 };

-void ostream_TensorDescriptor(const TensorDescriptor& desc, std::ostream& os = std::cout)
+void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream& os = std::cout)
 {
    os << "dim " << desc.GetNumOfDimension() << ", ";