changing class to POD: remove explicit keyword

43d5e05f · Chao Liu · e00de11e
Commit 43d5e05f authored Feb 07, 2021 by Chao Liu
10 changed files
--- a/composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+++ b/composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
@@ -963,7 +963,9 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
        static_assert(std::is_trivial<DynamicUnMerge<2>>::value, "wrong");
        static_assert(std::is_trivial<DynamicFreeze>::value, "wrong");
        static_assert(std::is_trivial<remove_cv_t<decltype(desc)>>::value, "wrong");
-        static_assert(std::is_trivial<remove_reference_t<remove_cv_t<decltype(conv_strides)>>>::value, "wrong");
+        static_assert(
+            std::is_trivial<remove_reference_t<remove_cv_t<decltype(conv_strides)>>>::value,
+            "wrong");
        static_assert(
            std::is_trivial<
                remove_reference_t<remove_cv_t<decltype(wei_k_c_y_x_global_desc)>>>::value,

--- a/composable_kernel/include/tensor_description/dynamic_tensor_coordinate_v1.hpp
+++ b/composable_kernel/include/tensor_description/dynamic_tensor_coordinate_v1.hpp
@@ -32,8 +32,9 @@ struct DynamicNativeTensorCoordinate_v1
    static constexpr index_t NDim = tensor_desc_type::GetNumOfDimension();
    using Index                   = MultiIndex<NDim>;
-    __host__ __device__ explicit constexpr DynamicNativeTensorCoordinate_v1(
+    __host__
-        const tensor_desc_type& tensor_desc, const Index& idx)
+        __device__ constexpr DynamicNativeTensorCoordinate_v1(const tensor_desc_type& tensor_desc,
+                                                              const Index& idx)
        : tensor_desc_{tensor_desc}, idx_{idx}, offset_{tensor_desc.CalculateOffset(idx)}
    {
    }
@@ -128,8 +129,9 @@ struct DynamicTransformedTensorCoordinate_v1
    using LowerDesc  = typename UpperDesc::LowerDesc;
    using LowerCoord = typename DynamicTensorCoordinate_v1<LowerDesc>::type;
-    __host__ __device__ explicit constexpr DynamicTransformedTensorCoordinate_v1(
+    __host__
-        const UpperDesc& tensor_desc_up, const UpperIndex& idx_up)
+        __device__ constexpr DynamicTransformedTensorCoordinate_v1(const UpperDesc& tensor_desc_up,
+                                                                   const UpperIndex& idx_up)
        : tensor_desc_up_{tensor_desc_up},
          idx_up_{idx_up},
          coord_low_{tensor_desc_up.GetLowerTensorDescriptor(),

--- a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
+++ b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
@@ -58,17 +58,10 @@ struct DynamicTensorDescriptor
    using Coordinate   = DynamicTensorCoordinate<ndim_hidden_, VisibleDimensionIds>;
    public:
-#if 1
-    __host__ __device__ explicit constexpr DynamicTensorDescriptor()
-        : DynamicTensorDescriptor(Transforms{}, index_t{0})
-    {
-    }
-#else
    __host__ __device__ constexpr DynamicTensorDescriptor() = default;
-#endif
-    __host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms,
+    __host__ __device__ constexpr DynamicTensorDescriptor(const Transforms& transforms,
-                                                                   index_t element_space_size)
+                                                          index_t element_space_size)
        : transforms_{transforms},
          hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)}
    {
@@ -170,7 +163,7 @@ struct DynamicTensorCoordinate
    using VisibleIndex = MultiIndex<ndim_visible_>;
    public:
-    __host__ __device__ explicit constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden)
+    __host__ __device__ constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden)
        : idx_hidden_{idx_hidden}
    {
    }
@@ -200,7 +193,7 @@ struct DynamicTensorCoordinateIterator
    using VisibleIndex = MultiIndex<NDimVisible>;
    public:
-    __host__ __device__ explicit constexpr DynamicTensorCoordinateIterator(
+    __host__ __device__ constexpr DynamicTensorCoordinateIterator(
        const VisibleIndex& idx_diff_visible, const MultiIndex<NTransform>& do_transforms)
        : idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
    {
@@ -301,10 +294,10 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
    constexpr auto all_up_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);
-    return DynamicTensorDescriptor<decltype(all_transforms),
+    return DynamicTensorDescriptor<remove_cv_t<decltype(all_transforms)>,
-                                   decltype(all_low_dim_hidden_idss),
+                                   remove_cv_t<decltype(all_low_dim_hidden_idss)>,
-                                   decltype(all_up_dim_hidden_idss),
+                                   remove_cv_t<decltype(all_up_dim_hidden_idss)>,
-                                   decltype(new_visible_dim_hidden_ids)>{
+                                   remove_cv_t<decltype(new_visible_dim_hidden_ids)>>{
        all_transforms, old_tensor_desc.GetElementSpaceSize()};
 }

--- a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
+++ b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
@@ -28,11 +28,11 @@ make_dynamic_naive_tensor_descriptor(const MultiIndex<N>& lengths, const MultiIn
    static_for<0, N, 1>{}([&](auto i) { element_space_size += (lengths[i] - 1) * strides[i]; });
-    return DynamicTensorDescriptor<decltype(transforms),
+    return DynamicTensorDescriptor<remove_cv_t<decltype(transforms)>,
-                                   decltype(low_dim_hidden_idss),
+                                   remove_cv_t<decltype(low_dim_hidden_idss)>,
-                                   decltype(up_dim_hidden_idss),
+                                   remove_cv_t<decltype(up_dim_hidden_idss)>,
-                                   decltype(visible_dim_hidden_ids)>{transforms,
+                                   remove_cv_t<decltype(visible_dim_hidden_ids)>>{
-                                                                     element_space_size};
+        transforms, element_space_size};
 }
 template <index_t N>

--- a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v1.hpp
+++ b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v1.hpp
@@ -14,13 +14,13 @@ struct DynamicNativeTensorDescriptor_v1
    const Index lengths_;
    const Index strides_;
-    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1(const Index& lengths,
+    __host__ __device__ constexpr DynamicNativeTensorDescriptor_v1(const Index& lengths,
-                                                                            const Index& strides)
+                                                                   const Index& strides)
        : lengths_{lengths}, strides_{strides}
    {
    }
-    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1()
+    __host__ __device__ constexpr DynamicNativeTensorDescriptor_v1()
        : lengths_{make_zero_multi_index<NDim>()}, strides_{make_zero_multi_index<NDim>()}
    {
    }
@@ -140,8 +140,9 @@ struct DynamicTransformedTensorDescriptor_v1
        }
    };
-    __host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor_v1(
+    __host__
-        const LowerDesc& low_tensor_desc, const Transforms& transforms)
+        __device__ constexpr DynamicTransformedTensorDescriptor_v1(const LowerDesc& low_tensor_desc,
+                                                                   const Transforms& transforms)
        : low_tensor_desc_{low_tensor_desc}, transforms_{transforms}
    {
        static_assert(NTransform == Transforms::Size() && NTransform == LowDimensionIds::Size() &&
@@ -181,7 +182,7 @@ struct DynamicTransformedTensorDescriptor_v1
        //   of lower-tensor-descriptor
    }
-    __host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor_v1()
+    __host__ __device__ constexpr DynamicTransformedTensorDescriptor_v1()
        : low_tensor_desc_{}, transforms_{}
    {
    }

--- a/composable_kernel/include/tensor_description/multi_index_transform.hpp
+++ b/composable_kernel/include/tensor_description/multi_index_transform.hpp
@@ -54,7 +54,7 @@ struct Pad
    using LowerIndex = MultiIndex<nDim>;
    using UpperIndex = MultiIndex<nDim>;
-    __host__ __device__ explicit constexpr Pad()
+    __host__ __device__ constexpr Pad()
    {
        static_assert(LowerLengths::GetSize() == nDim && LeftPads::GetSize() == nDim &&
                          RightPads::GetSize() == nDim,
@@ -115,7 +115,7 @@ struct Slice
    using LowerIndex = MultiIndex<nDim>;
    using UpperIndex = MultiIndex<nDim>;
-    __host__ __device__ explicit constexpr Slice()
+    __host__ __device__ constexpr Slice()
    {
        static_assert(LowerLengths::GetSize() == nDim && SliceBegins::GetSize() == nDim &&
                          SliceEnds::GetSize() == nDim,
@@ -189,8 +189,8 @@ struct Merge
        index_t& itmp;
        LowerIndex& idx_low;
-        __host__ __device__ explicit constexpr lambda_CalculateLowerIndex(index_t& itmp_,
+        __host__ __device__ constexpr lambda_CalculateLowerIndex(index_t& itmp_,
-                                                                          LowerIndex& idx_low_)
+                                                                 LowerIndex& idx_low_)
            : itmp(itmp_), idx_low(idx_low_)
        {
        }
@@ -397,7 +397,7 @@ struct Embed
    using LowerIndex = MultiIndex<nDimLow>;
    using UpperIndex = MultiIndex<nDimUp>;
-    __host__ __device__ explicit constexpr Embed()
+    __host__ __device__ constexpr Embed()
    {
        static_assert(UpperLengths::GetSize() == nDimUp && Coefficients::GetSize() == nDimUp + 1,
                      "wrong! # of dimensions not consistent");
@@ -487,7 +487,7 @@ struct Freeze
    using LowerIndex = MultiIndex<nDimLow>;
    using UpperIndex = MultiIndex<nDimUp>;
-    __host__ __device__ explicit constexpr Freeze()
+    __host__ __device__ constexpr Freeze()
    {
        // TODO: sanity check: LowerFreezePoint should be within range of LowerLengths
    }

--- a/composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
+++ b/composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
@@ -327,11 +327,14 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
    static constexpr index_t nDim = SliceLengths::Size();
    using Index                   = MultiIndex<nDim>;
-    using SrcCoord = decltype(make_dynamic_tensor_coordinate(SrcDesc{}, Index{}));
+    using SrcDesc_ = remove_reference_t<remove_cv_t<SrcDesc>>;
-    using DstCoord = decltype(make_dynamic_tensor_coordinate(DstDesc{}, Index{}));
+    using DstDesc_ = remove_reference_t<remove_cv_t<DstDesc>>;
-    using SrcCoordIterator = decltype(make_dynamic_tensor_coordinate_iterator(SrcDesc{}, Index{}));
+    using SrcCoord = decltype(make_dynamic_tensor_coordinate(SrcDesc_{}, Index{}));
-    using DstCoordIterator = decltype(make_dynamic_tensor_coordinate_iterator(DstDesc{}, Index{}));
+    using DstCoord = decltype(make_dynamic_tensor_coordinate(DstDesc_{}, Index{}));
+    using SrcCoordIterator = decltype(make_dynamic_tensor_coordinate_iterator(SrcDesc_{}, Index{}));
+    using DstCoordIterator = decltype(make_dynamic_tensor_coordinate_iterator(DstDesc_{}, Index{}));
    __device__ constexpr ThreadwiseDynamicTensorSliceTransfer_v3(const SrcDesc& src_desc,
                                                                 const Index& src_slice_origin,

--- a/composable_kernel/include/utility/container_element_picker.hpp
+++ b/composable_kernel/include/utility/container_element_picker.hpp
@@ -18,7 +18,7 @@ struct ContainerElementPicker
    __host__ __device__ constexpr ContainerElementPicker() = delete;
-    __host__ __device__ explicit constexpr ContainerElementPicker(Arr& array) : mArray{array}
+    __host__ __device__ constexpr ContainerElementPicker(Arr& array) : mArray{array}
    {
        constexpr index_t imax = reduce_on_sequence(Picks{}, math::maxer<index_t>{}, Number<0>{});
@@ -83,8 +83,7 @@ struct ConstantContainerElementPicker
    __host__ __device__ constexpr ConstantContainerElementPicker() = delete;
-    __host__ __device__ explicit constexpr ConstantContainerElementPicker(const Arr& array)
+    __host__ __device__ constexpr ConstantContainerElementPicker(const Arr& array) : mArray{array}
-        : mArray{array}
    {
        constexpr index_t imax = reduce_on_sequence(Picks{}, math::maxer<index_t>{}, Number<0>{});

--- a/composable_kernel/include/utility/float_type.amd.hpp.in
+++ b/composable_kernel/include/utility/float_type.amd.hpp.in
@@ -184,7 +184,7 @@ struct vector_type<float, 1>
    float data_;
-    __host__ __device__ explicit constexpr vector_type() : data_{0} {}
+    __host__ __device__ constexpr vector_type() : data_{0} {}
    __host__ __device__ static constexpr index_t Size() { return 1; }
@@ -220,7 +220,7 @@ struct vector_type<float, 2>
        StaticallyIndexedArray<float, 2> scalars_;
    } data_;
-    __host__ __device__ explicit constexpr vector_type() : data_{MemoryType{0}} {}
+    __host__ __device__ constexpr vector_type() : data_{MemoryType{0}} {}
    __host__ __device__ static constexpr index_t Size() { return 2; }
@@ -256,7 +256,7 @@ struct vector_type<float, 4>
        StaticallyIndexedArray<float, 4> scalars_;
    } data_;
-    __host__ __device__ explicit constexpr vector_type() : data_{MemoryType{0}} {}
+    __host__ __device__ constexpr vector_type() : data_{MemoryType{0}} {}
    __host__ __device__ static constexpr index_t Size() { return 4; }

--- a/composable_kernel/include/utility/tuple.hpp
+++ b/composable_kernel/include/utility/tuple.hpp
@@ -12,27 +12,19 @@ namespace detail {
 template <index_t>
 struct TupleElementKey
 {
+    __host__ __device__ constexpr TupleElementKey() = default;
 };
 template <typename Key, typename Data>
 struct TupleElement
 {
-    __host__ __device__ explicit constexpr TupleElement() = default;
+    __host__ __device__ constexpr TupleElement() = default;
-    template <typename UData>
+    template <
-    __host__ __device__ explicit constexpr TupleElement(const TupleElement<Key, UData>& te)
+        typename T,
-        : mData(static_cast<const UData&>(te.mData))
+        typename std::enable_if<!is_same<remove_reference_t<remove_cv_t<T>>, TupleElement>::value,
-    {
+                                bool>::type = false>
-    }
+    __host__ __device__ constexpr TupleElement(T&& v) : mData(std::forward<T>(v))
-    template <typename UData>
-    __host__ __device__ explicit constexpr TupleElement(TupleElement<Key, UData>&& te)
-        : mData(static_cast<UData&&>(te.mData))
-    {
-    }
-    template <typename T>
-    __host__ __device__ explicit constexpr TupleElement(T&& v) : mData(std::forward<T>(v))
    {
    }
@@ -64,36 +56,25 @@ struct TupleImpl;
 template <index_t... Is, typename... Xs>
 struct TupleImpl<Sequence<Is...>, Xs...> : TupleElement<TupleElementKey<Is>, Xs>...
 {
-#if 1
+    __host__ __device__ constexpr TupleImpl() = default;
-    __host__ __device__ explicit constexpr TupleImpl() = default;
+    template <
-    template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 1, bool>::type = false>
+        typename Y,
-    __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys)
+        typename std::enable_if<sizeof...(Is) == 1 && sizeof...(Xs) == 1 &&
-        : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
+                                    !is_same<remove_reference_t<remove_cv_t<Y>>, TupleImpl>::value,
-    {
+                                bool>::type = false>
-        static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
+    __host__ __device__ constexpr TupleImpl(Y&& y)
-                      "wrong! inconsistent size");
-    }
-#else
-    __host__ __device__ explicit constexpr TupleImpl() = default;
-    template <typename Y,
-              typename std::enable_if<sizeof...(Is) == 1 && sizeof...(Xs) == 1 &&
-                                          !is_same<remove_cv_t<Y>, TupleImpl>::value,
-                                      bool>::type = false>
-    __host__ __device__ explicit constexpr TupleImpl(Y&& y)
        : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Y>(y))...
    {
    }
    template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 2, bool>::type = false>
-    __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys)
+    __host__ __device__ constexpr TupleImpl(Ys&&... ys)
        : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
    {
        static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
                      "wrong! inconsistent size");
    }
-#endif
    __host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); }
@@ -121,16 +102,17 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
    __host__ __device__ constexpr Tuple() = default;
    template <typename Y,
-              typename std::enable_if<sizeof...(Xs) == 1 && !is_same<remove_cv_t<Y>, Tuple>::value,
+              typename std::enable_if<
-                                      bool>::type = false>
+                  sizeof...(Xs) == 1 && !is_same<remove_reference_t<remove_cv_t<Y>>, Tuple>::value,
-    __host__ __device__ explicit constexpr Tuple(Y&& y) : base(std::forward<Y>(y))
+                  bool>::type = false>
+    __host__ __device__ constexpr Tuple(Y&& y) : base(std::forward<Y>(y))
    {
    }
    template <typename... Ys,
              typename std::enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 2,
                                      bool>::type = false>
-    __host__ __device__ explicit constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...)
+    __host__ __device__ constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...)
    {
    }