Commit e00de11e authored by Chao Liu's avatar Chao Liu
Browse files

changing class to POD

parent f16356d4
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
namespace ck { namespace ck {
#if 1
// GemmM = K // GemmM = K
// GemmN = N * Ho * Wo // GemmN = N * Ho * Wo
// GemmK = C * Y * X // GemmK = C * Y * X
...@@ -903,6 +904,73 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1 ...@@ -903,6 +904,73 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
} }
} }
}; };
#else
template <index_t BlockSize,
typename Float,
typename AccFloat,
index_t GemmMPerBlock,
index_t GemmNPerBlock,
index_t GemmKPerBlock,
index_t GemmMPerThread,
index_t GemmNPerThread,
index_t GemmKPerThread,
index_t GemmMLevel0Cluster,
index_t GemmNLevel0Cluster,
index_t GemmMLevel1Cluster,
index_t GemmNLevel1Cluster,
typename GemmABlockTransferThreadSliceLengths_GemmK_GemmM,
typename GemmABlockTransferThreadClusterLengths_GemmK_GemmM,
index_t GemmABlockTransferSrcScalarPerVector_GemmK,
index_t GemmABlockTransferDstScalarPerVector_GemmM,
typename GemmBBlockTransferThreadSliceLengths_GemmK_GemmN,
typename GemmBBlockTransferThreadClusterLengths_GemmK_GemmN,
index_t GemmBBlockTransferSrcScalarPerVector_GemmN,
index_t GemmBBlockTransferDstScalarPerVector_GemmN,
index_t GemmCThreadTransferDstScalarPerVector_GemmN1>
struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
{
template <typename... Wei, typename... In, typename... Out>
__host__ void Run(const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor<Out...>& out_n_k_ho_wo_global_desc,
const MultiIndex<2> conv_strides,
const MultiIndex<2> conv_dilations,
const MultiIndex<2> in_left_pads,
const MultiIndex<2> in_right_pads,
const Float* __restrict__ p_wei_global,
const Float* __restrict__ p_in_global,
Float* __restrict__ p_out_global) const
{
constexpr auto pass = DynamicPassThrough();
constexpr auto pad = DynamicLeftPad<false>();
constexpr auto freeze = DynamicFreeze();
constexpr auto desc = ck::DynamicTensorDescriptor<ck::Tuple<ck::DynamicUnMerge<4, false>>,
ck::Tuple<ck::Sequence<0>>,
ck::Tuple<ck::Sequence<1, 2, 3, 4>>,
ck::Sequence<1, 2, 3, 4>>();
static_assert(std::is_trivial<Sequence<1>>::value, "wrong");
static_assert(std::is_trivial<detail::TupleElementKey<0>>::value, "wrong");
static_assert(
std::is_trivial<detail::TupleElement<detail::TupleElementKey<0>, index_t>>::value,
"wrong");
static_assert(std::is_trivial<detail::TupleImpl<Sequence<0>, index_t>>::value, "wrong");
static_assert(std::is_trivial<Tuple<index_t>>::value, "wrong");
static_assert(std::is_trivial<MultiIndex<2>>::value, "wrong");
static_assert(std::is_trivial<MultiIndex<1>>::value, "wrong");
static_assert(std::is_trivial<DynamicPassThrough>::value, "wrong");
static_assert(std::is_trivial<DynamicUnMerge<2>>::value, "wrong");
static_assert(std::is_trivial<DynamicFreeze>::value, "wrong");
static_assert(std::is_trivial<remove_cv_t<decltype(desc)>>::value, "wrong");
static_assert(std::is_trivial<remove_reference_t<remove_cv_t<decltype(conv_strides)>>>::value, "wrong");
static_assert(
std::is_trivial<
remove_reference_t<remove_cv_t<decltype(wei_k_c_y_x_global_desc)>>>::value,
"wrong");
}
};
#endif
} // namespace ck } // namespace ck
#endif #endif
...@@ -11,25 +11,15 @@ struct DynamicPassThrough ...@@ -11,25 +11,15 @@ struct DynamicPassThrough
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<1>; using UpperIndex = MultiIndex<1>;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
__host__ __device__ constexpr DynamicPassThrough(const DynamicPassThrough& other) __host__ __device__ constexpr DynamicPassThrough() = default;
: up_lengths_{other.up_lengths_}
{
}
__host__ __device__ constexpr DynamicPassThrough(DynamicPassThrough&& other)
: up_lengths_{other.up_lengths_}
{
}
__host__ __device__ constexpr DynamicPassThrough(const index_t& low_length) __host__ __device__ constexpr DynamicPassThrough(const index_t& low_length)
: up_lengths_{make_multi_index(low_length)} : up_lengths_{make_multi_index(low_length)}
{ {
} }
__host__ __device__ constexpr DynamicPassThrough() : up_lengths_{0} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
...@@ -88,19 +78,11 @@ struct DynamicPad ...@@ -88,19 +78,11 @@ struct DynamicPad
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<1>; using UpperIndex = MultiIndex<1>;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
const index_t left_pad_; index_t left_pad_;
const index_t right_pad_; index_t right_pad_;
__host__ __device__ constexpr DynamicPad(const DynamicPad& other) __host__ __device__ constexpr DynamicPad() = default;
: up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}, right_pad_{other.right_pad_}
{
}
__host__ __device__ constexpr DynamicPad(DynamicPad&& other)
: up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}, right_pad_{other.right_pad_}
{
}
__host__ __device__ constexpr DynamicPad(const index_t& low_length, __host__ __device__ constexpr DynamicPad(const index_t& low_length,
const index_t& left_pad, const index_t& left_pad,
...@@ -111,8 +93,6 @@ struct DynamicPad ...@@ -111,8 +93,6 @@ struct DynamicPad
{ {
} }
__host__ __device__ constexpr DynamicPad() : up_lengths_{0}, left_pad_{0}, right_pad_{0} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
...@@ -173,26 +153,16 @@ struct DynamicLeftPad ...@@ -173,26 +153,16 @@ struct DynamicLeftPad
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<1>; using UpperIndex = MultiIndex<1>;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
const index_t left_pad_; index_t left_pad_;
__host__ __device__ constexpr DynamicLeftPad(const DynamicLeftPad& other)
: up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}
{
}
__host__ __device__ constexpr DynamicLeftPad(DynamicLeftPad&& other) __host__ __device__ constexpr DynamicLeftPad() = default;
: up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}
{
}
__host__ __device__ constexpr DynamicLeftPad(const index_t& low_length, const index_t& left_pad) __host__ __device__ constexpr DynamicLeftPad(const index_t& low_length, const index_t& left_pad)
: up_lengths_{make_multi_index(low_length + left_pad)}, left_pad_{left_pad} : up_lengths_{make_multi_index(low_length + left_pad)}, left_pad_{left_pad}
{ {
} }
__host__ __device__ constexpr DynamicLeftPad() : up_lengths_{0}, left_pad_{0} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
...@@ -252,23 +222,11 @@ struct DynamicRightPad ...@@ -252,23 +222,11 @@ struct DynamicRightPad
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<1>; using UpperIndex = MultiIndex<1>;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
const index_t low_length_; index_t low_length_;
const index_t right_pad_; index_t right_pad_;
__host__ __device__ constexpr DynamicRightPad(const DynamicRightPad& other) __host__ __device__ constexpr DynamicRightPad() = default;
: up_lengths_{other.up_lengths_},
low_length_{other.low_length_},
right_pad_{other.right_pad_}
{
}
__host__ __device__ constexpr DynamicRightPad(DynamicRightPad&& other)
: up_lengths_{other.up_lengths_},
low_length_{other.low_length_},
right_pad_{other.right_pad_}
{
}
__host__ __device__ constexpr DynamicRightPad(const index_t& low_length, __host__ __device__ constexpr DynamicRightPad(const index_t& low_length,
const index_t& right_pad) const index_t& right_pad)
...@@ -278,10 +236,6 @@ struct DynamicRightPad ...@@ -278,10 +236,6 @@ struct DynamicRightPad
{ {
} }
__host__ __device__ constexpr DynamicRightPad() : up_lengths_{0}, low_length_{0}, right_pad_{0}
{
}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
...@@ -342,18 +296,10 @@ struct DynamicEmbed ...@@ -342,18 +296,10 @@ struct DynamicEmbed
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<NDimUp>; using UpperIndex = MultiIndex<NDimUp>;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
const UpperIndex coefficients_; UpperIndex coefficients_;
__host__ __device__ constexpr DynamicEmbed(const DynamicEmbed& other) __host__ __device__ constexpr DynamicEmbed() = default;
: up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_}
{
}
__host__ __device__ constexpr DynamicEmbed(DynamicEmbed&& other)
: up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_}
{
}
__host__ __device__ constexpr DynamicEmbed(const UpperIndex& up_lengths, __host__ __device__ constexpr DynamicEmbed(const UpperIndex& up_lengths,
const UpperIndex& coefficients) const UpperIndex& coefficients)
...@@ -362,19 +308,6 @@ struct DynamicEmbed ...@@ -362,19 +308,6 @@ struct DynamicEmbed
static_assert(UpperIndex::Size() == NDimUp, "wrong! # of dimensions not consistent"); static_assert(UpperIndex::Size() == NDimUp, "wrong! # of dimensions not consistent");
} }
template <typename UpperLengths, typename Coefficients>
__host__ __device__ constexpr DynamicEmbed(const UpperLengths& up_lengths,
const Coefficients& coefficients)
: up_lengths_{up_lengths}, coefficients_{coefficients}
{
}
__host__ __device__ constexpr DynamicEmbed()
: up_lengths_{make_zero_multi_index<NDimUp>()},
coefficients_{make_zero_multi_index<NDimUp>()}
{
}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }
...@@ -439,23 +372,11 @@ struct DynamicMerge ...@@ -439,23 +372,11 @@ struct DynamicMerge
using LowerIndex = MultiIndex<NDimLow>; using LowerIndex = MultiIndex<NDimLow>;
using UpperIndex = MultiIndex<1>; using UpperIndex = MultiIndex<1>;
const LowerIndex low_lengths_; LowerIndex low_lengths_;
const LowerIndex low_lengths_scan_; LowerIndex low_lengths_scan_;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
__host__ __device__ constexpr DynamicMerge(const DynamicMerge& other)
: low_lengths_{other.low_lengths_},
low_lengths_scan_{other.low_lengths_scan_},
up_lengths_{other.up_lengths_}
{
}
__host__ __device__ constexpr DynamicMerge(DynamicMerge&& other) __host__ __device__ constexpr DynamicMerge() = default;
: low_lengths_{other.low_lengths_},
low_lengths_scan_{other.low_lengths_scan_},
up_lengths_{other.up_lengths_}
{
}
__host__ __device__ constexpr DynamicMerge(const LowerIndex& low_lengths) __host__ __device__ constexpr DynamicMerge(const LowerIndex& low_lengths)
: low_lengths_{low_lengths}, : low_lengths_{low_lengths},
...@@ -467,13 +388,6 @@ struct DynamicMerge ...@@ -467,13 +388,6 @@ struct DynamicMerge
static_assert(LowerIndex::Size() == NDimLow, "wrong!"); static_assert(LowerIndex::Size() == NDimLow, "wrong!");
} }
__host__ __device__ constexpr DynamicMerge()
: low_lengths_{make_zero_multi_index<NDimLow>()},
low_lengths_scan_{make_zero_multi_index<NDimLow>()},
up_lengths_{0}
{
}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return NDimLow; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return NDimLow; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
...@@ -953,8 +867,10 @@ struct DynamicUnMerge ...@@ -953,8 +867,10 @@ struct DynamicUnMerge
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<NDimUp>; using UpperIndex = MultiIndex<NDimUp>;
const UpperIndex up_lengths_; UpperIndex up_lengths_;
const UpperIndex up_lengths_scan_; UpperIndex up_lengths_scan_;
__host__ __device__ constexpr DynamicUnMerge() = default;
__host__ __device__ constexpr DynamicUnMerge(const UpperIndex& up_lengths) __host__ __device__ constexpr DynamicUnMerge(const UpperIndex& up_lengths)
: up_lengths_{up_lengths}, : up_lengths_{up_lengths},
...@@ -963,12 +879,6 @@ struct DynamicUnMerge ...@@ -963,12 +879,6 @@ struct DynamicUnMerge
{ {
} }
__host__ __device__ constexpr DynamicUnMerge()
: up_lengths_{make_zero_multi_index<NDimUp>()},
up_lengths_scan_{make_zero_multi_index<NDimUp>()}
{
}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; } __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }
...@@ -1035,11 +945,11 @@ struct DynamicFreeze ...@@ -1035,11 +945,11 @@ struct DynamicFreeze
using LowerIndex = MultiIndex<1>; using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<0>; using UpperIndex = MultiIndex<0>;
const index_t low_idx_; LowerIndex low_idx_;
__host__ __device__ constexpr DynamicFreeze(const index_t& low_idx) : low_idx_{low_idx} {} __host__ __device__ constexpr DynamicFreeze() = default;
__host__ __device__ constexpr DynamicFreeze() : low_idx_{0} {} __host__ __device__ constexpr DynamicFreeze(const index_t& low_idx) : low_idx_{low_idx} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
...@@ -1054,7 +964,7 @@ struct DynamicFreeze ...@@ -1054,7 +964,7 @@ struct DynamicFreeze
static_assert(LowIdx::Size() == 1 && UpIdx::Size() == 1, static_assert(LowIdx::Size() == 1 && UpIdx::Size() == 1,
"wrong! inconsistent # of dimension"); "wrong! inconsistent # of dimension");
idx_low(Number<0>{}) = low_idx_; idx_low = low_idx_;
} }
template <typename LowIdxDiff, template <typename LowIdxDiff,
......
...@@ -58,6 +58,15 @@ struct DynamicTensorDescriptor ...@@ -58,6 +58,15 @@ struct DynamicTensorDescriptor
using Coordinate = DynamicTensorCoordinate<ndim_hidden_, VisibleDimensionIds>; using Coordinate = DynamicTensorCoordinate<ndim_hidden_, VisibleDimensionIds>;
public: public:
#if 1
__host__ __device__ explicit constexpr DynamicTensorDescriptor()
: DynamicTensorDescriptor(Transforms{}, index_t{0})
{
}
#else
__host__ __device__ constexpr DynamicTensorDescriptor() = default;
#endif
__host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms, __host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms,
index_t element_space_size) index_t element_space_size)
: transforms_{transforms}, : transforms_{transforms},
...@@ -71,11 +80,6 @@ struct DynamicTensorDescriptor ...@@ -71,11 +80,6 @@ struct DynamicTensorDescriptor
// TODO check dependency of dimensions is valid // TODO check dependency of dimensions is valid
} }
__host__ __device__ explicit constexpr DynamicTensorDescriptor()
: DynamicTensorDescriptor(Transforms{}, index_t{0})
{
}
__host__ __device__ static constexpr index_t GetNumOfDimension() __host__ __device__ static constexpr index_t GetNumOfDimension()
{ {
return GetNumOfVisibleDimension(); return GetNumOfVisibleDimension();
...@@ -150,10 +154,10 @@ struct DynamicTensorDescriptor ...@@ -150,10 +154,10 @@ struct DynamicTensorDescriptor
} }
// TODO make these private // TODO make these private
const Transforms transforms_; Transforms transforms_;
// TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
// variable lengths_) to save space on stack? // variable lengths_) to save space on stack?
const HiddenIndex hidden_lengths_; HiddenIndex hidden_lengths_;
}; };
template <index_t NDimHidden, typename VisibleDimensionIds> template <index_t NDimHidden, typename VisibleDimensionIds>
......
...@@ -49,11 +49,11 @@ make_dynamic_naive_tensor_descriptor_packed(const MultiIndex<N>& lengths) ...@@ -49,11 +49,11 @@ make_dynamic_naive_tensor_descriptor_packed(const MultiIndex<N>& lengths)
const index_t element_space_size = const index_t element_space_size =
container_reduce(lengths, math::multiplies<index_t>{}, index_t{1}); container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});
return DynamicTensorDescriptor<decltype(transforms), return DynamicTensorDescriptor<remove_cv_t<decltype(transforms)>,
decltype(low_dim_hidden_idss), remove_cv_t<decltype(low_dim_hidden_idss)>,
decltype(up_dim_hidden_idss), remove_cv_t<decltype(up_dim_hidden_idss)>,
decltype(visible_dim_hidden_ids)>{transforms, remove_cv_t<decltype(visible_dim_hidden_ids)>>{
element_space_size}; transforms, element_space_size};
} }
template <index_t N> template <index_t N>
......
...@@ -17,13 +17,7 @@ struct TupleElementKey ...@@ -17,13 +17,7 @@ struct TupleElementKey
template <typename Key, typename Data> template <typename Key, typename Data>
struct TupleElement struct TupleElement
{ {
__host__ __device__ explicit constexpr TupleElement() : mData() {} __host__ __device__ explicit constexpr TupleElement() = default;
#if 0
__host__ __device__ explicit constexpr TupleElement(const TupleElement&) = default;
__host__ __device__ explicit constexpr TupleElement(TupleElement&&) = default;
#endif
template <typename UData> template <typename UData>
__host__ __device__ explicit constexpr TupleElement(const TupleElement<Key, UData>& te) __host__ __device__ explicit constexpr TupleElement(const TupleElement<Key, UData>& te)
...@@ -70,38 +64,36 @@ struct TupleImpl; ...@@ -70,38 +64,36 @@ struct TupleImpl;
template <index_t... Is, typename... Xs> template <index_t... Is, typename... Xs>
struct TupleImpl<Sequence<Is...>, Xs...> : TupleElement<TupleElementKey<Is>, Xs>... struct TupleImpl<Sequence<Is...>, Xs...> : TupleElement<TupleElementKey<Is>, Xs>...
{ {
__host__ __device__ explicit constexpr TupleImpl() : TupleElement<TupleElementKey<Is>, Xs>()... #if 1
{ __host__ __device__ explicit constexpr TupleImpl() = default;
static_assert(sizeof...(Is) == sizeof...(Xs), "wrong! inconsistent size");
}
#if 0
__host__ __device__ explicit constexpr TupleImpl(const TupleImpl&) = default;
__host__ __device__ explicit constexpr TupleImpl(TupleImpl&&) = default; template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 1, bool>::type = false>
#endif __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys)
: TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
template <index_t... Js, typename... Ys>
__host__ __device__ explicit constexpr TupleImpl(const TupleImpl<Sequence<Js...>, Ys...>& y)
: TupleElement<TupleElementKey<Is>, Xs>(
static_cast<const TupleElement<TupleElementKey<Js>, Ys>&>(y))...
{ {
static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
"wrong! inconsistent size");
} }
#else
__host__ __device__ explicit constexpr TupleImpl() = default;
template <index_t... Js, typename... Ys> template <typename Y,
__host__ __device__ explicit constexpr TupleImpl(TupleImpl<Sequence<Js...>, Ys...>&& y) typename std::enable_if<sizeof...(Is) == 1 && sizeof...(Xs) == 1 &&
: TupleElement<TupleElementKey<Is>, Xs>( !is_same<remove_cv_t<Y>, TupleImpl>::value,
static_cast<TupleElement<TupleElementKey<Js>, Ys>&&>(y))... bool>::type = false>
__host__ __device__ explicit constexpr TupleImpl(Y&& y)
: TupleElement<TupleElementKey<Is>, Xs>(std::forward<Y>(y))...
{ {
} }
template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 1, bool>::type = false> template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 2, bool>::type = false>
__host__ __device__ explicit constexpr TupleImpl(Ys&&... ys) __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys)
: TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))... : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
{ {
static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys), static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
"wrong! inconsistent size"); "wrong! inconsistent size");
} }
#endif
__host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); } __host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); }
...@@ -126,34 +118,17 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X ...@@ -126,34 +118,17 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
using base = using base =
detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(Xs), 1>::type, Xs...>; detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(Xs), 1>::type, Xs...>;
__host__ __device__ explicit constexpr Tuple() : base() {} __host__ __device__ constexpr Tuple() = default;
#if 0
__host__ __device__ constexpr Tuple(const Tuple&) = default;
__host__ __device__ constexpr Tuple(Tuple&&) = default;
#endif
template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == sizeof...(Xs), bool>::type = false>
__host__ __device__ explicit constexpr Tuple(const Tuple<Ys...>& y)
: base(static_cast<
const detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(Ys), 1>::type,
Ys...>&>(y))
{
}
template <typename... Ys, template <typename Y,
typename std::enable_if<sizeof...(Ys) == sizeof...(Xs), bool>::type = false> typename std::enable_if<sizeof...(Xs) == 1 && !is_same<remove_cv_t<Y>, Tuple>::value,
__host__ __device__ explicit constexpr Tuple(Tuple<Ys...>&& y) bool>::type = false>
: base(static_cast< __host__ __device__ explicit constexpr Tuple(Y&& y) : base(std::forward<Y>(y))
detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(Ys), 1>::type,
Ys...>&&>(y))
{ {
} }
template <typename... Ys, template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 1, typename std::enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 2,
bool>::type = false> bool>::type = false>
__host__ __device__ explicit constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...) __host__ __device__ explicit constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...)
{ {
......
...@@ -11,12 +11,12 @@ ...@@ -11,12 +11,12 @@
#include "conv_common.hpp" #include "conv_common.hpp"
#include "host_conv.hpp" #include "host_conv.hpp"
#include "device_tensor.hpp" #include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp" //#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp" //#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp" #include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dummy_static_transform.hpp" //#include "device_dummy_static_transform.hpp"
#include "device_dummy_dynamic_transform_v1.hpp" //#include "device_dummy_dynamic_transform_v1.hpp"
#include "device_dummy_dynamic_transform.hpp" //#include "device_dummy_dynamic_transform.hpp"
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
......
...@@ -10,7 +10,7 @@ cmake ...@@ -10,7 +10,7 @@ cmake
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \ -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Release \
-D DEVICE_BACKEND="AMD" \ -D DEVICE_BACKEND="AMD" \
-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -gline-tables-only -save-temps=$CWD" \ -D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -gline-tables-only -save-temps=$CWD -ftemplate-backtrace-limit=0" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \ -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH="/opt/rocm" \ -D CMAKE_PREFIX_PATH="/opt/rocm" \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment