Commit 4f4aba48 authored by Chao Liu

adding GetLinearDimensionMask()

parent 545d9305
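In short, this commit renames GetMaskOfLinearDimensions()/GetMaskOfNonLinearDimensions() to GetLinearDimensionMask()/GetNonLinearDimensionMask(), switches the construction from a non-linear mask OR-reduced across transforms to a linear mask AND-reduced across transforms, and fills in real implementations of pick_sequence_elements_by_mask() and modify_sequence_elements_by_ids(). A mask here is a Sequence with one entry per dimension, 1 for linear and 0 for non-linear; index_t entries stand in for bool until Sequence can hold bool (see the TODOs in the diff). A minimal sketch of the convention, with hypothetical values, assuming the Sequence utilities below are in scope:

    // hypothetical 4-d linear-dimension mask: dimension 2 is non-linear
    using LinearMask = Sequence<1, 1, 0, 1>;
    static_assert(LinearMask::At(Number<2>{}) == 0, "dim 2 is non-linear");

    // the complementary mask, formed the same way GetNonLinearDimensionMask() forms it
    static_assert(is_same<decltype(LinearMask{}.Transform(logical_not<index_t>{})),
                          Sequence<0, 0, 1, 0>>::value, "");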
@@ -440,9 +440,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
             0,
             b_thread_data_on_global,
             0})
-#if 0
-            .Run_generic
-#elif 1
+#if 1
             .template Run_generic<Float, address_space_t::generic, address_space_t::global>
 #elif 1
             .template Run_optimized_dst_address_calculation<Float, address_space_t::global>
...
@@ -45,6 +45,7 @@ struct NativeTensorCoordinate
     __host__ __device__ constexpr type operator+=(const Index& idx_diff)
     {
         // mIndex is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndex += idx_diff;
         mOffset += tensor_desc_type::CalculateOffsetDiff(idx_diff);
@@ -55,6 +56,7 @@ struct NativeTensorCoordinate
     __host__ __device__ constexpr type operator-=(const Index& idx_diff)
     {
         // mIndex is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndex -= idx_diff;
         mOffset -= tensor_desc_type::CalculateOffsetDiff(idx_diff);
@@ -136,6 +138,7 @@ struct TransformedTensorCoordinate
             idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());
         // mIndexUp is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndexUp += idx_up_diff;
         return *this;
@@ -146,6 +149,8 @@ struct TransformedTensorCoordinate
         mCoordLow -= tensor_desc_type::CalculateLowerIndexDiff(
             idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());
+        // mIndexUp is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndexUp -= idx_up_diff;
         return *this;
...
@@ -101,12 +101,12 @@ struct NativeTensorDescriptor
         return true;
     }
 
-    __host__ __device__ static constexpr auto GetMaskOfLinearDimensions()
+    __host__ __device__ static constexpr auto GetLinearDimensionMask()
     {
         return typename uniform_sequence_gen<nDim, 1>::type{};
     }
 
-    __host__ __device__ static constexpr auto GetMaskOfNonLinearDimensions()
+    __host__ __device__ static constexpr auto GetNonLinearDimensionMask()
     {
         return typename uniform_sequence_gen<nDim, 0>::type{};
     }
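A native descriptor addresses memory through plain strides, so every dimension is linear and both masks are constants. A small sketch of what the two functions above evaluate to for a hypothetical 4-d native descriptor:

    // GetLinearDimensionMask(): all ones; GetNonLinearDimensionMask(): all zeros
    static_assert(is_same<typename uniform_sequence_gen<4, 1>::type,
                          Sequence<1, 1, 1, 1>>::value, "");
    static_assert(is_same<typename uniform_sequence_gen<4, 0>::type,
                          Sequence<0, 0, 0, 0>>::value, "");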
@@ -353,18 +353,27 @@ struct TransformedTensorDescriptor
         return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
     }
 
-#if 0
-    struct lambda_sequence_logic_or
+#if 1
+    struct lambda_sequence_logical_and
     {
         template <typename... Seqs>
-        __host__ __device__ constexpr auto operator()(Seqs... seqs) const
+        __host__ __device__ constexpr auto operator()(Seqs...) const
         {
-            // TODO: should use math::logic_or<bool>, after Sequence can take bool
-            return typename sequence_reduce<math::logic_or<bool>, Seqs...>::type{};
+            return typename sequence_reduce<logical_and<index_t>, Seqs...>::type{};
         }
     };
 
-    struct lambda_1
+    template <typename T>
+    struct lambda_is_true
+    {
+        __host__ __device__ constexpr auto operator()(const T& x) const
+        {
+            // TODO: remove static_cast once Sequence can take bool as entries
+            return static_cast<bool>(x) == true;
+        }
+    };
+
+    struct lambda_get_linear_dimension_mask_of_single_transform
     {
         // check only one transform at a time
         template <typename Transform, typename LowDimensionId, typename UpDimensionId>
@@ -372,55 +381,56 @@ struct TransformedTensorDescriptor
         operator()(const Transform& tran, LowDimensionId, UpDimensionId) const
         {
             // judge if transformation is linear
-            constexpr bool is_linear_transform = tran.IsLinearTransform();
+            constexpr bool is_linear_transform = Transform::IsLinearTransform();
 
             // judge if all lower dimensions are linear
-            constexpr bool is_all_low_dim_linear = math::reduce_on_sequence(
-                pick_sequence_elements_by_mask(
-                    GetLowerTensorDescriptor().GetMaskOfLinearDimensions(), LowDimensionId{}),
-                math::logic_and<bool>{},
-                integral_constant<bool, true>{});
+            constexpr bool are_all_low_dim_linear = sequence_all_of(
+                pick_sequence_elements_by_ids(GetLowerTensorDescriptor().GetLinearDimensionMask(),
+                                              LowDimensionId{}),
+                lambda_is_true<index_t>{});
 
-            // judge if upper dimenisons are linear
-            constexpr bool is_up_dim_nonlinear = !(is_linear_transform && is_all_low_dim_linear);
+            // create linear mask for upper dimensions
+            constexpr bool are_up_dim_linear = is_linear_transform && are_all_low_dim_linear;
 
-            constexpr auto value_sequence =
-                typename uniform_sequence_gen<tran.GetNumOfUpperDimension(),
-                                              is_up_dim_nonlinear>::type{};
-
-            constexpr auto mask_of_up_nonlinear_dims = modifiy_sequence(
-                typename uniform_sequence_gen<nDimUp, 0>::type{}, value_sequence, UpDimensionId{});
-
-            return mask_of_up_nonlinear_dims;
+            // dimensions not owned by this transform default to 1, the identity of
+            // the logical_and reduction in GetLinearDimensionMask()
+            constexpr auto mask_of_up_linear_dims = modify_sequence_elements_by_ids(
+                typename uniform_sequence_gen<nDimUp, 1>::type{},
+                typename uniform_sequence_gen<UpDimensionId::Size(), are_up_dim_linear>::type{},
+                UpDimensionId{});
+
+            return mask_of_up_linear_dims;
         }
     };
-    __host__ __device__ static constexpr bool GetMaskOfNonLinearDimensions()
+    __host__ __device__ static constexpr auto GetLinearDimensionMask()
     {
         // create tuple of linear dimension masks, for all transformations
-        constexpr auto tuple_of_nonlinear_dimension_mask =
-            transform_tuples(lambda_1{}, Transforms{}, LowDimensionIds{}, UpDimensionIds{});
+        constexpr auto tuple_of_linear_dimension_mask =
+            transform_tuples(lambda_get_linear_dimension_mask_of_single_transform{},
+                             Transforms{},
+                             LowDimensionIds{},
+                             UpDimensionIds{});
 
         // reduce tuple of masks into one mask
-        constexpr auto nonlinear_dimension_mask =
-            unpack(lambda_sequence_logic_or{}, tuple_of_nonlinear_dimension_mask);
+        constexpr auto linear_dimension_mask =
+            unpack(lambda_sequence_logical_and{}, tuple_of_linear_dimension_mask);
 
-        return nonlinear_dimension_mask;
+        return linear_dimension_mask;
     }
 
-    __host__ __device__ static constexpr bool GetMaskOfLinearDimensions()
+    __host__ __device__ static constexpr auto GetNonLinearDimensionMask()
     {
-        return GetMaskOfNonLinearDimensions().Transform(math::logic_not<bool>{});
+        return GetLinearDimensionMask().Transform(logical_not<index_t>{});
     }
 
     template <index_t IDim>
     __host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
     {
-        return GetMaskOfLinearDimensions().At(Number<IDim>{});
+        return GetLinearDimensionMask().At(Number<IDim>{});
     }
 
     __host__ __device__ static constexpr auto GetLinearDimensions()
     {
-        constexpr auto linear_dimension_mask = GetMaskOfLienarDimensions();
+        constexpr auto linear_dimension_mask = GetLinearDimensionMask();
 
         return pick_sequence_elements_by_mask(
             typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, linear_dimension_mask);
@@ -428,8 +438,7 @@ struct TransformedTensorDescriptor
     __host__ __device__ static constexpr auto GetNonLinearDimensions()
     {
-        constexpr auto nonlinear_dimension_mask =
-            GetMaskOfLienarDimensions().Transform(math::logic_not<index_t>{});
+        constexpr auto nonlinear_dimension_mask = GetNonLinearDimensionMask();
 
         return pick_sequence_elements_by_mask(
             typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, nonlinear_dimension_mask);
@@ -457,9 +466,10 @@ struct TransformedTensorDescriptor
         return flag;
     }
 
-    // Whenever this function is called, it will call CalculateLowerIndex() recursively
+    // Whenever this function is called, it will call CalculateLowerIndex() recursively.
     // If you have created a tensor coordinate already, instead of calling this function,
-    // you should call TransformedTensorCoordinate::IsUpperIndexMappedToValidOffset()
+    // you should call TensorCoordinate::IsUpperIndexMappedToValidOffset(), which would
+    // be less expensive.
     __host__ __device__ static constexpr bool
     IsUpperIndexMappedToValidOffset(const UpperIndex& idx_up)
     {
...
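The combination step in GetLinearDimensionMask() above depends on each per-transform mask being all ones outside the upper dimensions that transform owns, since 1 is the identity of the logical_and reduction; AND-reducing the per-transform masks then leaves each dimension with the verdict of the one transform that owns it. A sketch with two hypothetical transforms over three upper dimensions, assuming sequence_reduce and the logical_and functor from this commit are in scope:

    using MaskA = Sequence<1, 1, 0>; // owns dim 2 and reports it non-linear
    using MaskB = Sequence<1, 0, 1>; // owns dim 1 and reports it non-linear
    using Combined = typename sequence_reduce<logical_and<index_t>, MaskA, MaskB>::type;
    static_assert(is_same<Combined, Sequence<1, 0, 0>>::value, "only dim 0 stays linear");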
@@ -25,6 +25,18 @@ struct swallow
     }
 };
 
+template <typename T>
+struct logical_and
+{
+    __host__ __device__ constexpr bool operator()(const T& x, const T& y) const { return x && y; }
+};
+
+template <typename T>
+struct logical_or
+{
+    __host__ __device__ constexpr bool operator()(const T& x, const T& y) const { return x || y; }
+};
+
 template <typename T>
 struct logical_not
 {
...
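The new functors mirror std::logical_and and std::logical_or but are usable in device code; they are instantiated with T = index_t because Sequence cannot yet carry bool entries (see the TODO comments elsewhere in this commit). For example:

    static_assert(logical_and<index_t>{}(1, 1), "");
    static_assert(!logical_and<index_t>{}(1, 0), "");
    static_assert(logical_or<index_t>{}(0, 1), "");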
@@ -311,7 +311,7 @@ struct sequence_reverse<Sequence<I0, I1>>
     using type = Sequence<I1, I0>;
 };
 
-#if 0
+#if 1
 template <typename Reduce, typename Seq, typename... Seqs>
 struct sequence_reduce
 {
@@ -755,31 +755,64 @@ __host__ __device__ constexpr auto pick_sequence_elements_by_ids(Seq, Sequence<I
     return Sequence<Seq::At(Number<Is>{})...>{};
 }
 
-#if 0
+#if 1
+namespace detail {
+
+template <typename WorkSeq, typename RemainSeq, typename RemainMask>
+struct pick_sequence_elements_by_mask_impl
+{
+    using new_work_seq = typename conditional<RemainMask::Front(),
+                                              decltype(WorkSeq::PushBack(RemainSeq::Front())),
+                                              WorkSeq>::type;
+
+    using type =
+        typename pick_sequence_elements_by_mask_impl<new_work_seq,
+                                                     decltype(RemainSeq::PopFront()),
+                                                     decltype(RemainMask::PopFront())>::type;
+};
+
+template <typename WorkSeq>
+struct pick_sequence_elements_by_mask_impl<WorkSeq, Sequence<>, Sequence<>>
+{
+    using type = WorkSeq;
+};
+
+} // namespace detail
+
 template <typename Seq, typename Mask>
 __host__ __device__ constexpr auto pick_sequence_elements_by_mask(Seq, Mask)
 {
-    // not implemented
+    static_assert(Seq::Size() == Mask::Size(), "wrong!");
+
+    return typename detail::pick_sequence_elements_by_mask_impl<Sequence<>, Seq, Mask>::type{};
 }
-#endif
 
-template <typename Seq, typename Reduce>
-struct lambda_reduce_on_sequence
-{
-    const Reduce& f;
-    index_t& result;
-
-    __host__ __device__ constexpr lambda_reduce_on_sequence(const Reduce& f_, index_t& result_)
-        : f(f_), result(result_)
-    {
-    }
-
-    template <typename IDim>
-    __host__ __device__ constexpr index_t operator()(IDim) const
-    {
-        return result = f(result, Seq::At(IDim{}));
-    }
-};
+namespace detail {
+
+template <typename WorkSeq, typename RemainValues, typename RemainIds>
+struct modify_sequence_elements_by_ids_impl
+{
+    using new_work_seq = decltype(WorkSeq::Modify(RemainIds::Front(), RemainValues::Front()));
+
+    using type =
+        typename modify_sequence_elements_by_ids_impl<new_work_seq,
+                                                      decltype(RemainValues::PopFront()),
+                                                      decltype(RemainIds::PopFront())>::type;
+};
+
+template <typename WorkSeq>
+struct modify_sequence_elements_by_ids_impl<WorkSeq, Sequence<>, Sequence<>>
+{
+    using type = WorkSeq;
+};
+
+} // namespace detail
+
+template <typename Seq, typename Values, typename Ids>
+__host__ __device__ constexpr auto modify_sequence_elements_by_ids(Seq, Values, Ids)
+{
+    static_assert(Values::Size() == Ids::Size() && Seq::Size() >= Values::Size(), "wrong!");
+
+    return typename detail::modify_sequence_elements_by_ids_impl<Seq, Values, Ids>::type{};
+}
+#endif
 
 template <typename Seq, typename Reduce, index_t Init>
 __host__ __device__ constexpr index_t
@@ -787,14 +820,17 @@ reduce_on_sequence(Seq, Reduce f, Number<Init> /*initial_value*/)
 {
     index_t result = Init;
 
-    static_for<0, Seq::Size(), 1>{}(lambda_reduce_on_sequence<Seq, Reduce>(f, result));
+    for(index_t i = 0; i < Seq::Size(); ++i)
+    {
+        result = f(result, Seq::At(i));
+    }
 
     return result;
 }
 
 // TODO: a generic any_of for any container
 template <typename Seq, typename F>
-__host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
+__host__ __device__ constexpr bool sequence_any_of(Seq, F f)
 {
     bool flag = false;
 
@@ -808,7 +844,7 @@ __host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
 // TODO: a generic all_of for any container
 template <typename Seq, typename F>
-__host__ __device__ constexpr bool sequence_all_of(Seq, F f /*initial_value*/)
+__host__ __device__ constexpr bool sequence_all_of(Seq, F f)
 {
     bool flag = true;
...
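Usage sketches for the sequence utilities this commit fills in; the values are hypothetical and assume the Sequence header is in scope:

    // pick_sequence_elements_by_mask keeps the entries whose mask bit is 1
    static_assert(is_same<decltype(pick_sequence_elements_by_mask(Sequence<10, 20, 30, 40>{},
                                                                  Sequence<1, 0, 1, 0>{})),
                          Sequence<10, 30>>::value, "");

    // modify_sequence_elements_by_ids overwrites the entries at positions 1 and 3
    static_assert(is_same<decltype(modify_sequence_elements_by_ids(Sequence<0, 0, 0, 0>{},
                                                                   Sequence<7, 9>{},
                                                                   Sequence<1, 3>{})),
                          Sequence<0, 7, 0, 9>>::value, "");

    // reduce_on_sequence folds left to right from the initial value,
    // e.g. AND-ing all entries of a mask together
    static_assert(reduce_on_sequence(Sequence<1, 1, 0>{}, logical_and<index_t>{}, Number<1>{}) == 0,
                  "");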