Commit 4f4aba48 authored by Chao Liu

adding GetLinearDimensionMask()

parent 545d9305
......@@ -440,9 +440,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
0,
b_thread_data_on_global,
0})
#if 0
.Run_generic
#elif 1
#if 1
.template Run_generic<Float, address_space_t::generic, address_space_t::global>
#elif 1
.template Run_optimized_dst_address_calculation<Float, address_space_t::global>
......
......@@ -45,6 +45,7 @@ struct NativeTensorCoordinate
__host__ __device__ constexpr type operator+=(const Index& idx_diff)
{
// mIndex is updated here, but some (or all) of its entries may never be used
// compiler should remove those entries as dead code
mIndex += idx_diff;
mOffset += tensor_desc_type::CalculateOffsetDiff(idx_diff);
......@@ -55,6 +56,7 @@ struct NativeTensorCoordinate
__host__ __device__ constexpr type operator-=(const Index& idx_diff)
{
// mIndex is updated here, but some (or all) of its entries may never be used
// compiler should remove those entries as dead code
mIndex -= idx_diff;
mOffset -= tensor_desc_type::CalculateOffsetDiff(idx_diff);
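For illustration only (not part of the diff above): the comments in this hunk rely on the compiler treating the unused index update as dead code once only the pre-computed offset is read. A minimal standalone sketch of that incremental-coordinate idea, with hypothetical names and a fixed 2-d layout:

    #include <array>
    #include <cstddef>

    // Hypothetical 2-d coordinate: carries both a multi-index and a linear offset.
    struct Coord2D
    {
        std::array<std::ptrdiff_t, 2> index; // may be dead code if callers only read offset
        std::ptrdiff_t offset;               // strides assumed to be {stride0, 1}
        std::ptrdiff_t stride0;

        constexpr Coord2D& operator+=(const std::array<std::ptrdiff_t, 2>& diff)
        {
            index[0] += diff[0]; // kept only for callers that read the index
            index[1] += diff[1];
            offset += diff[0] * stride0 + diff[1]; // offset updated incrementally
            return *this;
        }
    };
    // If a caller only ever reads coord.offset, the index updates above are dead
    // stores that an optimizing compiler is expected to remove.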
......@@ -136,6 +138,7 @@ struct TransformedTensorCoordinate
idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());
// mIndexUp is updated here, but some (or all) of its entries may never be used
// compiler should remove those entries as dead code
mIndexUp += idx_up_diff;
return *this;
......@@ -146,6 +149,8 @@ struct TransformedTensorCoordinate
mCoordLow -= tensor_desc_type::CalculateLowerIndexDiff(
idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());
// mIndexUp is updated here, but some (or all) of its entries may never be used
// compiler should remove those entries as dead code
mIndexUp -= idx_up_diff;
return *this;
......
......@@ -101,12 +101,12 @@ struct NativeTensorDescriptor
return true;
}
__host__ __device__ static constexpr auto GetMaskOfLinearDimensions()
__host__ __device__ static constexpr auto GetLinearDimensionMask()
{
return typename uniform_sequence_gen<nDim, 1>::type{};
}
__host__ __device__ static constexpr auto GetMaskOfNonLinearDimensions()
__host__ __device__ static constexpr auto GetNonLinearDimensionMask()
{
return typename uniform_sequence_gen<nDim, 0>::type{};
}
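For illustration only (not part of the diff above): for a native descriptor the offset is a plain strided function of the index, so every dimension counts as linear; the two masks are simply all-ones and all-zeros sequences of length nDim. A standalone sketch using std::integer_sequence in place of the repo's Sequence:

    #include <type_traits>
    #include <utility>

    // Build an integer_sequence of N copies of Value (stand-in for uniform_sequence_gen).
    template <int Value, std::size_t... Is>
    constexpr auto uniform_mask_impl(std::index_sequence<Is...>)
    {
        return std::integer_sequence<int, ((void)Is, Value)...>{};
    }

    template <std::size_t N, int Value>
    constexpr auto uniform_mask()
    {
        return uniform_mask_impl<Value>(std::make_index_sequence<N>{});
    }

    // For a 4-d native descriptor: linear mask = {1,1,1,1}, non-linear mask = {0,0,0,0}.
    static_assert(std::is_same_v<decltype(uniform_mask<4, 1>()),
                                 std::integer_sequence<int, 1, 1, 1, 1>>);
    static_assert(std::is_same_v<decltype(uniform_mask<4, 0>()),
                                 std::integer_sequence<int, 0, 0, 0, 0>>);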
......@@ -353,18 +353,27 @@ struct TransformedTensorDescriptor
return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
}
#if 0
struct lambda_sequence_logic_or
#if 1
struct lambda_sequence_logical_and
{
template <typename... Seqs>
__host__ __device__ constexpr auto operator()(Seqs... seqs) const
__host__ __device__ constexpr auto operator()(Seqs...) const
{
return typename sequence_reduce<logical_and<index_t>, Seqs...>::type{};
}
};
template <typename T>
struct lambda_is_true
{
__host__ __device__ constexpr auto operator()(const T& x) const
{
// TODO: should use math::logic_or<bool>, after Sequence can take bool
return typename sequence_reduce<math::logic_or<bool>, Seqs...>::type{};
// TODO: remove static_cast once Sequence can take bool as entries
return static_cast<bool>(x) == true;
}
};
struct lambda_1
struct lambda_get_linear_dimension_mask_of_single_transform
{
// check only one transform at a time
template <typename Transform, typename LowDimensionId, typename UpDimensionId>
......@@ -372,73 +381,73 @@ struct TransformedTensorDescriptor
operator()(const Transform& tran, LowDimensionId, UpDimensionId) const
{
// judge if transformation is linear
constexpr bool is_linear_transform = tran.IsLinearTransform();
constexpr bool is_linear_transform = Transform::IsLinearTransform();
// judge if all lower dimensions are linear
constexpr bool is_all_low_dim_linear = math::reduce_on_sequence(
pick_sequence_elements_by_mask(
GetLowerTensorDescriptor().GetMaskOfLinearDimensions(), LowDimensionId{}),
math::logic_and<bool>{},
integral_constant<bool, true>{});
// judge if upper dimensions are linear
constexpr bool is_up_dim_nonlinear = !(is_linear_transform && is_all_low_dim_linear);
constexpr bool are_all_low_dim_linear = sequence_all_of(
pick_sequence_elements_by_ids(GetLowerTensorDescriptor().GetLinearDimensionMask(),
LowDimensionId{}),
lambda_is_true<index_t>{});
constexpr auto value_sequence =
typename uniform_sequence_gen<tran.GetNumOfUpperDimension(),
is_up_dim_nonlinear>::type{};
// create linear mask for upper dimensions
constexpr bool are_up_dim_linear = is_linear_transform && are_all_low_dim_linear;
constexpr auto mask_of_up_nonlinear_dims = modifiy_sequence(
typename uniform_sequence_gen<nDimUp, 0>::type{}, value_sequence, UpDimensionId{});
constexpr auto mask_of_up_linear_dims = modify_sequence_elements_by_ids(
typename uniform_sequence_gen<nDimUp, 1>::type{},
typename uniform_sequence_gen<UpDimensionId::Size(), are_up_dim_linear>::type{},
UpDimensionId{});
return mask_of_up_nonlinear_dims;
};
return mask_of_up_linear_dims;
}
};
__host__ __device__ static constexpr bool GetMaskOfNonLinearDimensions()
{
// create tuple of linear dimension masks, for all transformations
constexpr auto tuple_of_nonlinear_dimension_mask =
transform_tuples(lambda_1{}, Transforms{}, LowDimensionIds{}, UpDimensionIds{});
__host__ __device__ static constexpr auto GetLinearDimensionMask()
{
// create tuple of linear dimension masks, for all transformations
constexpr auto tuple_of_linear_dimension_mask =
transform_tuples(lambda_get_linear_dimension_mask_of_single_transform{},
Transforms{},
LowDimensionIds{},
UpDimensionIds{});
// reduce tuple of masks into one mask
constexpr auto nonlinear_dimension_mask =
unpack(lambda_sequence_logic_or{}, tuple_of_nonlinear_dimension_mask);
// reduce tuple of masks into one mask
constexpr auto linear_dimension_mask =
unpack(lambda_sequence_logical_and{}, tuple_of_linear_dimension_mask);
return nonlinear_dimension_mask;
}
return linear_dimension_mask;
}
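For illustration only (not part of the diff above), and assuming the base-all-ones construction of mask_of_up_linear_dims sketched in the lambda: each transform contributes a mask that is 1 everywhere except at its own upper dimensions, where it carries the transform's linearity flag, and the logical-AND reduction then yields one flag per upper dimension. A standalone numeric sketch with two made-up transforms:

    #include <array>

    // 4 upper dimensions; two hypothetical transforms:
    //   transform A owns upper dims {0, 2} and is linear,
    //   transform B owns upper dims {1, 3} and is NOT linear.
    constexpr std::array<int, 4> mask_a{1, 1, 1, 1}; // base 1s, dims {0,2} set to 1 (linear)
    constexpr std::array<int, 4> mask_b{1, 0, 1, 0}; // base 1s, dims {1,3} set to 0 (non-linear)

    constexpr std::array<int, 4> and_reduce(const std::array<int, 4>& x,
                                            const std::array<int, 4>& y)
    {
        std::array<int, 4> r{};
        for(int i = 0; i < 4; ++i)
            r[i] = (x[i] && y[i]) ? 1 : 0; // element-wise logical AND
        return r;
    }

    constexpr auto linear_mask = and_reduce(mask_a, mask_b);
    static_assert(linear_mask[0] == 1 && linear_mask[2] == 1, "dims owned by A stay linear");
    static_assert(linear_mask[1] == 0 && linear_mask[3] == 0, "dims owned by B are non-linear");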
__host__ __device__ static constexpr bool GetMaskOfLinearDimensions()
{
return GetMaskOfNonLinearDimensions().Transform(math::logic_not<bool>{});
}
__host__ __device__ static constexpr auto GetNonLinearDimensionMask()
{
return GetLinearDimensionMask().Transform(logical_not<index_t>{});
}
template <index_t IDim>
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
{
return GetMaskOfLinearDimensions().At(Number<IDim>{});
}
template <index_t IDim>
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
{
return GetLinearDimensionMask().At(Number<IDim>{});
}
__host__ __device__ static constexpr auto GetLinearDimensions()
{
constexpr auto linear_dimension_mask = GetMaskOfLienarDimensions();
__host__ __device__ static constexpr auto GetLinearDimensions()
{
constexpr auto linear_dimension_mask = GetLinearDimensionMask();
return pick_sequence_elements_by_mask(
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, linear_dimension_mask);
}
return pick_sequence_elements_by_mask(
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, linear_dimension_mask);
}
__host__ __device__ static constexpr auto GetNonLinearDimensions()
{
constexpr auto nonlinear_dimension_mask =
GetMaskOfLienarDimensions().Transform(math::logic_not<index_t>{});
__host__ __device__ static constexpr auto GetNonLinearDimensions()
{
constexpr auto nonlinear_dimension_mask = GetNonLinearDimensionMask();
return pick_sequence_elements_by_mask(
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, nonlinear_dimension_mask);
}
return pick_sequence_elements_by_mask(
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, nonlinear_dimension_mask);
}
__host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups()
{
// not implemented
}
__host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups()
{
// not implemented
}
#endif
__host__ __device__ static constexpr bool
......@@ -457,9 +466,10 @@ struct TransformedTensorDescriptor
return flag;
}
// Whenever this function is called, it will call CalculateLowerIndex() recursively
// Whenever this function is called, it will call CalculateLowerIndex() recursively.
// If you have created a tensor coordinate already, instead of calling this function,
// you should call TransformedTensorCoordinate::IsUpperIndexMappedToValidOffset()
// you should call TensorCoordinate::IsUpperIndexMappedToValidOffset(), which would
// be less expensive.
__host__ __device__ static constexpr bool
IsUpperIndexMappedToValidOffset(const UpperIndex& idx_up)
{
......
......@@ -25,6 +25,18 @@ struct swallow
}
};
template <typename T>
struct logical_and
{
constexpr bool operator()(const T& x, const T& y) const { return x && y; }
};
template <typename T>
struct logical_or
{
constexpr bool operator()(const T& x, const T& y) const { return x || y; }
};
template <typename T>
struct logical_not
{
......
......@@ -311,7 +311,7 @@ struct sequence_reverse<Sequence<I0, I1>>
using type = Sequence<I1, I0>;
};
#if 0
#if 1
template <typename Reduce, typename Seq, typename... Seqs>
struct sequence_reduce
{
......@@ -755,31 +755,64 @@ __host__ __device__ constexpr auto pick_sequence_elements_by_ids(Seq, Sequence<I
return Sequence<Seq::At(Number<Is>{})...>{};
}
#if 0
#if 1
namespace detail {
template <typename WorkSeq, typename RemainSeq, typename RemainMask>
struct pick_sequence_elements_by_mask_impl
{
using new_work_seq = typename conditional<RemainMask::Front(),
decltype(WorkSeq::PushBack(RemainSeq::Front())),
WorkSeq>::type;
using type =
typename pick_sequence_elements_by_mask_impl<new_work_seq,
decltype(RemainSeq::PopFront()),
decltype(RemainMask::PopFront())>::type;
};
template <typename WorkSeq>
struct pick_sequence_elements_by_mask_impl<WorkSeq, Sequence<>, Sequence<>>
{
using type = WorkSeq;
};
} // namespace detail
template <typename Seq, typename Mask>
__host__ __device__ constexpr auto pick_sequence_elements_by_mask(Seq, Mask)
{
// not implemented
static_assert(Seq::Size() == Mask::Size(), "wrong!");
return typename detail::pick_sequence_elements_by_mask_impl<Sequence<>, Seq, Mask>::type{};
}
#endif
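For illustration only (not part of the diff above): the recursion walks the remaining sequence and mask in lockstep, pushing the front element onto the work sequence whenever the mask front is non-zero. The same technique expressed over std::integer_sequence, with a usage check:

    #include <type_traits>
    #include <utility>

    template <typename Work, typename Remain, typename Mask>
    struct pick_by_mask;

    // done: nothing left to inspect
    template <int... Ws>
    struct pick_by_mask<std::integer_sequence<int, Ws...>,
                        std::integer_sequence<int>,
                        std::integer_sequence<int>>
    {
        using type = std::integer_sequence<int, Ws...>;
    };

    // keep or drop the front element depending on the front of the mask
    template <int... Ws, int R, int... Rs, int M, int... Ms>
    struct pick_by_mask<std::integer_sequence<int, Ws...>,
                        std::integer_sequence<int, R, Rs...>,
                        std::integer_sequence<int, M, Ms...>>
    {
        using work = std::conditional_t<(M != 0),
                                        std::integer_sequence<int, Ws..., R>,
                                        std::integer_sequence<int, Ws...>>;
        using type = typename pick_by_mask<work,
                                           std::integer_sequence<int, Rs...>,
                                           std::integer_sequence<int, Ms...>>::type;
    };

    // picks {10, 30} out of {10, 20, 30, 40} with mask {1, 0, 1, 0}
    static_assert(std::is_same_v<
                      typename pick_by_mask<std::integer_sequence<int>,
                                            std::integer_sequence<int, 10, 20, 30, 40>,
                                            std::integer_sequence<int, 1, 0, 1, 0>>::type,
                      std::integer_sequence<int, 10, 30>>);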
template <typename Seq, typename Reduce>
struct lambda_reduce_on_sequence
namespace detail {
template <typename WorkSeq, typename RemainValues, typename RemainIds>
struct modify_sequence_elements_by_ids_impl
{
const Reduce& f;
index_t& result;
using new_work_seq = decltype(WorkSeq::Modify(RemainIds::Front(), RemainValues::Front()));
__host__ __device__ constexpr lambda_reduce_on_sequence(const Reduce& f_, index_t& result_)
: f(f_), result(result_)
{
}
using type =
typename modify_sequence_elements_by_ids_impl<new_work_seq,
decltype(RemainValues::PopFront()),
decltype(RemainIds::PopFront())>::type;
};
template <typename IDim>
__host__ __device__ constexpr index_t operator()(IDim) const
{
return result = f(result, Seq::At(IDim{}));
}
template <typename WorkSeq>
struct modify_sequence_elements_by_ids_impl<WorkSeq, Sequence<>, Sequence<>>
{
using type = WorkSeq;
};
} // namespace detail
template <typename Seq, typename Values, typename Ids>
__host__ __device__ constexpr auto modify_sequence_elements_by_ids(Seq, Values, Ids)
{
static_assert(Values::Size() == Ids::Size() && Seq::Size() >= Values::Size(), "wrong!");
return typename detail::modify_sequence_elements_by_ids_impl<Seq, Values, Ids>::type{};
}
#endif
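For illustration only (not part of the diff above): given the recursion and signature here, a call such as modify_sequence_elements_by_ids(Sequence<0, 0, 0, 0>{}, Sequence<7, 9>{}, Sequence<1, 3>{}) would be expected to yield Sequence<0, 7, 0, 9>{}, writing one (value, id) pair into the work sequence per recursion step.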
template <typename Seq, typename Reduce, index_t Init>
__host__ __device__ constexpr index_t
......@@ -787,14 +820,17 @@ reduce_on_sequence(Seq, Reduce f, Number<Init> /*initial_value*/)
{
index_t result = Init;
static_for<0, Seq::Size(), 1>{}(lambda_reduce_on_sequence<Seq, Reduce>(f, result));
for(index_t i = 0; i < Seq::Size(); ++i)
{
result = f(result, Seq::At(i));
}
return result;
}
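For illustration only (not part of the diff above): the rewritten reduce_on_sequence is a plain left fold over the sequence entries. A standalone constexpr equivalent of that loop, checked on a small mask:

    #include <cstddef>

    // Hypothetical standalone left fold with the same shape as the loop above.
    template <typename F, std::size_t N>
    constexpr int fold(const int (&values)[N], F f, int init)
    {
        int result = init;
        for(std::size_t i = 0; i < N; ++i)
            result = f(result, values[i]); // result accumulates left to right
        return result;
    }

    constexpr int example_mask[] = {1, 1, 0, 1};
    // AND-folding a mask that contains one zero entry yields 0, as expected.
    static_assert(fold(example_mask, [](int a, int b) { return (a && b) ? 1 : 0; }, 1) == 0, "");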
// TODO: a generic any_of for any container
template <typename Seq, typename F>
__host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
__host__ __device__ constexpr bool sequence_any_of(Seq, F f)
{
bool flag = false;
......@@ -808,7 +844,7 @@ __host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
// TODO: a generic all_of for any container
template <typename Seq, typename F>
__host__ __device__ constexpr bool sequence_all_of(Seq, F f /*initial_value*/)
__host__ __device__ constexpr bool sequence_all_of(Seq, F f)
{
bool flag = true;
......