Commit 9535f806 authored by Chao Liu

refactoring array

parent c98cbea0
@@ -18,29 +18,34 @@ map_convolution_into_gemm_v1(const WeiDesc& wei_k_c_y_x_global_desc,
                              const Array<index_t, 2> in_left_pads,
                              const Array<index_t, 2> in_right_pads)
 {
-    const index_t N = in_n_c_hi_wi_global_desc.GetLength(0);
-    const index_t C = in_n_c_hi_wi_global_desc.GetLength(1);
-    const index_t K = out_n_k_ho_wo_global_desc.GetLength(1);
-
-    const index_t Y = wei_k_c_y_x_global_desc.GetLength(2);
-    const index_t X = wei_k_c_y_x_global_desc.GetLength(3);
-
-    const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(2);
-    const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(3);
-
-    const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(2);
-    const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(3);
-
-    const index_t ConvStrideH = conv_strides[0];
-    const index_t ConvStrideW = conv_strides[1];
-
-    const index_t ConvDilationH = conv_dilations[0];
-    const index_t ConvDilationW = conv_dilations[1];
-
-    const index_t InLeftPadH = in_left_pads[0];
-    const index_t InLeftPadW = in_left_pads[1];
-
-    const index_t InRightPadH = in_right_pads[0];
-    const index_t InRightPadW = in_right_pads[1];
+    constexpr auto i0 = Number<0>{};
+    constexpr auto i1 = Number<1>{};
+    constexpr auto i2 = Number<2>{};
+    constexpr auto i3 = Number<3>{};
+
+    const index_t N = in_n_c_hi_wi_global_desc.GetLength(i0);
+    const index_t C = in_n_c_hi_wi_global_desc.GetLength(i1);
+    const index_t K = out_n_k_ho_wo_global_desc.GetLength(i1);
+
+    const index_t Y = wei_k_c_y_x_global_desc.GetLength(i2);
+    const index_t X = wei_k_c_y_x_global_desc.GetLength(i3);
+
+    const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(i2);
+    const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(i3);
+
+    const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(i2);
+    const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(i3);
+
+    const index_t ConvStrideH = conv_strides[i0];
+    const index_t ConvStrideW = conv_strides[i1];
+
+    const index_t ConvDilationH = conv_dilations[i0];
+    const index_t ConvDilationW = conv_dilations[i1];
+
+    const index_t InLeftPadH = in_left_pads[i0];
+    const index_t InLeftPadW = in_left_pads[i1];
+
+    const index_t InRightPadH = in_right_pads[i0];
+    const index_t InRightPadW = in_right_pads[i1];
 
     // input tensor
     const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
@@ -59,24 +64,25 @@ map_convolution_into_gemm_v1(const WeiDesc& wei_k_c_y_x_global_desc,
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
 
-    const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(2);
-    const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(3);
+    const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(i2);
+    const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(i3);
 
     const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
         in_n_c_hip_wip_global_desc,
-        make_tuple(DynamicPassThrough{N},
-                   DynamicPassThrough{C},
-                   DynamicEmbed<2>{{Y, Ho}, {ConvDilationH, ConvStrideH, 0}},
-                   DynamicEmbed<2>{{X, Wo}, {ConvDilationW, ConvStrideW, 0}}),
+        make_tuple(
+            DynamicPassThrough{N},
+            DynamicPassThrough{C},
+            DynamicEmbed<2>{make_multi_index(Y, Ho), make_multi_index(ConvDilationH, ConvStrideH)},
+            DynamicEmbed<2>{make_multi_index(X, Wo), make_multi_index(ConvDilationW, ConvStrideW)}),
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
 
-    const auto in_gemmk_gemmn_global_desc =
-        transform_dynamic_tensor_descriptor(in_n_c_y_ho_x_wo_global_desc,
-                                            make_tuple(DynamicMerge<3>{MultiIndex<3>{{C, Y, X}}},
-                                                       DynamicMerge<3>{MultiIndex<3>{{N, Ho, Wo}}}),
-                                            make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
-                                            make_tuple(Sequence<0>{}, Sequence<1>{}));
+    const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
+        in_n_c_y_ho_x_wo_global_desc,
+        make_tuple(DynamicMerge<3>{make_multi_index(C, Y, X)},
+                   DynamicMerge<3>{make_multi_index(N, Ho, Wo)}),
+        make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}));
 
     return make_tuple(in_gemmk_gemmn_global_desc);
 }
...
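A minimal sketch of the index arithmetic behind the DynamicEmbed change above, assuming the 2-d Embed computes the dot product of the upper index with its coefficients (the old form carried an extra trailing constant term, here 0). Names are taken from the hunk; the helper itself is illustrative, not part of the commit:

// Sketch only: upper index (y, ho) with coefficients
// make_multi_index(ConvDilationH, ConvStrideH) maps to a row of the
// padded input as hip = y * ConvDilationH + ho * ConvStrideH,
// i.e. the implicit im2col indexing of the GEMM view.
index_t embed_hip(index_t y, index_t ho, index_t conv_dilation_h, index_t conv_stride_h)
{
    return y * conv_dilation_h + ho * conv_stride_h;
}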
@@ -183,7 +183,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
             AddressSpace::Vgpr,
             AddressSpace::Lds,
             InMemoryDataOperation::Set>(
-            MultiIndex<4>{0, 0, b_block_data_on_global, 0}, MultiIndex<4>{0, 0, 0, 0});
+            MultiIndex<4>{{0, 0, b_block_data_on_global, 0}}, MultiIndex<4>{{0, 0, 0, 0}});
 
         // weight tensor
         // global tensor in global memory, src of blockwise copy
...
...@@ -12,9 +12,8 @@ using MultiIndex = Array<index_t, N>; ...@@ -12,9 +12,8 @@ using MultiIndex = Array<index_t, N>;
template <typename... Xs> template <typename... Xs>
__host__ __device__ constexpr auto make_multi_index(Xs... xs) __host__ __device__ constexpr auto make_multi_index(Xs... xs)
{ {
return MultiIndex<sizeof...(Xs)>{{static_cast<index_t>(xs)...}}; return make_array<index_t>(xs...);
} }
#else #else
template <index_t N> template <index_t N>
using MultiIndex = StaticallyIndexedArray<index_t, N>; using MultiIndex = StaticallyIndexedArray<index_t, N>;
...@@ -33,5 +32,57 @@ __host__ __device__ constexpr auto make_zero_multi_index() ...@@ -33,5 +32,57 @@ __host__ __device__ constexpr auto make_zero_multi_index()
typename uniform_sequence_gen<NSize, 0>::type{}); typename uniform_sequence_gen<NSize, 0>::type{});
} }
template <typename T>
__host__ __device__ constexpr auto to_multi_index(const T& x)
{
return unpack([](auto... ys) { return make_multi_index(ys...); }, x);
}
template <index_t NSize, typename X>
__host__ __device__ constexpr auto operator+=(MultiIndex<NSize>& y, const X& x)
{
static_assert(X::Size() == NSize, "wrong! size not the same");
static_for<0, NSize, 1>{}([&](auto i) { y(i) += x[i]; });
return y;
}
template <index_t NSize, typename X>
__host__ __device__ constexpr auto operator-=(MultiIndex<NSize>& y, const X& x)
{
static_assert(X::Size() == NSize, "wrong! size not the same");
static_for<0, NSize, 1>{}([&](auto i) { y(i) -= x[i]; });
return y;
}
template <index_t NSize, typename T>
__host__ __device__ constexpr auto operator+(const MultiIndex<NSize>& a, const T& b)
{
using type = MultiIndex<NSize>;
static_assert(T::Size() == NSize, "wrong! size not the same");
type r;
static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] + b[i]; });
return r;
}
template <index_t NSize, typename T>
__host__ __device__ constexpr auto operator-(const MultiIndex<NSize>& a, const T& b)
{
using type = MultiIndex<NSize>;
static_assert(T::Size() == NSize, "wrong! size not the same");
type r;
static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] - b[i]; });
return r;
}
template <index_t NSize, typename T>
__host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T& b)
{
using type = MultiIndex<NSize>;
static_assert(T::Size() == NSize, "wrong! size not the same");
type r;
static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] * b[i]; });
return r;
}
} // namespace ck } // namespace ck
#endif #endif
@@ -234,7 +234,7 @@ struct Merge
     {
         if(idx_up_diff[0] == 0)
         {
-            return make_zero_array<index_t, nDimLow>();
+            return make_zero_multi_index<nDimLow>();
         }
         else
         {
@@ -459,7 +459,7 @@ struct Embed
         for(index_t icorner = 0; icorner < ncorner; ++icorner)
         {
             // generate upper index for each corner
-            auto idx_up = make_zero_array<index_t, nDimUp>();
+            auto idx_up = make_zero_multi_index<nDimUp>();
 
             index_t itmp = icorner;
@@ -512,7 +512,7 @@ struct Freeze
                                     const UpperIndex& /* idx_up_old */,
                                     const LowerIndex& /* idx_low_old */)
     {
-        return make_zero_array<index_t, nDimLow>();
+        return make_zero_multi_index<nDimLow>();
     }
 
     __host__ __device__ static constexpr bool IsLinearTransform() { return true; }
...
@@ -270,7 +270,7 @@ struct TensorCoordinate
     MakeDummyTensorCoordinate(NativeTensorDescriptor<Ts...>)
     {
         return NativeTensorCoordinate<NativeTensorDescriptor<Ts...>>(
-            make_zero_array<index_t, TensorDesc::GetNumOfDimension()>());
+            make_zero_multi_index<TensorDesc::GetNumOfDimension()>());
     }
 
     template <typename... Ts>
@@ -278,7 +278,7 @@ struct TensorCoordinate
     MakeDummyTensorCoordinate(TransformedTensorDescriptor<Ts...>)
     {
         return TransformedTensorCoordinate<TransformedTensorDescriptor<Ts...>>(
-            make_zero_array<index_t, TensorDesc::GetNumOfDimension()>());
+            make_zero_multi_index<TensorDesc::GetNumOfDimension()>());
     }
 
     public:
...
@@ -317,7 +317,7 @@ struct TransformedTensorDescriptor
             // this assumes each lower (single) index is only associated with one transformation,
             // which is required for index transformation, and has been checked during constructor
             // of TransformedTensorDescriptor
-            idx_low_part = tran.CalculateLowerIndex(to_array(idx_up_part));
+            idx_low_part = tran.CalculateLowerIndex(to_multi_index(idx_up_part));
         });
 
         return idx_low;
@@ -345,8 +345,9 @@ struct TransformedTensorDescriptor
             // this assumes each lower (single) index is associated with only one transformation,
             // which is required for index transformation, and has been checked during constructor
             // of TransformedTensorDescriptor
-            idx_low_diff_part = tran.CalculateLowerIndexDiff(
-                to_array(idx_up_diff_part), to_array(idx_up_old_part), to_array(idx_low_old_part));
+            idx_low_diff_part = tran.CalculateLowerIndexDiff(to_multi_index(idx_up_diff_part),
+                                                             to_multi_index(idx_up_old_part),
+                                                             to_multi_index(idx_low_old_part));
         });
 
         return idx_low_diff;
@@ -506,7 +507,8 @@ struct TransformedTensorDescriptor
            constexpr auto low_dims_part    = LowDimensionIds{}.At(itran);
             constexpr auto low_lengths_part =
                 GetLowerTensorDescriptor().GetLengths(low_dims_part);
-            const auto idx_low_part = to_array(pick_array_element(idx_low, low_dims_part));
+            const auto idx_low_part =
+                to_multi_index(pick_array_element(idx_low, low_dims_part));
 
             for(index_t i = 0; i < low_dims_part.Size(); ++i)
             {
...
@@ -68,9 +68,9 @@ struct BlockwiseGenericTensorSliceCopy_v4
         const auto thread_data_id_begin = thread_cluster_id * ThreadSliceLengths{};
 
         mThreadwiseLoad.SetSrcSliceOrigin(src_block_slice_origin + thread_data_id_begin);
-        mThreadwiseLoad.SetDstSliceOrigin(make_zero_array<index_t, nDim>());
+        mThreadwiseLoad.SetDstSliceOrigin(make_zero_multi_index<nDim>());
 
-        mThreadwiseStore.SetSrcSliceOrigin(make_zero_array<index_t, nDim>());
+        mThreadwiseStore.SetSrcSliceOrigin(make_zero_multi_index<nDim>());
         mThreadwiseStore.SetDstSliceOrigin(dst_block_slice_origin + thread_data_id_begin);
     }
 }
...
@@ -54,8 +54,8 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
     }
 
     __device__ constexpr ThreadwiseGenericTensorSliceCopy_v4r2()
-        : ThreadwiseGenericTensorSliceCopy_v4r2(make_zero_array<index_t, nDim>(),
-                                                make_zero_array<index_t, nDim>())
+        : ThreadwiseGenericTensorSliceCopy_v4r2(make_zero_multi_index<nDim>(),
+                                                make_zero_multi_index<nDim>())
     {
     }
@@ -104,7 +104,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
         // load data from src to the long-vector buffer
         for(index_t i = 0; i < long_vector_size / src_data_per_access; ++i)
         {
-            auto scalar_id = make_zero_array<index_t, nDim>();
+            auto scalar_id = make_zero_multi_index<nDim>();
             scalar_id(vector_access_dim) = i * src_data_per_access;
 
             const index_t buffer_offset = i * src_data_per_access;
@@ -143,7 +143,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
         // store data from the long-vector buffer to dst
         for(index_t i = 0; i < long_vector_size / dst_data_per_access; ++i)
         {
-            auto scalar_id = make_zero_array<index_t, nDim>();
+            auto scalar_id = make_zero_multi_index<nDim>();
             scalar_id(vector_access_dim) = i * dst_data_per_access;
 
             const index_t buffer_offset = i * dst_data_per_access;
@@ -177,9 +177,9 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
     __device__ void MoveSrcSliceWindow(const T& step_sizes_,
                                        integral_constant<bool, PositiveDirection>)
     {
-        const auto step_sizes = to_array(step_sizes_);
+        const auto step_sizes = to_multi_index(step_sizes_);
 
-        static_if<PositiveDirection>{}([&](auto) { mSrcSliceOrigin += to_array(step_sizes); })
+        static_if<PositiveDirection>{}([&](auto) { mSrcSliceOrigin += to_multi_index(step_sizes); })
             .Else([&](auto) { mSrcSliceOrigin -= step_sizes; });
     }
@@ -187,7 +187,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
     __device__ void MoveDstSliceWindow(const T& step_sizes_,
                                        integral_constant<bool, PositiveDirection>)
    {
-        const auto step_sizes = to_array(step_sizes_);
+        const auto step_sizes = to_multi_index(step_sizes_);
 
         static_if<PositiveDirection>{}([&](auto) { mDstSliceOrigin += step_sizes; })
             .Else([&](auto) { mDstSliceOrigin -= step_sizes; });
...
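A hypothetical call site for the updated slice-window methods (the copy object and its four dimensions are assumptions, not part of this commit). Since to_multi_index unpacks its argument, the step sizes may arrive as either a MultiIndex or a compile-time Sequence:

// Hypothetical usage; threadwise_copy and the 4-d layout are assumed.
threadwise_copy.MoveSrcSliceWindow(make_multi_index(0, 0, 1, 0),
                                   integral_constant<bool, true>{});  // slide forward
threadwise_copy.MoveSrcSliceWindow(Sequence<0, 0, 1, 0>{},
                                   integral_constant<bool, false>{}); // slide backward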
@@ -9,49 +9,20 @@ namespace ck {
 template <typename TData, index_t NSize>
 struct Array
 {
-    using type      = Array<TData, NSize>;
+    using type      = Array;
     using data_type = TData;
 
-    // hack: add extra element to allow empty array
-    // TODO: implement empty Array
-    TData mData[NSize + 1] = {0};
+    TData mData[NSize] = {0};
 
     __host__ __device__ static constexpr index_t Size() { return NSize; }
 
-    // TODO: remove
-    __host__ __device__ static constexpr index_t GetSize() { return Size(); }
-
-    template <index_t I>
-    __host__ __device__ constexpr const TData& At(Number<I>) const
-    {
-        static_assert(I < NSize, "wrong!");
-        return mData[I];
-    }
-
-    template <index_t I>
-    __host__ __device__ constexpr TData& At(Number<I>)
-    {
-        static_assert(I < NSize, "wrong!");
-        return mData[I];
-    }
-
     __host__ __device__ constexpr const TData& At(index_t i) const { return mData[i]; }
 
     __host__ __device__ constexpr TData& At(index_t i) { return mData[i]; }
 
-    template <typename I>
-    __host__ __device__ constexpr const TData& operator[](I i) const
-    {
-        return At(i);
-    }
+    __host__ __device__ constexpr const TData& operator[](index_t i) const { return At(i); }
 
-    template <typename I>
-    __host__ __device__ constexpr TData& operator()(I i)
-    {
-        return At(i);
-    }
+    __host__ __device__ constexpr TData& operator()(index_t i) { return At(i); }
 
     template <typename T>
     __host__ __device__ constexpr auto operator=(const T& a)
@@ -62,127 +33,34 @@ struct Array
         return *this;
     }
+};
 
-    template <typename T>
-    __host__ __device__ constexpr auto operator+=(const T& a)
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-
-        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) += a[i]; });
-
-        return *this;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator-=(const T& a)
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-
-        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) -= a[i]; });
-
-        return *this;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator+(const T& a) const
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-
-        type r;
-        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) + a[i]; });
-        return r;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator-(const T& a) const
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-
-        type r;
-        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) - a[i]; });
-        return r;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator*(const T& a) const
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-
-        type r;
-        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) * a[i]; });
-        return r;
-    }
-
-    struct lambda_PushBack // emulate constexpr lambda
-    {
-        const Array<TData, NSize>& old_array;
-        Array<TData, NSize + 1>& new_array;
-
-        __host__ __device__ constexpr lambda_PushBack(const Array<TData, NSize>& old_array_,
-                                                      Array<TData, NSize + 1>& new_array_)
-            : old_array(old_array_), new_array(new_array_)
-        {
-        }
-
-        template <index_t I>
-        __host__ __device__ constexpr void operator()(Number<I>) const
-        {
-            new_array(Number<I>{}) = old_array[I];
-        }
-    };
-
-    __host__ __device__ constexpr auto PushBack(TData x) const
-    {
-        Array<TData, NSize + 1> new_array;
-
-        static_for<0, NSize, 1>{}(lambda_PushBack(*this, new_array));
-
-        new_array(Number<NSize>{}) = x;
-
-        return new_array;
-    }
-
-    template <index_t NAppend>
-    __host__ __device__ constexpr auto Append(const Array<TData, NAppend>& xs) const
-    {
-        Array<TData, NSize + NAppend> r;
-
-        static_for<0, NSize, 1>{}([&r, this ](auto i) constexpr { r(i) = (*this)[i]; });
-        static_for<0, NAppend, 1>{}([&r, &xs ](auto i) constexpr { r(NSize + i) = xs[i]; });
-
-        return r;
-    }
-};
+// empty Array
+template <typename TData>
+struct Array<TData, 0>
+{
+    using type      = Array;
+    using data_type = TData;
+
+    __host__ __device__ static constexpr index_t Size() { return 0; }
+};
 
 template <typename X, typename... Xs>
 __host__ __device__ constexpr auto make_array(const X& x, const Xs&... xs)
 {
-    return Array<X, sizeof...(xs) + 1>{{x, xs...}};
+    return Array<X, sizeof...(Xs) + 1>{{x, static_cast<X>(xs)...}};
 }
 
-template <typename T>
-__host__ __device__ constexpr auto to_array(const T& x)
+template <typename TData, index_t NSize>
+__host__ __device__ constexpr auto push_back(Array<TData, NSize>& a, const TData& x)
 {
-    Array<typename T::data_type, T::Size()> y;
-
-    static_for<0, T::Size(), 1>{}([&](auto i) { y.At(i) = x.At(i); });
-
-    return y;
-}
+    Array<TData, NSize + 1> r;
 
-template <typename TData, index_t NSize>
-__host__ __device__ constexpr auto make_zero_array()
-{
-    constexpr auto zero_sequence = typename uniform_sequence_gen<NSize, 0>::type{};
-    constexpr auto zero_array    = to_array(zero_sequence);
-    return zero_array;
+    static_for<0, NSize, 1>{}([&r, &a ](auto i) constexpr { r(i) = a[i]; });
+
+    r(Number<NSize>{}) = x;
+
+    return r;
 }
 
 } // namespace ck
...
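A small sketch of the reworked Array interface above (illustrative values; not part of the commit). The member PushBack/lambda_PushBack machinery is replaced by the free push_back, and the NSize + 1 padding hack gives way to a genuine empty specialization:

// Illustrative use of the new Array surface:
auto a = make_array<index_t>(1, 2, 3);   // Array<index_t, 3>
const auto b = push_back(a, index_t{4}); // Array<index_t, 4>: {1, 2, 3, 4}
Array<index_t, 0> empty{};               // empty Array is now a real type
static_assert(decltype(empty)::Size() == 0, "empty Array has size 0");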
@@ -63,7 +63,7 @@ struct ford_impl
         for(index_t i = 0; i < RemainLengths::Front(); ++i)
         {
             ford_impl<decltype(RemainLengths::PopFront()), Orders>{}(
-                f, current_ordered_id.PushBack(i));
+                f, push_back(current_ordered_id, i));
         }
     }
 };
...
@@ -386,43 +386,5 @@ struct StaticallyIndexedArray<TData, 22> : Tuple<TData,
     using data_type = TData;
 };
 
-template <typename TData, index_t NSize, typename X>
-__host__ __device__ constexpr auto operator+=(StaticallyIndexedArray<TData, NSize>& y, const X& x)
-{
-    static_assert(X::Size() == NSize, "wrong! size not the same");
-
-    static_for<0, NSize, 1>{}([&](auto i) { y(i) += x[i]; });
-
-    return y;
-}
-
-template <typename TData, index_t NSize, typename X>
-__host__ __device__ constexpr auto operator-=(StaticallyIndexedArray<TData, NSize>& y, const X& x)
-{
-    static_assert(X::Size() == NSize, "wrong! size not the same");
-
-    static_for<0, NSize, 1>{}([&](auto i) { y(i) -= x[i]; });
-
-    return y;
-}
-
-template <typename TData, index_t NSize, typename T>
-__host__ __device__ constexpr auto operator+(const StaticallyIndexedArray<TData, NSize>& a,
-                                             const T& b)
-{
-    using type = StaticallyIndexedArray<TData, NSize>;
-    static_assert(T::Size() == NSize, "wrong! size not the same");
-
-    type r;
-    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] + b[i]; });
-    return r;
-}
-
-template <typename TData, index_t NSize, typename T>
-__host__ __device__ constexpr auto operator-(const StaticallyIndexedArray<TData, NSize>& a,
-                                             const T& b)
-{
-    using type = StaticallyIndexedArray<TData, NSize>;
-    static_assert(T::Size() == NSize, "wrong! size not the same");
-
-    type r;
-    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] - b[i]; });
-    return r;
-}
-
 } // namespace ck
 #endif
@@ -29,16 +29,16 @@ void device_dummy_dynamic_transform_v2(InDesc,
     using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
 
     const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v2(
-        to_array(InDesc::GetLengths()), to_array(InDesc::GetStrides()));
+        to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides()));
 
     const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v2(
-        to_array(WeiDesc::GetLengths()), to_array(WeiDesc::GetStrides()));
+        to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides()));
 
     const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v2(
-        to_array(OutDesc::GetLengths()), to_array(OutDesc::GetStrides()));
+        to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides()));
 
-    const auto conv_strides   = to_array(ConvStrides{});
-    const auto conv_dilations = to_array(ConvDilations{});
-    const auto in_left_pads   = to_array(InLeftPads{});
-    const auto in_right_pads  = to_array(InRightPads{});
+    const auto conv_strides   = to_multi_index(ConvStrides{});
+    const auto conv_dilations = to_multi_index(ConvDilations{});
+    const auto in_left_pads   = to_multi_index(InLeftPads{});
+    const auto in_right_pads  = to_multi_index(InRightPads{});
 
     const auto tensor_descs = map_convolution_into_gemm_v2(wei_kcyx_desc,
                                                            in_nchw_desc,
...
@@ -549,7 +549,7 @@ int main(int argc, char* argv[])
 #endif
     }
 
-#if 0
+#if 1
     device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                          in_nchw,
                                                          wei_kcyx_desc,
...