Commit a339028c authored by Chao Liu's avatar Chao Liu
Browse files

adding statically indexed array

parent e7f633c5
...@@ -123,7 +123,7 @@ struct DummyDynamicTransform_v2_1 ...@@ -123,7 +123,7 @@ struct DummyDynamicTransform_v2_1
const auto in_gemmk_gemmn_coord_step = make_dynamic_tensor_coordinate_step_v2( const auto in_gemmk_gemmn_coord_step = make_dynamic_tensor_coordinate_step_v2(
in_gemmk_gemmn_global_desc, make_multi_index(1, 0)); in_gemmk_gemmn_global_desc, make_multi_index(1, 0));
#pragma unroll #pragma unroll 1
for(index_t i = 0; i < 10; ++i) for(index_t i = 0; i < 10; ++i)
{ {
move_dynamic_tensor_coordinate_v2( move_dynamic_tensor_coordinate_v2(
...@@ -293,14 +293,14 @@ struct DummyDynamicTransform_v2_2 ...@@ -293,14 +293,14 @@ struct DummyDynamicTransform_v2_2
// initialize idx // initialize idx
static_for<0, 2, 1>{}([&](auto i) { idx(i) = p_wei_global[get_thread_local_1d_id() + i]; }); static_for<0, 2, 1>{}([&](auto i) { idx(i) = p_wei_global[get_thread_local_1d_id() + i]; });
#if 0 #if 1
const index_t niter = p_wei_global[10]; const index_t niter = p_wei_global[10];
auto in_gemmk_gemmn_coord = auto in_gemmk_gemmn_coord =
make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, idx); make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, idx);
const auto in_gemmk_gemmn_coord_step = make_dynamic_tensor_coordinate_step_v2( const auto in_gemmk_gemmn_coord_step = make_dynamic_tensor_coordinate_step_v2(
in_gemmk_gemmn_global_desc, MultiIndex<2>{{1, 0}}); in_gemmk_gemmn_global_desc, make_multi_index(1, 0));
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
......
...@@ -119,8 +119,8 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer ...@@ -119,8 +119,8 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
const auto block_work_id = block_work_desc.CalculateClusterIndex(get_block_1d_id()); const auto block_work_id = block_work_desc.CalculateClusterIndex(get_block_1d_id());
const index_t k_block_data_on_global = block_work_id[0] * KPerBlock; const index_t k_block_data_on_global = block_work_id[I0] * KPerBlock;
const index_t b_block_data_on_global = block_work_id[1] * BPerBlock; const index_t b_block_data_on_global = block_work_id[I1] * BPerBlock;
// input tensor // input tensor
// global tensor in global memory // global tensor in global memory
......
...@@ -14,27 +14,27 @@ struct DynamicPassThrough ...@@ -14,27 +14,27 @@ struct DynamicPassThrough
const UpperIndex up_lengths_; const UpperIndex up_lengths_;
#if 0 #if 0
__host__ __device__ explicit constexpr DynamicPassThrough(const DynamicPassThrough&) = default; __host__ __device__ constexpr DynamicPassThrough(const DynamicPassThrough&) = default;
__host__ __device__ explicit constexpr DynamicPassThrough(DynamicPassThrough&&) = default; __host__ __device__ constexpr DynamicPassThrough(DynamicPassThrough&&) = default;
#else #else
__host__ __device__ explicit constexpr DynamicPassThrough(const DynamicPassThrough& other) __host__ __device__ constexpr DynamicPassThrough(const DynamicPassThrough& other)
: up_lengths_{other.up_lengths_} : up_lengths_{other.up_lengths_}
{ {
} }
__host__ __device__ explicit constexpr DynamicPassThrough(DynamicPassThrough&& other) __host__ __device__ constexpr DynamicPassThrough(DynamicPassThrough&& other)
: up_lengths_{other.up_lengths_} : up_lengths_{other.up_lengths_}
{ {
} }
#endif #endif
__host__ __device__ explicit constexpr DynamicPassThrough(const index_t& low_length) __host__ __device__ constexpr DynamicPassThrough(const index_t& low_length)
: up_lengths_{make_multi_index(low_length)} : up_lengths_{make_multi_index(low_length)}
{ {
} }
__host__ __device__ explicit constexpr DynamicPassThrough() : up_lengths_{0} {} __host__ __device__ constexpr DynamicPassThrough() : up_lengths_{0} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
...@@ -89,28 +89,27 @@ struct DynamicLeftPad ...@@ -89,28 +89,27 @@ struct DynamicLeftPad
const index_t left_pad_; const index_t left_pad_;
#if 0 #if 0
__host__ __device__ explicit constexpr DynamicLeftPad(const DynamicLeftPad&) = default; __host__ __device__ constexpr DynamicLeftPad(const DynamicLeftPad&) = default;
__host__ __device__ explicit constexpr DynamicLeftPad(DynamicLeftPad&&) = default; __host__ __device__ constexpr DynamicLeftPad(DynamicLeftPad&&) = default;
#else #else
__host__ __device__ explicit constexpr DynamicLeftPad(const DynamicLeftPad& other) __host__ __device__ constexpr DynamicLeftPad(const DynamicLeftPad& other)
: up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_} : up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}
{ {
} }
__host__ __device__ explicit constexpr DynamicLeftPad(DynamicLeftPad&& other) __host__ __device__ constexpr DynamicLeftPad(DynamicLeftPad&& other)
: up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_} : up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}
{ {
} }
#endif #endif
__host__ __device__ explicit constexpr DynamicLeftPad(const index_t& low_length, __host__ __device__ constexpr DynamicLeftPad(const index_t& low_length, const index_t& left_pad)
const index_t& left_pad)
: up_lengths_{make_multi_index(low_length + left_pad)}, left_pad_{left_pad} : up_lengths_{make_multi_index(low_length + left_pad)}, left_pad_{left_pad}
{ {
} }
__host__ __device__ explicit constexpr DynamicLeftPad() : up_lengths_{0}, left_pad_{0} {} __host__ __device__ constexpr DynamicLeftPad() : up_lengths_{0}, left_pad_{0} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
...@@ -168,18 +167,18 @@ struct DynamicRightPad ...@@ -168,18 +167,18 @@ struct DynamicRightPad
const index_t right_pad_; const index_t right_pad_;
#if 0 #if 0
__host__ __device__ explicit constexpr DynamicRightPad(const DynamicRightPad&) = default; __host__ __device__ constexpr DynamicRightPad(const DynamicRightPad&) = default;
__host__ __device__ explicit constexpr DynamicRightPad(DynamicRightPad&&) = default; __host__ __device__ constexpr DynamicRightPad(DynamicRightPad&&) = default;
#else #else
__host__ __device__ explicit constexpr DynamicRightPad(const DynamicRightPad& other) __host__ __device__ constexpr DynamicRightPad(const DynamicRightPad& other)
: up_lengths_{other.up_lengths_}, : up_lengths_{other.up_lengths_},
low_length_{other.low_length_}, low_length_{other.low_length_},
right_pad_{other.right_pad_} right_pad_{other.right_pad_}
{ {
} }
__host__ __device__ explicit constexpr DynamicRightPad(DynamicRightPad&& other) __host__ __device__ constexpr DynamicRightPad(DynamicRightPad&& other)
: up_lengths_{other.up_lengths_}, : up_lengths_{other.up_lengths_},
low_length_{other.low_length_}, low_length_{other.low_length_},
right_pad_{other.right_pad_} right_pad_{other.right_pad_}
...@@ -187,16 +186,15 @@ struct DynamicRightPad ...@@ -187,16 +186,15 @@ struct DynamicRightPad
} }
#endif #endif
__host__ __device__ explicit constexpr DynamicRightPad(const index_t& low_length, __host__ __device__ constexpr DynamicRightPad(const index_t& low_length,
const index_t& right_pad) const index_t& right_pad)
: up_lengths_{make_multi_index(low_length + right_pad)}, : up_lengths_{make_multi_index(low_length + right_pad)},
low_length_{low_length}, low_length_{low_length},
right_pad_{right_pad} right_pad_{right_pad}
{ {
} }
__host__ __device__ explicit constexpr DynamicRightPad() __host__ __device__ constexpr DynamicRightPad() : up_lengths_{0}, low_length_{0}, right_pad_{0}
: up_lengths_{0}, low_length_{0}, right_pad_{0}
{ {
} }
...@@ -256,35 +254,35 @@ struct DynamicEmbed ...@@ -256,35 +254,35 @@ struct DynamicEmbed
const UpperIndex coefficients_; const UpperIndex coefficients_;
#if 0 #if 0
__host__ __device__ explicit constexpr DynamicEmbed(const DynamicEmbed&) = default; __host__ __device__ constexpr DynamicEmbed(const DynamicEmbed&) = default;
__host__ __device__ explicit constexpr DynamicEmbed(DynamicEmbed&&) = default; __host__ __device__ constexpr DynamicEmbed(DynamicEmbed&&) = default;
#else #else
__host__ __device__ explicit constexpr DynamicEmbed(const DynamicEmbed& other) __host__ __device__ constexpr DynamicEmbed(const DynamicEmbed& other)
: up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_} : up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_}
{ {
} }
__host__ __device__ explicit constexpr DynamicEmbed(DynamicEmbed&& other) __host__ __device__ constexpr DynamicEmbed(DynamicEmbed&& other)
: up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_} : up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_}
{ {
} }
#endif #endif
__host__ __device__ explicit constexpr DynamicEmbed(const UpperIndex& up_lengths, __host__ __device__ constexpr DynamicEmbed(const UpperIndex& up_lengths,
const UpperIndex& coefficients) const UpperIndex& coefficients)
: up_lengths_{up_lengths}, coefficients_{coefficients} : up_lengths_{up_lengths}, coefficients_{coefficients}
{ {
static_assert(UpperIndex::Size() == NDimUp, "wrong! # of dimensions not consistent"); static_assert(UpperIndex::Size() == NDimUp, "wrong! # of dimensions not consistent");
} }
template <typename UpperLengths, typename Coefficients> template <typename UpperLengths, typename Coefficients>
__host__ __device__ explicit constexpr DynamicEmbed(const UpperLengths& up_lengths, __host__ __device__ constexpr DynamicEmbed(const UpperLengths& up_lengths,
const Coefficients& coefficients) const Coefficients& coefficients)
: up_lengths_{up_lengths}, coefficients_{coefficients} : up_lengths_{up_lengths}, coefficients_{coefficients}
{ {
} }
__host__ __device__ explicit constexpr DynamicEmbed() __host__ __device__ constexpr DynamicEmbed()
: up_lengths_{make_zero_multi_index<NDimUp>()}, : up_lengths_{make_zero_multi_index<NDimUp>()},
coefficients_{make_zero_multi_index<NDimUp>()} coefficients_{make_zero_multi_index<NDimUp>()}
{ {
...@@ -352,18 +350,18 @@ struct DynamicMerge ...@@ -352,18 +350,18 @@ struct DynamicMerge
const UpperIndex up_lengths_; const UpperIndex up_lengths_;
#if 0 #if 0
__host__ __device__ explicit constexpr DynamicMerge(const DynamicMerge&) = default; __host__ __device__ constexpr DynamicMerge(const DynamicMerge&) = default;
__host__ __device__ explicit constexpr DynamicMerge(DynamicMerge&&) = default; __host__ __device__ constexpr DynamicMerge(DynamicMerge&&) = default;
#else #else
__host__ __device__ explicit constexpr DynamicMerge(const DynamicMerge& other) __host__ __device__ constexpr DynamicMerge(const DynamicMerge& other)
: low_lengths_{other.low_lengths_}, : low_lengths_{other.low_lengths_},
low_lengths_scan_{other.low_lengths_scan_}, low_lengths_scan_{other.low_lengths_scan_},
up_lengths_{other.up_lengths_} up_lengths_{other.up_lengths_}
{ {
} }
__host__ __device__ explicit constexpr DynamicMerge(DynamicMerge&& other) __host__ __device__ constexpr DynamicMerge(DynamicMerge&& other)
: low_lengths_{other.low_lengths_}, : low_lengths_{other.low_lengths_},
low_lengths_scan_{other.low_lengths_scan_}, low_lengths_scan_{other.low_lengths_scan_},
up_lengths_{other.up_lengths_} up_lengths_{other.up_lengths_}
...@@ -371,9 +369,9 @@ struct DynamicMerge ...@@ -371,9 +369,9 @@ struct DynamicMerge
} }
#endif #endif
__host__ __device__ explicit constexpr DynamicMerge(const LowerIndex& low_lengths) __host__ __device__ constexpr DynamicMerge(const LowerIndex& low_lengths)
: low_lengths_{low_lengths}, : low_lengths_{low_lengths},
low_lengths_scan_{reverse_exclusive_scan_on_array( low_lengths_scan_{reverse_exclusive_scan_on_array<index_t, NDimLow>(
low_lengths, math::multiplies<index_t>{}, index_t{1})}, low_lengths, math::multiplies<index_t>{}, index_t{1})},
up_lengths_{make_multi_index( up_lengths_{make_multi_index(
reduce_on_array(low_lengths, math::multiplies<index_t>(), index_t{1}))} reduce_on_array(low_lengths, math::multiplies<index_t>(), index_t{1}))}
...@@ -381,7 +379,7 @@ struct DynamicMerge ...@@ -381,7 +379,7 @@ struct DynamicMerge
static_assert(LowerIndex::Size() == NDimLow, "wrong!"); static_assert(LowerIndex::Size() == NDimLow, "wrong!");
} }
__host__ __device__ explicit constexpr DynamicMerge() __host__ __device__ constexpr DynamicMerge()
: low_lengths_{make_zero_multi_index<NDimLow>()}, : low_lengths_{make_zero_multi_index<NDimLow>()},
low_lengths_scan_{make_zero_multi_index<NDimLow>()}, low_lengths_scan_{make_zero_multi_index<NDimLow>()},
up_lengths_{0} up_lengths_{0}
...@@ -515,14 +513,14 @@ struct DynamicUnMerge ...@@ -515,14 +513,14 @@ struct DynamicUnMerge
const UpperIndex up_lengths_; const UpperIndex up_lengths_;
const UpperIndex up_lengths_scan_; const UpperIndex up_lengths_scan_;
__host__ __device__ explicit constexpr DynamicUnMerge(const UpperIndex& up_lengths) __host__ __device__ constexpr DynamicUnMerge(const UpperIndex& up_lengths)
: up_lengths_{up_lengths}, : up_lengths_{up_lengths},
up_lengths_scan_{ up_lengths_scan_{
reverse_exclusive_scan_on_array(up_lengths, math::multiplies<index_t>(), index_t{1})} reverse_exclusive_scan_on_array(up_lengths, math::multiplies<index_t>(), index_t{1})}
{ {
} }
__host__ __device__ explicit constexpr DynamicUnMerge() __host__ __device__ constexpr DynamicUnMerge()
: up_lengths_{make_zero_multi_index<NDimUp>()}, : up_lengths_{make_zero_multi_index<NDimUp>()},
up_lengths_scan_{make_zero_multi_index<NDimUp>()} up_lengths_scan_{make_zero_multi_index<NDimUp>()}
{ {
...@@ -575,11 +573,9 @@ struct DynamicFreeze ...@@ -575,11 +573,9 @@ struct DynamicFreeze
const index_t low_idx_; const index_t low_idx_;
__host__ __device__ explicit constexpr DynamicFreeze(const index_t& low_idx) : low_idx_{low_idx} __host__ __device__ constexpr DynamicFreeze(const index_t& low_idx) : low_idx_{low_idx} {}
{
}
__host__ __device__ explicit constexpr DynamicFreeze() : low_idx_{0} {} __host__ __device__ constexpr DynamicFreeze() : low_idx_{0} {}
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; } __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
......
...@@ -119,7 +119,10 @@ struct DynamicTensorDescriptor_v2 ...@@ -119,7 +119,10 @@ struct DynamicTensorDescriptor_v2
return reduce_on_array(GetLengths(), math::multiplies<index_t>{}, index_t{1}); return reduce_on_array(GetLengths(), math::multiplies<index_t>{}, index_t{1});
} }
__host__ __device__ constexpr index_t GetElementSpaceSize() const { return hidden_lengths_[0]; } __host__ __device__ constexpr index_t GetElementSpaceSize() const
{
return hidden_lengths_[Number<0>{}];
}
template <typename Idx> template <typename Idx>
__host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
...@@ -152,7 +155,7 @@ struct DynamicTensorDescriptor_v2 ...@@ -152,7 +155,7 @@ struct DynamicTensorDescriptor_v2
tran.CalculateLowerIndex(idx_low, idx_up); tran.CalculateLowerIndex(idx_low, idx_up);
}); });
return idx_hidden[0]; return idx_hidden[Number<0>{}];
#endif #endif
} }
...@@ -266,7 +269,7 @@ struct DynamicTensorCoordinate_v2 ...@@ -266,7 +269,7 @@ struct DynamicTensorCoordinate_v2
__host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); } __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }
__host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[0]; } __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }
// private: // private:
__host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; } __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }
...@@ -517,7 +520,7 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten ...@@ -517,7 +520,7 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
using HiddenIndex = MultiIndex<ndim_hidden>; using HiddenIndex = MultiIndex<ndim_hidden>;
// this is what needs to be calculated // this is what needs to be calculated
auto idx_diff_hidden = HiddenIndex{{0}}; auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();
// initialize visible index diff // initialize visible index diff
// idx_diff_hidden_pick_visible contains reference to idx_diff_hidden // idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
......
...@@ -208,7 +208,7 @@ struct Merge ...@@ -208,7 +208,7 @@ struct Merge
{ {
LowerIndex idx_low; LowerIndex idx_low;
index_t itmp = idx_up[0]; index_t itmp = idx_up[Number<0>{}];
constexpr auto pseudo_low_strides = constexpr auto pseudo_low_strides =
reverse_inclusive_scan_sequence( reverse_inclusive_scan_sequence(
...@@ -218,7 +218,7 @@ struct Merge ...@@ -218,7 +218,7 @@ struct Merge
static_for<0, nDimLow - 1, 1>{}( static_for<0, nDimLow - 1, 1>{}(
lambda_CalculateLowerIndex<decltype(pseudo_low_strides)>(itmp, idx_low)); lambda_CalculateLowerIndex<decltype(pseudo_low_strides)>(itmp, idx_low));
idx_low(nDimLow - 1) = itmp / pseudo_low_strides[nDimLow - 1]; idx_low(Number<nDimLow - 1>{}) = itmp / pseudo_low_strides[Number<nDimLow - 1>{}];
return idx_low; return idx_low;
} }
......
...@@ -11,8 +11,10 @@ namespace ck { ...@@ -11,8 +11,10 @@ namespace ck {
template <typename Arr, typename Picks> template <typename Arr, typename Picks>
struct ArrayElementPicker struct ArrayElementPicker
{ {
using type = ArrayElementPicker; using type = ArrayElementPicker;
#if 0
using data_type = typename Arr::data_type; using data_type = typename Arr::data_type;
#endif
__host__ __device__ constexpr ArrayElementPicker() = delete; __host__ __device__ constexpr ArrayElementPicker() = delete;
...@@ -26,20 +28,20 @@ struct ArrayElementPicker ...@@ -26,20 +28,20 @@ struct ArrayElementPicker
__host__ __device__ static constexpr auto Size() { return Picks::Size(); } __host__ __device__ static constexpr auto Size() { return Picks::Size(); }
template <index_t I> template <index_t I>
__host__ __device__ constexpr const data_type& At(Number<I>) const __host__ __device__ constexpr const auto& At(Number<I> i) const
{ {
static_assert(I < Size(), "wrong!"); static_assert(I < Size(), "wrong!");
constexpr auto IP = Picks{}[I]; constexpr auto IP = Picks{}[i];
return mArray[IP]; return mArray[IP];
} }
template <index_t I> template <index_t I>
__host__ __device__ constexpr data_type& At(Number<I>) __host__ __device__ constexpr auto& At(Number<I> i)
{ {
static_assert(I < Size(), "wrong!"); static_assert(I < Size(), "wrong!");
constexpr auto IP = Picks{}[I]; constexpr auto IP = Picks{}[i];
return mArray(IP); return mArray(IP);
} }
......
...@@ -133,7 +133,7 @@ __host__ __device__ constexpr auto operator*(TData v, Array<TData, NSize> a) ...@@ -133,7 +133,7 @@ __host__ __device__ constexpr auto operator*(TData v, Array<TData, NSize> a)
template <typename TData, typename Arr, typename Reduce> template <typename TData, typename Arr, typename Reduce>
__host__ __device__ constexpr TData reduce_on_array(const Arr& a, Reduce f, TData init) __host__ __device__ constexpr TData reduce_on_array(const Arr& a, Reduce f, TData init)
{ {
static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type"); // static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
static_assert(Arr::Size() > 0, "wrong"); static_assert(Arr::Size() > 0, "wrong");
TData result = init; TData result = init;
...@@ -151,12 +151,22 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in ...@@ -151,12 +151,22 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
TData r = init; TData r = init;
#if 0
#pragma unroll #pragma unroll
for(index_t i = NSize - 1; i >= 0; --i) for(index_t i = NSize - 1; i >= 0; --i)
{ {
r = f(r, x[i]); r = f(r, x[i]);
y(i) = r; y(i) = r;
} }
#else
static_for<NSize - 1, 0, -1>{}([&](auto i) {
r = f(r, x[i]);
y(i) = r;
});
r = f(r, x[Number<0>{}]);
y(Number<0>{}) = r;
#endif
return y; return y;
} }
...@@ -169,6 +179,7 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in ...@@ -169,6 +179,7 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
TData r = init; TData r = init;
#if 0
#pragma unroll #pragma unroll
for(index_t i = NSize - 1; i > 0; --i) for(index_t i = NSize - 1; i > 0; --i)
{ {
...@@ -177,6 +188,14 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in ...@@ -177,6 +188,14 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
} }
y(0) = r; y(0) = r;
#else
static_for<NSize - 1, 0, -1>{}([&](auto i) {
y(i) = r;
r = f(r, x[i]);
});
y(Number<0>{}) = r;
#endif
return y; return y;
} }
......
...@@ -10,7 +10,7 @@ namespace ck { ...@@ -10,7 +10,7 @@ namespace ck {
template <typename T> template <typename T>
__host__ __device__ void print_array(const char* s, T a) __host__ __device__ void print_array(const char* s, T a)
{ {
using data_type = typename decltype(a)::data_type; using data_type = decltype(a.At(Number<0>{}));
constexpr index_t nsize = a.Size(); constexpr index_t nsize = a.Size();
if constexpr(is_same<data_type, uint32_t>{}) if constexpr(is_same<data_type, uint32_t>{})
...@@ -30,7 +30,7 @@ __host__ __device__ void print_array(const char* s, T a) ...@@ -30,7 +30,7 @@ __host__ __device__ void print_array(const char* s, T a)
template <typename T> template <typename T>
__host__ __device__ void print_array_v2(const char* s, T a) __host__ __device__ void print_array_v2(const char* s, T a)
{ {
using data_type = typename decltype(a)::data_type; using data_type = decltype(a.At(Number<0>{}));
constexpr index_t nsize = a.Size(); constexpr index_t nsize = a.Size();
if constexpr(is_same<data_type, uint32_t>{}) if constexpr(is_same<data_type, uint32_t>{})
......
...@@ -7,493 +7,6 @@ ...@@ -7,493 +7,6 @@
namespace ck { namespace ck {
#if 0
template <typename T, index_t NSize>
struct StaticallyIndexedArray
{
};
template <typename T>
struct StaticallyIndexedArray<T, 0> : public Tuple<>
{
using data_type = T;
using base = Tuple<>;
__host__ __device__ explicit constexpr StaticallyIndexedArray() : base() {}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 1> : public Tuple<T>
{
using type = StaticallyIndexedArray;
using data_type = T;
using base = Tuple<T>;
static constexpr index_t nsize = base::Size();
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
template <typename Y>
__host__
__device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray<Y, nsize>& y)
: base(static_cast<const Tuple<Y>&>(y))
{
}
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray<Y, nsize>&& y)
: base(static_cast<Tuple<Y>&&>(y))
{
}
#if 0
template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == base::Size(),
bool>::type = false>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
static_assert(sizeof...(Ys) == nsize, "wrong! inconsistent size");
}
#else
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Y&& y)
: base(std::forward<Y>(y))
{
}
#endif
};
template <typename T>
struct StaticallyIndexedArray<T, 2> : public Tuple<T, T>
{
using data_type = T;
using base = Tuple<T, T>;
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
template <typename Y>
__host__
__device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray<Y, 2>& y)
: base(static_cast<const Tuple<Y, Y>&>(y))
{
}
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray<Y, 2>&& y)
: base(static_cast<Tuple<Y, Y>&&>(y))
{
}
template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == base::Size(),
bool>::type = false>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
static_assert(sizeof...(Ys) == 2, "wrong! inconsistent size");
}
};
template <typename T>
struct StaticallyIndexedArray<T, 3> : public Tuple<T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 4> : public Tuple<T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 5> : public Tuple<T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 6> : public Tuple<T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 7> : public Tuple<T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 8> : public Tuple<T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 9> : public Tuple<T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 10> : public Tuple<T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 11> : public Tuple<T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 12> : public Tuple<T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 13> : public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 14> : public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 15> : public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 16> : public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 17>
: public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 18>
: public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 19>
: public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 20>
: public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 21>
: public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 22>
: public Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>
{
using data_type = T;
using base = Tuple<T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T>;
template <typename... Ys>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
#else
namespace detail { namespace detail {
template <typename T, index_t NSize> template <typename T, index_t NSize>
...@@ -507,48 +20,8 @@ using same_type_tuple = decltype(generate_same_type_tuple<T, NSize>()); ...@@ -507,48 +20,8 @@ using same_type_tuple = decltype(generate_same_type_tuple<T, NSize>());
} // namespace detail } // namespace detail
#if 0
template <typename T, index_t NSize>
struct StaticallyIndexedArray : public detail::same_type_tuple<T, NSize>
{
using type = StaticallyIndexedArray;
using data_type = T;
using base = detail::same_type_tuple<T, NSize>;
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(
const StaticallyIndexedArray<Y, NSize>& y)
: base(static_cast<const detail::same_type_tuple<Y, NSize>&>(y))
{
}
template <typename Y>
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray<Y, NSize>&& y)
: base(static_cast<detail::same_type_tuple<Y, NSize>&&>(y))
{
}
template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == base::Size(), bool>::type = false>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
static_assert(sizeof...(Ys) == NSize, "wrong! inconsistent size");
}
};
#else
template <typename T, index_t NSize> template <typename T, index_t NSize>
using StaticallyIndexedArray = detail::same_type_tuple<T, NSize>; using StaticallyIndexedArray = detail::same_type_tuple<T, NSize>;
#endif
#endif
template <typename X, typename... Xs> template <typename X, typename... Xs>
__host__ __device__ constexpr auto make_statically_indexed_array(const X& x, const Xs&... xs) __host__ __device__ constexpr auto make_statically_indexed_array(const X& x, const Xs&... xs)
...@@ -563,5 +36,42 @@ __host__ __device__ constexpr auto make_statically_indexed_array() ...@@ -563,5 +36,42 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
return StaticallyIndexedArray<X, 0>(); return StaticallyIndexedArray<X, 0>();
} }
template <typename TData, index_t NSize, typename Reduce>
__host__ __device__ constexpr auto
reverse_exclusive_scan_on_array(const StaticallyIndexedArray<TData, NSize>& x, Reduce f, TData init)
{
StaticallyIndexedArray<TData, NSize> y;
TData r = init;
static_for<NSize - 1, 0, -1>{}([&](auto i) {
y(i) = r;
r = f(r, x[i]);
});
y(Number<0>{}) = r;
return y;
}
template <typename TData, index_t NSize, typename Reduce>
__host__ __device__ constexpr auto
reverse_inclusive_scan_on_array(const StaticallyIndexedArray<TData, NSize>& x, Reduce f, TData init)
{
StaticallyIndexedArray<TData, NSize> y;
TData r = init;
static_for<NSize - 1, 0, -1>{}([&](auto i) {
r = f(r, x[i]);
y(i) = r;
});
r = f(r, x[Number<0>{}]);
y(Number<0>{}) = r;
return y;
}
} // namespace ck } // namespace ck
#endif #endif
...@@ -124,9 +124,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X ...@@ -124,9 +124,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__ __device__ explicit constexpr Tuple() : base() {} __host__ __device__ explicit constexpr Tuple() : base() {}
__host__ __device__ explicit constexpr Tuple(const Tuple&) = default; __host__ __device__ constexpr Tuple(const Tuple&) = default;
__host__ __device__ explicit constexpr Tuple(Tuple&&) = default; __host__ __device__ constexpr Tuple(Tuple&&) = default;
#if 0 #if 0
template <typename... Ys, template <typename... Ys,
......
...@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type; using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v2( const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v2<4>(
to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides())); to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides()));
const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v2( const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v2<4>(
to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides())); to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides()));
const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v2( const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v2<4>(
to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides())); to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides()));
const auto conv_strides = to_multi_index(ConvStrides{}); const auto conv_strides = to_multi_index(ConvStrides{});
...@@ -124,11 +124,7 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -124,11 +124,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
index_t* const, index_t* const,
float* const, float* const,
float* const, float* const,
const decltype(in_gemmk_gemmn_global_desc), const decltype(in_gemmk_gemmn_global_desc)>,
const MultiIndex<2>,
const MultiIndex<2>,
const MultiIndex<2>,
const MultiIndex<2>>,
dim3(GridSize), dim3(GridSize),
dim3(BlockSize), dim3(BlockSize),
0, 0,
...@@ -136,11 +132,7 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -136,11 +132,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
static_cast<index_t*>(wei_kcyx_device_buf.GetDeviceBuffer()), static_cast<index_t*>(wei_kcyx_device_buf.GetDeviceBuffer()),
static_cast<float*>(in_nchw_device_buf.GetDeviceBuffer()), static_cast<float*>(in_nchw_device_buf.GetDeviceBuffer()),
static_cast<float*>(out_nkhw_device_buf.GetDeviceBuffer()), static_cast<float*>(out_nkhw_device_buf.GetDeviceBuffer()),
in_gemmk_gemmn_global_desc, in_gemmk_gemmn_global_desc);
conv_strides,
conv_dilations,
in_left_pads,
in_right_pads);
#endif #endif
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment