"git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "8fb2cf6c3773977a3a1f473c43b065afc0181b48"
Commit 4687ef88 authored by Chao Liu

Made multi-index transforms support compile-time and run-time info

parent 2558d019
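
The change replaces the old DynamicPassThrough{...} / DynamicPad{...} / DynamicEmbed<N>{...} constructor style with make_*_transform helpers (added in dynamic_multi_index_transform_helper.hpp below). Their length arguments may be run-time index_t values, compile-time Number<> constants, or a mixture, and dimension counts are deduced from the tuples. A minimal usage sketch; the function name, variable names and literal sizes are illustrative only, not taken from the commit:

#include "dynamic_multi_index_transform_helper.hpp"

using namespace ck;

void sketch(index_t C, index_t Y, index_t X)
{
    // run-time length: an index_t, stored inside the transform object
    const auto rt_pass = make_pass_through_transform(C * Y * X);

    // compile-time length: a Number<>, carried in the transform's type
    const auto ct_pass = make_pass_through_transform(Number<8>{});

    // mixed lengths: the tuple may combine Number<> and index_t entries,
    // and the lower-dimension count (3) is deduced from the tuple size
    const auto merge = make_merge_transform(make_tuple(Number<4>{}, Y, X));

    (void)rt_pass;
    (void)ct_pass;
    (void)merge;
}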
@@ -86,46 +86,43 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
         // weight tensor
         const auto wei_gemmk_gemmm_global_desc = transform_dynamic_tensor_descriptor(
-            make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(K, C * Y * X)),
-            make_tuple(DynamicPassThrough{K}, DynamicPassThrough{C * Y * X}),
+            make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(K, C * Y * X)),
+            make_tuple(make_pass_through_transform(K), make_pass_through_transform(C * Y * X)),
             make_tuple(Sequence<0>{}, Sequence<1>{}),
             make_tuple(Sequence<1>{}, Sequence<0>{}));

         // input tensor
         const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
             in_n_c_hi_wi_global_desc,
-            make_tuple(DynamicPassThrough{N},
-                       DynamicPassThrough{C},
-                       DynamicPad{Hi, InLeftPadH, InRightPadH},
-                       DynamicPad{Wi, InLeftPadW, InRightPadW}),
+            make_tuple(make_pass_through_transform(N),
+                       make_pass_through_transform(C),
+                       make_pad_transform(Hi, InLeftPadH, InRightPadH),
+                       make_pad_transform(Wi, InLeftPadW, InRightPadW)),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
-        const auto Hip = in_n_c_hip_wip_global_desc.GetLength(I2);
-        const auto Wip = in_n_c_hip_wip_global_desc.GetLength(I3);
         const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
             in_n_c_hip_wip_global_desc,
-            make_tuple(DynamicPassThrough{N},
-                       DynamicPassThrough{C},
-                       DynamicEmbed<2>{make_multi_index(Y, Ho),
-                                       make_multi_index(ConvDilationH, ConvStrideH)},
-                       DynamicEmbed<2>{make_multi_index(X, Wo),
-                                       make_multi_index(ConvDilationW, ConvStrideW)}),
+            make_tuple(
+                make_pass_through_transform(N),
+                make_pass_through_transform(C),
+                make_embed_transform(make_tuple(Y, Ho), make_tuple(ConvDilationH, ConvStrideH)),
+                make_embed_transform(make_tuple(X, Wo), make_tuple(ConvDilationW, ConvStrideW))),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));

         const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
             in_n_c_y_ho_x_wo_global_desc,
-            make_tuple(DynamicMerge<3>{make_multi_index(C, Y, X)},
-                       DynamicMerge<3>{make_multi_index(N, Ho, Wo)}),
+            make_tuple(make_merge_transform(make_tuple(C, Y, X)),
+                       make_merge_transform(make_tuple(N, Ho, Wo))),
             make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));

         // output tensor
         const auto out_gemmm_gemmn_global_desc = transform_dynamic_tensor_descriptor(
-            make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, K, Ho * Wo)),
-            make_tuple(DynamicPassThrough{K}, DynamicMerge<2>{make_multi_index(N, Ho * Wo)}),
+            make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K, Ho * Wo)),
+            make_tuple(make_pass_through_transform(K),
+                       make_merge_transform(make_tuple(N, Ho * Wo))),
             make_tuple(Sequence<1>{}, Sequence<0, 2>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));
@@ -139,21 +136,17 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
             throw std::runtime_error("wrong! GEMM size no divisible");
         }

-        constexpr auto GemmM1 = GemmMPerThread * GemmMLevel0Cluster * GemmMLevel1Cluster;
-        constexpr auto GemmN1 = GemmNPerThread * GemmNLevel0Cluster * GemmNLevel1Cluster;
+        constexpr auto GemmM1 = Number<GemmMPerThread * GemmMLevel0Cluster * GemmMLevel1Cluster>{};
+        constexpr auto GemmN1 = Number<GemmNPerThread * GemmNLevel0Cluster * GemmNLevel1Cluster>{};

         const auto GemmM0 = GemmM / GemmM1;
         const auto GemmN0 = GemmN / GemmN1;

-        const auto GemmM0_GemmM1 = make_tuple(GemmM0, Number<GemmM1>{});
-        const auto GemmN0_GemmN1 = make_tuple(GemmN0, Number<GemmN1>{});
-
         const auto out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc =
             transform_dynamic_tensor_descriptor(
                 out_gemmm_gemmn_global_desc,
-                make_tuple(
-                    DynamicUnMerge<2, false, remove_cv_t<decltype(GemmM0_GemmM1)>>{GemmM0_GemmM1},
-                    DynamicUnMerge<2, false, remove_cv_t<decltype(GemmN0_GemmN1)>>{GemmN0_GemmN1}),
+                make_tuple(make_unmerge_transform(make_tuple(GemmM0, GemmM1)),
+                           make_unmerge_transform(make_tuple(GemmN0, GemmN1))),
                 make_tuple(Sequence<0>{}, Sequence<1>{}),
                 make_tuple(Sequence<0, 1>{}, Sequence<2, 3>{}));
@@ -770,65 +763,41 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
         if(!(InLeftPadH == 0 && InLeftPadW == 0 && InRightPadH == 0 && InRightPadW == 0))
         {
-            throw std::runtime_error("wrong! 1x1, stride 1, no padding");
+            throw std::runtime_error("wrong! no padding");
         }

         // weight tensor
-#if 0
-        // TODO implement graph optimization of tensor descriptor transformation
-        const auto wei_gemmk_gemmm_global_desc = transform_dynamic_tensor_descriptor(
-            wei_k_c_y_x_global_desc,
-            make_tuple(DynamicPassThrough{K}, DynamicMerge<3>{make_multi_index(C, Y, X)}),
-            make_tuple(Sequence<0>{}, Sequence<1, 2, 3>{}),
-            make_tuple(Sequence<1>{}, Sequence<0>{}));
-#else
         const auto wei_gemmk_gemmm_global_desc = transform_dynamic_tensor_descriptor(
-            make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(K, C * Y * X)),
-            make_tuple(DynamicPassThrough{K}, DynamicPassThrough{C * Y * X}),
+            make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(K, C * Y * X)),
+            make_tuple(make_pass_through_transform(K), make_pass_through_transform(C * Y * X)),
             make_tuple(Sequence<0>{}, Sequence<1>{}),
             make_tuple(Sequence<1>{}, Sequence<0>{}));
-#endif

         // input tensor
-        // debug: don't do padding
-        const auto in_n_c_hip_wip_global_desc = in_n_c_hi_wi_global_desc;
-
-        const auto Hip = in_n_c_hip_wip_global_desc.GetLength(I2);
-        const auto Wip = in_n_c_hip_wip_global_desc.GetLength(I3);
-
         const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
-            in_n_c_hip_wip_global_desc,
-            make_tuple(DynamicPassThrough{N},
-                       DynamicPassThrough{C},
-                       DynamicEmbed<2>{make_multi_index(Y, Ho),
-                                       make_multi_index(ConvDilationH, ConvStrideH)},
-                       DynamicEmbed<2>{make_multi_index(X, Wo),
-                                       make_multi_index(ConvDilationW, ConvStrideW)}),
+            in_n_c_hi_wi_global_desc,
+            make_tuple(
+                make_pass_through_transform(N),
+                make_pass_through_transform(C),
+                make_embed_transform(make_tuple(Y, Ho), make_tuple(ConvDilationH, ConvStrideH)),
+                make_embed_transform(make_tuple(X, Wo), make_tuple(ConvDilationW, ConvStrideW))),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));

         const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
             in_n_c_y_ho_x_wo_global_desc,
-            make_tuple(DynamicMerge<3>{make_multi_index(C, Y, X)},
-                       DynamicMerge<3>{make_multi_index(N, Ho, Wo)}),
+            make_tuple(make_merge_transform(make_tuple(C, Y, X)),
+                       make_merge_transform(make_tuple(N, Ho, Wo))),
             make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));

         // output tensor
-#if 0
-        //TODO: implement graph optimization of tensor descriptor transformation
-        const auto out_gemmm_gemmn_global_desc =
-            transform_dynamic_tensor_descriptor(out_n_k_ho_wo_global_desc,
-                make_tuple(DynamicPassThrough{K}, DynamicMerge<3>{make_mult_index(N, Ho, Wo)}),
-                make_tuple(Sequence<1>{}, Sequence<0, 2, 3>{}),
-                make_tuple(Sequence<0>{}, Sequence<1>{}));
-#else
         const auto out_gemmm_gemmn_global_desc = transform_dynamic_tensor_descriptor(
-            make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, K, Ho * Wo)),
-            make_tuple(DynamicPassThrough{K}, DynamicMerge<2>{make_multi_index(N, Ho * Wo)}),
+            make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K, Ho * Wo)),
+            make_tuple(make_pass_through_transform(K),
+                       make_merge_transform(make_tuple(N, Ho * Wo))),
             make_tuple(Sequence<1>{}, Sequence<0, 2>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));
-#endif

         const auto GemmM = out_gemmm_gemmn_global_desc.GetLength(I0);
         const auto GemmN = out_gemmm_gemmn_global_desc.GetLength(I1);
@@ -840,8 +809,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
             throw std::runtime_error("wrong! GEMM size no divisible");
         }

-        constexpr auto GemmM1 = GemmMPerThread * GemmMLevel0Cluster * GemmMLevel1Cluster;
-        constexpr auto GemmN1 = GemmNPerThread * GemmNLevel0Cluster * GemmNLevel1Cluster;
+        constexpr auto GemmM1 = Number<GemmMPerThread * GemmMLevel0Cluster * GemmMLevel1Cluster>{};
+        constexpr auto GemmN1 = Number<GemmNPerThread * GemmNLevel0Cluster * GemmNLevel1Cluster>{};

         const auto GemmM0 = GemmM / GemmM1;
         const auto GemmN0 = GemmN / GemmN1;

@@ -849,8 +818,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
         const auto out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc =
             transform_dynamic_tensor_descriptor(
                 out_gemmm_gemmn_global_desc,
-                make_tuple(DynamicUnMerge<2>{make_multi_index(GemmM0, GemmM1)},
-                           DynamicUnMerge<2>{make_multi_index(GemmN0, GemmN1)}),
+                make_tuple(make_unmerge_transform(make_tuple(GemmM0, GemmM1)),
+                           make_unmerge_transform(make_tuple(GemmN0, GemmN1))),
                 make_tuple(Sequence<0>{}, Sequence<1>{}),
                 make_tuple(Sequence<0, 1>{}, Sequence<2, 3>{}));
@@ -1469,22 +1438,23 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
         // weight tensor
         const auto wei_gemmk_gemmm_global_desc = transform_dynamic_tensor_descriptor(
-            make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(K, C)),
-            make_tuple(DynamicPassThrough{K}, DynamicPassThrough{C}),
+            make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(K, C)),
+            make_tuple(make_pass_through_transform(K), make_pass_through_transform(C)),
             make_tuple(Sequence<0>{}, Sequence<1>{}),
             make_tuple(Sequence<1>{}, Sequence<0>{}));

         // input tensor
         const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
             in_n_c_hi_wi_global_desc,
-            make_tuple(DynamicPassThrough{C}, DynamicMerge<3>{make_multi_index(N, Ho, Wo)}),
+            make_tuple(make_pass_through_transform(C), make_merge_transform(make_tuple(N, Ho, Wo))),
             make_tuple(Sequence<1>{}, Sequence<0, 2, 3>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));

         // output tensor
         const auto out_gemmm_gemmn_global_desc = transform_dynamic_tensor_descriptor(
-            make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, K, Ho * Wo)),
-            make_tuple(DynamicPassThrough{K}, DynamicMerge<2>{make_multi_index(N, Ho * Wo)}),
+            make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K, Ho * Wo)),
+            make_tuple(make_pass_through_transform(K),
+                       make_merge_transform(make_tuple(N, Ho * Wo))),
             make_tuple(Sequence<1>{}, Sequence<0, 2>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));

@@ -1498,8 +1468,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
             throw std::runtime_error("wrong! GEMM size no divisible");
         }

-        constexpr auto GemmM1 = GemmMPerThread * GemmMLevel0Cluster * GemmMLevel1Cluster;
-        constexpr auto GemmN1 = GemmNPerThread * GemmNLevel0Cluster * GemmNLevel1Cluster;
+        constexpr auto GemmM1 = Number<GemmMPerThread * GemmMLevel0Cluster * GemmMLevel1Cluster>{};
+        constexpr auto GemmN1 = Number<GemmNPerThread * GemmNLevel0Cluster * GemmNLevel1Cluster>{};

         const auto GemmM0 = GemmM / GemmM1;
         const auto GemmN0 = GemmN / GemmN1;

@@ -1507,8 +1477,8 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
         const auto out_gemmm0_gemmm1_gemmn0_gemmn1_global_desc =
             transform_dynamic_tensor_descriptor(
                 out_gemmm_gemmn_global_desc,
-                make_tuple(DynamicUnMerge<2>{make_multi_index(GemmM0, GemmM1)},
-                           DynamicUnMerge<2>{make_multi_index(GemmN0, GemmN1)}),
+                make_tuple(make_unmerge_transform(make_tuple(GemmM0, GemmM1)),
+                           make_unmerge_transform(make_tuple(GemmN0, GemmN1))),
                 make_tuple(Sequence<0>{}, Sequence<1>{}),
                 make_tuple(Sequence<0, 1>{}, Sequence<2, 3>{}));
......
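
A second recurring change in this driver file is that GemmM1 and GemmN1 become Number<> constants instead of plain constexpr integers, so the unmerge transform built from make_tuple(GemmM0, GemmM1) sees GemmM1 as a compile-time length while GemmM0 stays a run-time value. Roughly, with illustrative values standing in for the GemmMPerThread / cluster parameters:

// before: a plain constexpr value; once placed in a tuple it is just an index_t
constexpr index_t GemmM1_old = 4 * 4 * 4;

// after: a Number<>, so the tuple element keeps the value in its type
constexpr auto GemmM1_new = Number<4 * 4 * 4>{};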
@@ -6,7 +6,7 @@
 namespace ck {

-template <typename LowLength = index_t>
+template <typename LowLength>
 struct DynamicPassThrough
 {
     using LowerIndex = MultiIndex<1>;

@@ -84,10 +84,7 @@ struct DynamicPassThrough
     }
 };

-template <bool SkipIsValidCheck = false,
-          typename LowLength = index_t,
-          typename LeftPad = index_t,
-          typename RightPad = index_t>
+template <typename LowLength, typename LeftPad, typename RightPad, bool SkipIsValidCheck = false>
 struct DynamicPad
 {
     using LowerIndex = MultiIndex<1>;

@@ -175,7 +172,7 @@ struct DynamicPad
     }
 };

-template <bool SkipIsValidCheck = false, typename LowLength = index_t, typename LeftPad = index_t>
+template <typename LowLength, typename LeftPad, bool SkipIsValidCheck = false>
 struct DynamicLeftPad
 {
     using LowerIndex = MultiIndex<1>;

@@ -257,7 +254,7 @@ struct DynamicLeftPad
     }
 };

-template <bool SkipIsValidCheck = false, typename LowLength = index_t, typename RightPad = index_t>
+template <typename LowLength, typename RightPad, bool SkipIsValidCheck = false>
 struct DynamicRightPad
 {
     using LowerIndex = MultiIndex<1>;

@@ -349,13 +346,13 @@ struct DynamicRightPad
 // 2) Tuple of Number, which is known at compile-time, or
 // 3) Tuple of mixture of index_t and Number, which is known partially at run-time and partially
 //    at compile-time
-template <index_t NDimUp,
-          typename UpLengths = MultiIndex<NDimUp>,
-          typename Coefficients = MultiIndex<NDimUp>,
-          typename std::enable_if<UpLengths::Size() == NDimUp && Coefficients::Size() == NDimUp,
-                                  bool>::type = false>
+template <typename UpLengths,
+          typename Coefficients,
+          typename std::enable_if<UpLengths::Size() == Coefficients::Size(), bool>::type = false>
 struct DynamicEmbed
 {
+    static constexpr index_t NDimUp = UpLengths::Size();
+
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<NDimUp>;

@@ -439,9 +436,11 @@ struct DynamicEmbed
     }
 };

-template <index_t NDimLow, typename LowLengths = MultiIndex<NDimLow>>
+template <typename LowLengths>
 struct DynamicMerge
 {
+    static constexpr index_t NDimLow = LowLengths::Size();
+
     using LowerIndex = MultiIndex<NDimLow>;
     using UpperIndex = MultiIndex<1>;

@@ -952,12 +951,11 @@ struct DynamicMerge
     }
 };

-template <index_t NDimUp,
-          bool Use24BitIntegerCalculation = false,
-          typename UpLengths = MultiIndex<NDimUp>,
-          typename std::enable_if<UpLengths::Size() == NDimUp, bool>::type = false>
+template <typename UpLengths, bool Use24BitIntegerCalculation>
 struct DynamicUnMerge
 {
+    static constexpr index_t NDimUp = UpLengths::Size();
+
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<NDimUp>;

@@ -1046,7 +1044,7 @@ struct DynamicUnMerge
     }
 };

-template <typename LowerIndex = index_t>
+template <typename LowerIndex>
 struct DynamicFreeze
 {
     LowerIndex low_idx_;
......
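
The net effect of the signature changes in that file is that the dimension counts (NDimUp, NDimLow) are now deduced from the length tuple via Size() rather than passed as a template argument, which is what lets the drivers above build transforms through the helpers. A hedged before/after sketch using names from the driver code:

// before: caller spells out the dimension count and builds a MultiIndex
const auto merge_old = DynamicMerge<3>{make_multi_index(N, Ho, Wo)};

// after: NDimLow = 3 is deduced from the tuple, whose elements may be
// index_t (run-time) or Number<> (compile-time)
const auto merge_new = make_merge_transform(make_tuple(N, Ho, Wo));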
#ifndef CK_DYNAMIC_MULTI_INDEX_TRANSFORM_HELPER_HPP
#define CK_DYNAMIC_MULTI_INDEX_TRANSFORM_HELPER_HPP

#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"

namespace ck {

template <typename LowLength>
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength& low_length)
{
    return DynamicPassThrough<LowLength>{low_length};
}

template <typename LowLength, typename LeftPad, typename RightPad, bool SkipIsValidCheck = false>
__host__ __device__ constexpr auto
make_pad_transform(const LowLength& low_length,
                   const LeftPad& left_pad,
                   const RightPad& right_pad,
                   integral_constant<bool, SkipIsValidCheck> = integral_constant<bool, false>{})
{
    return DynamicPad<LowLength, LeftPad, RightPad, SkipIsValidCheck>{
        low_length, left_pad, right_pad};
}

template <typename LowLength, typename LeftPad, bool SkipIsValidCheck = false>
__host__ __device__ constexpr auto make_left_pad_transform(
    const LowLength& low_length,
    const LeftPad& left_pad,
    integral_constant<bool, SkipIsValidCheck> = integral_constant<bool, false>{})
{
    return DynamicLeftPad<LowLength, LeftPad, SkipIsValidCheck>{low_length, left_pad};
}

template <typename LowLength, typename RightPad, bool SkipIsValidCheck>
__host__ __device__ constexpr auto make_right_pad_transform(
    const LowLength& low_length,
    const RightPad& right_pad,
    integral_constant<bool, SkipIsValidCheck> = integral_constant<bool, false>{})
{
    return DynamicRightPad<LowLength, RightPad, SkipIsValidCheck>{low_length, right_pad};
}

template <typename UpLengths,
          typename Coefficients,
          typename std::enable_if<UpLengths::Size() == Coefficients::Size(), bool>::type = false>
__host__ __device__ constexpr auto make_embed_transform(const UpLengths& up_lengths,
                                                        const Coefficients& coefficients)
{
    return DynamicEmbed<UpLengths, Coefficients>{up_lengths, coefficients};
}

template <typename LowLengths>
__host__ __device__ constexpr auto make_merge_transform(const LowLengths& low_lengths)
{
    return DynamicMerge<LowLengths>{low_lengths};
}

template <typename UpLengths, bool Use24BitIntegerCalculation = false>
__host__ __device__ constexpr auto make_unmerge_transform(
    const UpLengths& up_lengths,
    integral_constant<bool, Use24BitIntegerCalculation> = integral_constant<bool, false>{})
{
    return DynamicUnMerge<UpLengths, Use24BitIntegerCalculation>{up_lengths};
}

template <typename LowerIndex>
__host__ __device__ constexpr auto make_freeze_transform(const LowerIndex& low_idx)
{
    return DynamicFreeze<LowerIndex>{low_idx};
}

} // namespace ck
#endif
@@ -3,6 +3,7 @@

 #include "common_header.hpp"
 #include "dynamic_tensor_descriptor.hpp"
+#include "dynamic_multi_index_transform_helper.hpp"

 namespace ck {

@@ -23,8 +24,7 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
 {
     constexpr index_t N = sizeof...(Lengths);

-    const auto transforms =
-        make_tuple(DynamicEmbed<N, Tuple<Lengths...>, Tuple<Strides...>>{lengths, strides});
+    const auto transforms = make_tuple(make_embed_transform(lengths, strides));

     constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});

@@ -66,7 +66,7 @@ make_dynamic_naive_tensor_descriptor_packed_v2(const Tuple<Lengths...>& lengths)
 {
     constexpr index_t N = sizeof...(Lengths);

-    const auto transforms = make_tuple(DynamicUnMerge<N, false, Tuple<Lengths...>>{lengths});
+    const auto transforms = make_tuple(make_unmerge_transform(lengths));

     constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
......
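
Since make_unmerge_transform accepts any length tuple, make_dynamic_naive_tensor_descriptor_packed_v2 now covers both of the modes toggled by the #if in the device code below. A hedged sketch; the variable names and sizes are made up:

// run-time descriptor: lengths are index_t values (the "run-time variables" branch)
const auto desc_rt = make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(n, c, hi, wi));

// compile-time descriptor: lengths are Number<>s, as produced by
// sequence_to_tuple_of_number(...) (the "compile-time variables" branch)
const auto desc_ct = make_dynamic_naive_tensor_descriptor_packed_v2(
    make_tuple(Number<1>{}, Number<16>{}, Number<28>{}, Number<28>{}));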
@@ -49,7 +49,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
    wei_kcyx_device_buf.ToDevice(wei_kcyx.mData.data());
    out_nkhw_device_buf.ToDevice(out_nkhw.mData.data());

-#if 1
+#if 0
+    // run-time variables
    const auto in_n_c_hi_wi_desc =
        make_dynamic_naive_tensor_descriptor_packed_v2(to_multi_index(InDesc::GetLengths()));
    const auto wei_k_c_y_x_desc =

@@ -62,6 +63,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
    const auto in_left_pads = to_multi_index(InLeftPads{});
    const auto in_right_pads = to_multi_index(InRightPads{});
 #else
+    // compile-time variables
    const auto in_n_c_hi_wi_desc = make_dynamic_naive_tensor_descriptor_packed_v2(
        sequence_to_tuple_of_number(InDesc::GetLengths()));
    const auto wei_k_c_y_x_desc = make_dynamic_naive_tensor_descriptor_packed_v2(

@@ -234,7 +236,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc
    constexpr auto conv_driver =
 #if 1
        DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
-#elif 1
+#elif 0
        DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_no_pad
 #elif 1
        DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
......