"...git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "32ab994d76353f7a34ae772984a5f9ee97da6b7e"
Commit 56fc0842 authored by Chao Liu's avatar Chao Liu
Browse files

tidy

parent 54fba515
...@@ -14,15 +14,13 @@ void host_direct_convolution(const Tensor<TIn>& in, ...@@ -14,15 +14,13 @@ void host_direct_convolution(const Tensor<TIn>& in,
const ConvStrides& conv_strides, const ConvStrides& conv_strides,
const ConvDilations& conv_dilations, const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads, const InLeftPads& in_left_pads,
const InRightPads& in_right_pads, const InRightPads&,
const ConvTensorLayout layout = ConvTensorLayout::NCHW) const ConvTensorLayout layout = ConvTensorLayout::NCHW)
{ {
using namespace ck; using namespace ck;
constexpr auto I0 = Number<0>{}; constexpr auto I0 = Number<0>{};
constexpr auto I1 = Number<1>{}; constexpr auto I1 = Number<1>{};
constexpr auto I2 = Number<2>{};
constexpr auto I3 = Number<3>{};
auto f_nchw = [&](auto n, auto k, auto ho, auto wo) { auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
double v = 0; double v = 0;
...@@ -68,23 +66,25 @@ void host_direct_convolution(const Tensor<TIn>& in, ...@@ -68,23 +66,25 @@ void host_direct_convolution(const Tensor<TIn>& in,
out(n, ho, wo, k) = v; out(n, ho, wo, k) = v;
}; };
switch(layout) if(layout == ConvTensorLayout::NCHW)
{ {
case ConvTensorLayout::NCHW:
make_ParallelTensorFunctor(f_nchw, make_ParallelTensorFunctor(f_nchw,
out.mDesc.GetLengths()[0], out.mDesc.GetLengths()[0],
out.mDesc.GetLengths()[1], out.mDesc.GetLengths()[1],
out.mDesc.GetLengths()[2], out.mDesc.GetLengths()[2],
out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency()); out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
break; }
case ConvTensorLayout::NHWC: else if(layout == ConvTensorLayout::NHWC)
{
make_ParallelTensorFunctor(f_nhwc, make_ParallelTensorFunctor(f_nhwc,
out.mDesc.GetLengths()[0], out.mDesc.GetLengths()[0],
out.mDesc.GetLengths()[1], out.mDesc.GetLengths()[1],
out.mDesc.GetLengths()[2], out.mDesc.GetLengths()[2],
out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency()); out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
break; }
default: throw std::runtime_error("wrong! not supported layout"); else
{
throw std::runtime_error("wrong! not supported layout");
} }
} }
...@@ -100,17 +100,15 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw, ...@@ -100,17 +100,15 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
constexpr std::size_t HoPerTile = 2; constexpr std::size_t HoPerTile = 2;
constexpr std::size_t WoPerTile = 2; constexpr std::size_t WoPerTile = 2;
std::size_t N = in_nchw.mDesc.GetLengths()[0]; std::size_t N = in_nchw.mDesc.GetLengths()[0];
std::size_t C = in_nchw.mDesc.GetLengths()[1]; std::size_t C = in_nchw.mDesc.GetLengths()[1];
std::size_t HI = in_nchw.mDesc.GetLengths()[2];
std::size_t WI = in_nchw.mDesc.GetLengths()[3];
std::size_t K = wei_kcyx.mDesc.GetLengths()[0]; std::size_t K = wei_kcyx.mDesc.GetLengths()[0];
std::size_t Y = wei_kcyx.mDesc.GetLengths()[2]; std::size_t Y = wei_kcyx.mDesc.GetLengths()[2];
std::size_t X = wei_kcyx.mDesc.GetLengths()[3]; std::size_t X = wei_kcyx.mDesc.GetLengths()[3];
std::size_t HO = out_nkhw.mDesc.GetLengths()[2]; std::size_t Ho = out_nkhw.mDesc.GetLengths()[2];
std::size_t WO = out_nkhw.mDesc.GetLengths()[3]; std::size_t Wo = out_nkhw.mDesc.GetLengths()[3];
index_t h_pad_low = InLeftPads{}.Get(Number<0>{}); index_t h_pad_low = InLeftPads{}.Get(Number<0>{});
index_t w_pad_low = InLeftPads{}.Get(Number<1>{}); index_t w_pad_low = InLeftPads{}.Get(Number<1>{});
...@@ -118,8 +116,8 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw, ...@@ -118,8 +116,8 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
std::size_t HiPerTile = HoPerTile + Y - 1; std::size_t HiPerTile = HoPerTile + Y - 1;
std::size_t WiPerTile = WoPerTile + X - 1; std::size_t WiPerTile = WoPerTile + X - 1;
std::size_t HTile = (HO + HoPerTile - 1) / HoPerTile; std::size_t HTile = (Ho + HoPerTile - 1) / HoPerTile;
std::size_t WTile = (WO + WoPerTile - 1) / WoPerTile; std::size_t WTile = (Wo + WoPerTile - 1) / WoPerTile;
Tensor<double> in_hold({N, C, HTile, WTile, HiPerTile, WiPerTile}); Tensor<double> in_hold({N, C, HTile, WTile, HiPerTile, WiPerTile});
Tensor<double> in_transform({N, C, HTile, WTile, HiPerTile, WiPerTile}); Tensor<double> in_transform({N, C, HTile, WTile, HiPerTile, WiPerTile});
......
...@@ -9,7 +9,7 @@ struct GeneratorTensor_1 ...@@ -9,7 +9,7 @@ struct GeneratorTensor_1
int value = 1; int value = 1;
template <typename... Is> template <typename... Is>
float operator()(Is... is) float operator()(Is...)
{ {
return value; return value;
} }
......
...@@ -99,40 +99,48 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw ...@@ -99,40 +99,48 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
// clang-format on // clang-format on
} }
ck::DataTypeEnum_t ABDataTypeEnum; ck::DataTypeEnum_t ABDataTypeEnum = ck::DataTypeEnum_t::Unknown;
ck::DataTypeEnum_t AccDataTypeEnum; ck::DataTypeEnum_t AccDataTypeEnum = ck::DataTypeEnum_t::Unknown;
ck::DataTypeEnum_t CDataTypeEnum; ck::DataTypeEnum_t CDataTypeEnum = ck::DataTypeEnum_t::Unknown;
int BlockSize; int BlockSize = 1;
int GN0; int GN0 = -1;
int GK1; int GK1 = -1;
int GM1PerBlockGM11; int GM1PerBlockGM11 = -1;
int GN1PerBlockGN11; int GN1PerBlockGN11 = -1;
int GK0PerBlock; int GK0PerBlock = -1;
int BM1PerThreadBM11; int BM1PerThreadBM11 = -1;
int BN1PerThreadBN11; int BN1PerThreadBN11 = -1;
int BK0PerThread; int BK0PerThread = -1;
std::array<int, 2> BM10BN10ThreadClusterBM10Xs; std::array<int, 2> BM10BN10ThreadClusterBM10Xs = {-1, -1};
std::array<int, 2> BM10BN10ThreadClusterBN10Xs; std::array<int, 2> BM10BN10ThreadClusterBN10Xs = {-1, -1};
std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1; std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1 = {
std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1; -1, -1, -1, -1, -1};
std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1; std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1 = {
std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1; -1, -1, -1, -1, -1};
std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1 = {
std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1; -1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1; std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1 = {
std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1; -1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1 = {
int CThreadTransferDstScalarPerVector; -1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1 = {
bool HasMainKBlockLoop; -1, -1, -1, -1, -1};
bool HasDoubleTailKBlockLoop; std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1 = {
-1, -1, -1, -1, -1};
int CThreadTransferDstScalarPerVector = -1;
bool HasMainKBlockLoop = false;
bool HasDoubleTailKBlockLoop = false;
}; };
struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment