Commit 56fc0842 authored by Chao Liu

tidy

parent 54fba515
@@ -14,15 +14,13 @@ void host_direct_convolution(const Tensor<TIn>& in,
const ConvStrides& conv_strides,
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
-                              const InRightPads& in_right_pads,
+                              const InRightPads&,
const ConvTensorLayout layout = ConvTensorLayout::NCHW)
{
using namespace ck;
constexpr auto I0 = Number<0>{};
constexpr auto I1 = Number<1>{};
-    constexpr auto I2 = Number<2>{};
-    constexpr auto I3 = Number<3>{};
auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
double v = 0;
@@ -68,23 +66,25 @@ void host_direct_convolution(const Tensor<TIn>& in,
out(n, ho, wo, k) = v;
};
-    switch(layout)
+    if(layout == ConvTensorLayout::NCHW)
{
-    case ConvTensorLayout::NCHW:
make_ParallelTensorFunctor(f_nchw,
out.mDesc.GetLengths()[0],
out.mDesc.GetLengths()[1],
out.mDesc.GetLengths()[2],
out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
-        break;
-    case ConvTensorLayout::NHWC:
+    }
+    else if(layout == ConvTensorLayout::NHWC)
+    {
make_ParallelTensorFunctor(f_nhwc,
out.mDesc.GetLengths()[0],
out.mDesc.GetLengths()[1],
out.mDesc.GetLengths()[2],
out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
-        break;
-    default: throw std::runtime_error("wrong! not supported layout");
}
+    else
+    {
+        throw std::runtime_error("wrong! not supported layout");
+    }
}
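
The refactor above replaces a `switch` (whose `break`s and `default` are easy to get wrong) with an explicit if/else chain over the layout enum. For readers unfamiliar with the host helper, `make_ParallelTensorFunctor` fans the per-output-element lambda out over the four output dimensions. A minimal standalone sketch of that pattern, using only the standard library (`for_each_4d` and its flat-index split are illustrative names, not the library's actual implementation):

```cpp
#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>

// Illustrative stand-in for make_ParallelTensorFunctor: apply f(n, k, ho, wo)
// over a 4-D index space, splitting the flattened range across threads.
template <typename F>
void for_each_4d(F f, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3)
{
    const std::size_t total      = d0 * d1 * d2 * d3;
    const std::size_t num_thread = std::max(1u, std::thread::hardware_concurrency());
    const std::size_t chunk      = (total + num_thread - 1) / num_thread;

    std::vector<std::thread> workers;
    for(std::size_t t = 0; t < num_thread; ++t)
    {
        const std::size_t begin = t * chunk;
        const std::size_t end   = std::min(total, begin + chunk);
        workers.emplace_back([=] {
            for(std::size_t i = begin; i < end; ++i)
            {
                // Unflatten i into (n, k, ho, wo), row-major.
                std::size_t rest     = i;
                const std::size_t wo = rest % d3; rest /= d3;
                const std::size_t ho = rest % d2; rest /= d2;
                const std::size_t k  = rest % d1; rest /= d1;
                const std::size_t n  = rest;
                f(n, k, ho, wo);
            }
        });
    }
    for(auto& w : workers)
        w.join();
}
```

Calling `for_each_4d(f_nchw, N, K, Ho, Wo)` would visit every output element exactly once, mirroring how the functor above is invoked with `std::thread::hardware_concurrency()` threads.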
@@ -102,15 +102,13 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
std::size_t N = in_nchw.mDesc.GetLengths()[0];
std::size_t C = in_nchw.mDesc.GetLengths()[1];
std::size_t HI = in_nchw.mDesc.GetLengths()[2];
std::size_t WI = in_nchw.mDesc.GetLengths()[3];
std::size_t K = wei_kcyx.mDesc.GetLengths()[0];
std::size_t Y = wei_kcyx.mDesc.GetLengths()[2];
std::size_t X = wei_kcyx.mDesc.GetLengths()[3];
-    std::size_t HO = out_nkhw.mDesc.GetLengths()[2];
-    std::size_t WO = out_nkhw.mDesc.GetLengths()[3];
+    std::size_t Ho = out_nkhw.mDesc.GetLengths()[2];
+    std::size_t Wo = out_nkhw.mDesc.GetLengths()[3];
index_t h_pad_low = InLeftPads{}.Get(Number<0>{});
index_t w_pad_low = InLeftPads{}.Get(Number<1>{});
@@ -118,8 +116,8 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
std::size_t HiPerTile = HoPerTile + Y - 1;
std::size_t WiPerTile = WoPerTile + X - 1;
-    std::size_t HTile = (HO + HoPerTile - 1) / HoPerTile;
-    std::size_t WTile = (WO + WoPerTile - 1) / WoPerTile;
+    std::size_t HTile = (Ho + HoPerTile - 1) / HoPerTile;
+    std::size_t WTile = (Wo + WoPerTile - 1) / WoPerTile;
Tensor<double> in_hold({N, C, HTile, WTile, HiPerTile, WiPerTile});
Tensor<double> in_transform({N, C, HTile, WTile, HiPerTile, WiPerTile});
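
Note the tile counts use the ceiling-division idiom `(a + b - 1) / b`, so a partially filled edge tile is still counted. A small self-contained illustration (the numbers are invented for the example):

```cpp
#include <cstddef>
#include <iostream>

// Ceiling division: number of size-`per_tile` tiles needed to cover `extent`.
std::size_t ceil_div(std::size_t extent, std::size_t per_tile)
{
    return (extent + per_tile - 1) / per_tile;
}

int main()
{
    // E.g. Ho = 30 output rows with HoPerTile = 4 -> 8 tiles,
    // the last tile covering only 2 rows.
    std::cout << ceil_div(30, 4) << '\n'; // prints 8
    std::cout << ceil_div(32, 4) << '\n'; // prints 8 (exact fit)
}
```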
...
@@ -9,7 +9,7 @@ struct GeneratorTensor_1
int value = 1;
template <typename... Is>
-    float operator()(Is... is)
+    float operator()(Is...)
{
return value;
}
...
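
The generator change is a warning fix: leaving the index pack unnamed silences unused-parameter warnings without changing behavior, since the functor returns a constant regardless of the indices. A standalone sketch of the same shape (simplified relative to the real header):

```cpp
// Constant generator: callable with any number of indices, all ignored.
struct GeneratorTensor_1
{
    int value = 1;

    template <typename... Is>
    float operator()(Is...) // pack left unnamed: indices are intentionally unused
    {
        return value;
    }
};

int main()
{
    GeneratorTensor_1 gen{};   // value defaults to 1
    float a = gen(0, 1);       // rank-2 index
    float b = gen(0, 1, 2, 3); // rank-4 index -- same constant either way
    return static_cast<int>(a + b) - 2; // exits 0
}
```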
@@ -99,40 +99,48 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
// clang-format on
}
-    ck::DataTypeEnum_t ABDataTypeEnum;
-    ck::DataTypeEnum_t AccDataTypeEnum;
-    ck::DataTypeEnum_t CDataTypeEnum;
-    int BlockSize;
-    int GN0;
-    int GK1;
-    int GM1PerBlockGM11;
-    int GN1PerBlockGN11;
-    int GK0PerBlock;
-    int BM1PerThreadBM11;
-    int BN1PerThreadBN11;
-    int BK0PerThread;
-    std::array<int, 2> BM10BN10ThreadClusterBM10Xs;
-    std::array<int, 2> BM10BN10ThreadClusterBN10Xs;
-    std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1;
-    std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1;
-    std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
-    std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
-    std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1;
-    std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1;
-    std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
-    std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
-    int CThreadTransferDstScalarPerVector;
-    bool HasMainKBlockLoop;
-    bool HasDoubleTailKBlockLoop;
+    ck::DataTypeEnum_t ABDataTypeEnum  = ck::DataTypeEnum_t::Unknown;
+    ck::DataTypeEnum_t AccDataTypeEnum = ck::DataTypeEnum_t::Unknown;
+    ck::DataTypeEnum_t CDataTypeEnum   = ck::DataTypeEnum_t::Unknown;
+    int BlockSize = 1;
+    int GN0 = -1;
+    int GK1 = -1;
+    int GM1PerBlockGM11 = -1;
+    int GN1PerBlockGN11 = -1;
+    int GK0PerBlock = -1;
+    int BM1PerThreadBM11 = -1;
+    int BN1PerThreadBN11 = -1;
+    int BK0PerThread = -1;
+    std::array<int, 2> BM10BN10ThreadClusterBM10Xs = {-1, -1};
+    std::array<int, 2> BM10BN10ThreadClusterBN10Xs = {-1, -1};
+    std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1 = {
+        -1, -1, -1, -1, -1};
+    std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1 = {
+        -1, -1, -1, -1, -1};
+    int CThreadTransferDstScalarPerVector = -1;
+    bool HasMainKBlockLoop = false;
+    bool HasDoubleTailKBlockLoop = false;
};
struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw
...
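
The sentinel defaults added above (`Unknown`, `-1`, `false`) make an unpopulated parameter struct detectable instead of silently carrying indeterminate values. As a hypothetical example of what that enables (this `IsConfigured` helper is not part of the commit, just a sketch against the struct in the diff):

```cpp
// Hypothetical helper, not in the commit: with the sentinel defaults,
// an unfilled compile-parameter struct can be rejected before use.
bool IsConfigured(const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw& p)
{
    return p.ABDataTypeEnum != ck::DataTypeEnum_t::Unknown &&
           p.CDataTypeEnum != ck::DataTypeEnum_t::Unknown &&
           p.GN0 > 0 && p.GK1 > 0 && p.GK0PerBlock > 0 &&
           p.CThreadTransferDstScalarPerVector > 0;
}
```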