Commit 3549e344 authored by Chao Liu's avatar Chao Liu
adding group

parent c0bfcf91
@@ -112,50 +112,14 @@ int run_conv_fwd(bool do_verification,
const WeiElementOp& wei_element_op,
const OutElementOp& out_element_op)
{
-#if 0
-const auto in_g_n_c_wis_desc = ck::utils::conv::get_input_host_tensor_descriptor<InLayout>(conv_param);
-const auto wei_g_k_c_xs_desc = ck::utils::conv::get_weight_host_tensor_descriptor<WeiLayout>(conv_param);
-const auto out_g_n_k_wos_desc = ck::utils::conv::get_output_host_tensor_descriptor<OutLayout>(conv_param);
-#else
-const auto in_g_n_wis_c_desc = HostTensorDescriptor(
-std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-static_cast<std::size_t>(conv_param.N_),
-static_cast<std::size_t>(conv_param.input_spatial_lengths_[0]),
-static_cast<std::size_t>(conv_param.input_spatial_lengths_[1]),
-static_cast<std::size_t>(conv_param.C_)});
-const auto wei_g_k_xs_c_desc = HostTensorDescriptor(
-std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-static_cast<std::size_t>(conv_param.K_),
-static_cast<std::size_t>(conv_param.filter_spatial_lengths_[0]),
-static_cast<std::size_t>(conv_param.filter_spatial_lengths_[1]),
-static_cast<std::size_t>(conv_param.C_)});
-const auto bias_g_n_wos_k_desc = HostTensorDescriptor(
-std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-static_cast<std::size_t>(conv_param.N_),
-static_cast<std::size_t>(conv_param.output_spatial_lengths_[0]),
-static_cast<std::size_t>(conv_param.output_spatial_lengths_[1]),
-static_cast<std::size_t>(conv_param.K_)},
-std::vector<std::size_t>{0, 0, 0, 0, 1});
-const auto out_g_n_wos_k_desc = HostTensorDescriptor(
-std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
-static_cast<std::size_t>(conv_param.N_),
-static_cast<std::size_t>(conv_param.output_spatial_lengths_[0]),
-static_cast<std::size_t>(conv_param.output_spatial_lengths_[1]),
-static_cast<std::size_t>(conv_param.K_)});
-// tensor descriptor in NCHW/KXYC/NKHW dimensional order
-const auto in_g_n_c_wis_desc = transpose_host_tensor_descriptor_given_new2old(
-in_g_n_wis_c_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-const auto wei_g_k_c_xs_desc = transpose_host_tensor_descriptor_given_new2old(
-wei_g_k_xs_c_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-const auto bias_g_n_k_wos_desc = transpose_host_tensor_descriptor_given_new2old(
-bias_g_n_wos_k_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-const auto out_g_n_k_wos_desc = transpose_host_tensor_descriptor_given_new2old(
-out_g_n_wos_k_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
-#endif
const auto in_g_n_c_wis_desc =
ck::utils::conv::make_input_host_tensor_descriptor_packed<InLayout>(conv_param);
const auto wei_g_k_c_xs_desc =
ck::utils::conv::make_weight_host_tensor_descriptor_packed<WeiLayout>(conv_param);
const auto bias_g_n_k_wos_desc =
ck::utils::conv::make_output_host_tensor_descriptor_packed<OutLayout>(conv_param);
const auto out_g_n_k_wos_desc =
ck::utils::conv::make_output_host_tensor_descriptor_packed<OutLayout>(conv_param);
Tensor<InDataType> in(in_g_n_c_wis_desc);
Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
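For orientation: a packed tensor descriptor's strides are the suffix products of its lengths, and transpose_host_tensor_descriptor_given_new2old only permutes lengths and strides, so the logical index order becomes GNCHW while the underlying memory stays in the physical layout (e.g. NHWGC). A minimal self-contained sketch of that stride arithmetic, using plain std::vector stand-ins rather than the actual HostTensorDescriptor API:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // packed NHWGC buffer: lengths in physical order {N, H, W, G, C}
    std::vector<std::size_t> len{2, 8, 8, 4, 16};

    // packed strides are suffix products of the lengths
    std::vector<std::size_t> str(len.size(), 1);
    for(int i = static_cast<int>(len.size()) - 2; i >= 0; --i)
        str[i] = str[i + 1] * len[i + 1];

    // new2old map for NHWGC -> GNCHW: logical dim i reads physical dim new2old[i]
    const std::vector<std::size_t> new2old{3, 0, 4, 1, 2};

    std::vector<std::size_t> gnchw_len(len.size()), gnchw_str(len.size());
    for(std::size_t i = 0; i < len.size(); ++i)
    {
        gnchw_len[i] = len[new2old[i]];
        gnchw_str[i] = str[new2old[i]];
    }

    // expect lengths {4, 2, 16, 8, 8}: a G, N, C, H, W view of the same memory
    for(std::size_t i = 0; i < len.size(); ++i)
        std::cout << gnchw_len[i] << " (stride " << gnchw_str[i] << ")\n";
}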
@@ -267,9 +231,6 @@ int run_conv_fwd(bool do_verification,
Tensor<OutDataType> c_host(out_g_n_k_wos_desc);
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
-InLayout,
-WeiLayout,
-OutLayout,
InDataType,
WeiDataType,
OutDataType,
@@ -291,24 +252,9 @@ int run_conv_fwd(bool do_verification,
ref_invoker.Run(ref_argument);
-for(int g = 0; g < out_host.mDesc.GetLengths()[0]; g++)
-{
-for(int n = 0; n < out_host.mDesc.GetLengths()[1]; n++)
-{
-for(int k = 0; k < out_host.mDesc.GetLengths()[2]; k++)
-{
-for(int ho = 0; ho < out_host.mDesc.GetLengths()[3]; ho++)
-{
-for(int wo = 0; wo < out_host.mDesc.GetLengths()[4]; wo++)
-{
-out_element_op(out_host(g, n, k, ho, wo),
-c_host(g, n, k, ho, wo),
-bias(g, n, k, ho, wo));
-}
-}
-}
-}
-}
// TODO: implement elementwise operation for host
out_host.ForEach(
[&](auto&, auto idx) { out_element_op(out_host(idx), c_host(idx), bias(idx)); });
out_device_buf.FromDevice(out_device.mData.data());
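The removed loop nest and the new ForEach call compute the same epilogue: visit every output element once and apply the three-operand out_element_op. A reduced, self-contained model of such a bias-add epilogue over flat storage (AddRelu and apply_epilogue are illustrative names, not CK types):

#include <cstddef>
#include <vector>

// mirrors the three-operand out_element_op(out, c, bias) pattern above:
// y = activation(conv_out + bias), applied elementwise
struct AddRelu
{
    void operator()(float& y, float c, float bias) const
    {
        const float t = c + bias;
        y = t > 0.f ? t : 0.f;
    }
};

void apply_epilogue(std::vector<float>& out,
                    const std::vector<float>& c,
                    const std::vector<float>& bias)
{
    AddRelu op;
    for(std::size_t i = 0; i < out.size(); ++i)
        op(out[i], c[i], bias[i]); // one visit per element, independent of tensor rank
}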
......
@@ -71,22 +71,41 @@ static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecializatio
template <ck::index_t NDimSpatial>
using DeviceConvNDFwdInstance = ck::tensor_operation::device::DeviceConvFwdMultipleD_Xdl_CShuffle<
NDimSpatial,
#if 0
ck::tuple_element_t<NDimSpatial - 1,
-ck::Tuple<ck::tensor_layout::convolution::NWC,
-ck::tensor_layout::convolution::NHWC,
-ck::tensor_layout::convolution::NDHWC>>,
ck::Tuple<ck::tensor_layout::convolution::G_NW_C,
ck::tensor_layout::convolution::G_NHW_C,
ck::tensor_layout::convolution::G_NDHW_C>>,
ck::tuple_element_t<NDimSpatial - 1,
-ck::Tuple<ck::tensor_layout::convolution::KXC,
-ck::tensor_layout::convolution::KYXC,
-ck::tensor_layout::convolution::KZYXC>>,
ck::Tuple<ck::tensor_layout::convolution::G_K_X_C,
ck::tensor_layout::convolution::G_K_YX_C,
ck::tensor_layout::convolution::G_K_ZYX_C>>,
ck::Tuple<ck::tuple_element_t<NDimSpatial - 1,
-ck::Tuple<ck::tensor_layout::convolution::NW_K,
-ck::tensor_layout::convolution::NHW_K,
-ck::tensor_layout::convolution::NDHW_K>>>,
ck::Tuple<ck::tensor_layout::convolution::G_NW_K,
ck::tensor_layout::convolution::G_NHW_K,
ck::tensor_layout::convolution::G_NDHW_K>>>,
ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::G_NW_K,
ck::tensor_layout::convolution::G_NHW_K,
ck::tensor_layout::convolution::G_NDHW_K>>,
#else
ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::NWGC,
ck::tensor_layout::convolution::NHWGC,
ck::tensor_layout::convolution::NDHWGC>>,
ck::tuple_element_t<NDimSpatial - 1,
-ck::Tuple<ck::tensor_layout::convolution::NWK,
-ck::tensor_layout::convolution::NHWK,
-ck::tensor_layout::convolution::NDHWK>>,
ck::Tuple<ck::tensor_layout::convolution::KXGC,
ck::tensor_layout::convolution::KYXGC,
ck::tensor_layout::convolution::KZYXGC>>,
ck::Tuple<ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::NWGK,
ck::tensor_layout::convolution::NHWGK,
ck::tensor_layout::convolution::NDHWGK>>>,
ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::NWGK,
ck::tensor_layout::convolution::NHWGK,
ck::tensor_layout::convolution::NDHWGK>>,
#endif
InDataType,
WeiDataType,
AccDataType,
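The ck::tuple_element_t<NDimSpatial - 1, ck::Tuple<...>> idiom above picks a layout tag by spatial rank at compile time. The same pattern expressed with the standard library, using hypothetical stand-in tags:

#include <tuple>
#include <type_traits>

// stand-in layout tags (illustrative only)
struct NWGC_t {};
struct NHWGC_t {};
struct NDHWGC_t {};

// select the input-layout tag for a given spatial rank at compile time
template <int NDimSpatial>
using InLayoutOf = std::tuple_element_t<NDimSpatial - 1,
                                        std::tuple<NWGC_t, NHWGC_t, NDHWGC_t>>;

static_assert(std::is_same_v<InLayoutOf<2>, NHWGC_t>, "2-D conv picks the NHWGC tag");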
@@ -167,9 +186,9 @@ int main(int argc, char* argv[])
if(num_dim_spatial == 1)
{
return run_conv_fwd<1,
-ck::tensor_layout::convolution::NWC,
-ck::tensor_layout::convolution::KXC,
-ck::tensor_layout::convolution::NWK,
ck::tensor_layout::convolution::NWGC,
ck::tensor_layout::convolution::KXGC,
ck::tensor_layout::convolution::NWGK,
InDataType,
WeiDataType,
OutDataType,
@@ -187,9 +206,9 @@ int main(int argc, char* argv[])
else if(num_dim_spatial == 2)
{
return run_conv_fwd<2,
-ck::tensor_layout::convolution::NHWC,
-ck::tensor_layout::convolution::KYXC,
-ck::tensor_layout::convolution::NHWK,
ck::tensor_layout::convolution::NHWGC,
ck::tensor_layout::convolution::KYXGC,
ck::tensor_layout::convolution::NHWGK,
InDataType,
WeiDataType,
OutDataType,
@@ -207,9 +226,9 @@ int main(int argc, char* argv[])
else if(num_dim_spatial == 3)
{
return run_conv_fwd<3,
-ck::tensor_layout::convolution::NDHWC,
-ck::tensor_layout::convolution::KZYXC,
-ck::tensor_layout::convolution::NDHWK,
ck::tensor_layout::convolution::NDHWGC,
ck::tensor_layout::convolution::KZYXGC,
ck::tensor_layout::convolution::NDHWGK,
InDataType,
WeiDataType,
OutDataType,
......
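The chain of branches in main() maps the runtime rank onto one of three compile-time instantiations; each branch fixes NDimSpatial and the matching grouped layouts. A reduced model of that dispatch, with run_conv_fwd_stub standing in for run_conv_fwd:

// run_conv_fwd_stub is a stand-in for the real run_conv_fwd<NDimSpatial, ...>
template <int NDimSpatial>
int run_conv_fwd_stub()
{
    return NDimSpatial; // the real code would pick NWGC/NHWGC/NDHWGC etc. here
}

int dispatch(int num_dim_spatial)
{
    if(num_dim_spatial == 1) { return run_conv_fwd_stub<1>(); }
    else if(num_dim_spatial == 2) { return run_conv_fwd_stub<2>(); }
    else if(num_dim_spatial == 3) { return run_conv_fwd_stub<3>(); }
    return -1; // unsupported rank
}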
@@ -25,6 +25,39 @@ struct ColumnMajor : public BaseTensorLayout
namespace convolution {
// input tensor
// packed NCW/NCHW/NCDHW
struct NCW : public BaseTensorLayout
{
static constexpr const char* name = "NCW";
};
struct NCHW : public BaseTensorLayout
{
static constexpr const char* name = "NCHW";
};
struct NCDHW : public BaseTensorLayout
{
static constexpr const char* name = "NCDHW";
};
// packed GNCW/GNCHW/GNCDHW
struct GNCW : public BaseTensorLayout
{
static constexpr const char* name = "GNCW";
};
struct GNCHW : public BaseTensorLayout
{
static constexpr const char* name = "GNCHW";
};
struct GNCDHW : public BaseTensorLayout
{
static constexpr const char* name = "GNCDHW";
};
// input tensor
// packed NWC/NHWC/NDHWC
struct NWC : public BaseTensorLayout
@@ -43,20 +76,88 @@ struct NDHWC : public BaseTensorLayout
};
// input tensor
-// packed NCW/NCHW/NCDHW
-struct NCW : public BaseTensorLayout
// packed GNWC/GNHWC/GNDHWC
struct GNWC : public BaseTensorLayout
{
-static constexpr const char* name = "NCW";
static constexpr const char* name = "GNWC";
};
-struct NCHW : public BaseTensorLayout
struct GNHWC : public BaseTensorLayout
{
-static constexpr const char* name = "NCHW";
static constexpr const char* name = "GNHWC";
};
-struct NCDHW : public BaseTensorLayout
struct GNDHWC : public BaseTensorLayout
{
-static constexpr const char* name = "NCDHW";
static constexpr const char* name = "GNDHWC";
};
// input tensor
// packed NWGC/NHWGC/NDHWGC
struct NWGC : public BaseTensorLayout
{
static constexpr const char* name = "NWGC";
};
struct NHWGC : public BaseTensorLayout
{
static constexpr const char* name = "NHWGC";
};
struct NDHWGC : public BaseTensorLayout
{
static constexpr const char* name = "NDHWGC";
};
// input tensor
// strided layout
struct G_NW_C : public BaseTensorLayout
{
static constexpr const char* name = "G_NW_C";
};
struct G_NHW_C : public BaseTensorLayout
{
static constexpr const char* name = "G_NHW_C";
};
struct G_NDHW_C : public BaseTensorLayout
{
static constexpr const char* name = "G_NDHW_C";
};
// weight tensor
// packed KCX/KCYX/KCZYX
struct KCX : public BaseTensorLayout
{
static constexpr const char* name = "KCX";
};
struct KCYX : public BaseTensorLayout
{
static constexpr const char* name = "KCYX";
};
struct KCZYX : public BaseTensorLayout
{
static constexpr const char* name = "KCZYX";
};
// weight tensor
// packed GKCX/GKCYX/GKCZYX
struct GKCX : public BaseTensorLayout
{
static constexpr const char* name = "GKCX";
};
struct GKCYX : public BaseTensorLayout
{
static constexpr const char* name = "GKCYX";
};
struct GKCZYX : public BaseTensorLayout
{
static constexpr const char* name = "GKCZYX";
};
// weight tensor
......@@ -77,20 +178,88 @@ struct KZYXC : public BaseTensorLayout
};
// weight tensor
-// packed KCX/KCYX/KCZYX
-struct KCX : public BaseTensorLayout
// packed GKXC/GKYXC/GKZYXC
struct GKXC : public BaseTensorLayout
{
-static constexpr const char* name = "KCX";
static constexpr const char* name = "GKXC";
};
-struct KCYX : public BaseTensorLayout
struct GKYXC : public BaseTensorLayout
{
-static constexpr const char* name = "KCYX";
static constexpr const char* name = "GKYXC";
};
-struct KCZYX : public BaseTensorLayout
struct GKZYXC : public BaseTensorLayout
{
-static constexpr const char* name = "KCZYX";
static constexpr const char* name = "GKZYXC";
};
// weight tensor
// packed KXGC/KYXGC/KZYXGC
struct KXGC : public BaseTensorLayout
{
static constexpr const char* name = "KXGC";
};
struct KYXGC : public BaseTensorLayout
{
static constexpr const char* name = "KYXGC";
};
struct KZYXGC : public BaseTensorLayout
{
static constexpr const char* name = "KZYXGC";
};
// weight tensor
// strided
struct G_K_X_C : public BaseTensorLayout
{
static constexpr const char* name = "G_K_X_C";
};
struct G_K_YX_C : public BaseTensorLayout
{
static constexpr const char* name = "G_K_YX_C";
};
struct G_K_ZYX_C : public BaseTensorLayout
{
static constexpr const char* name = "G_K_ZYX_C";
};
// output tensor
// packed NKW/NKHW/NKDHW
struct NKW : public BaseTensorLayout
{
static constexpr const char* name = "NKW";
};
struct NKHW : public BaseTensorLayout
{
static constexpr const char* name = "NKHW";
};
struct NKDHW : public BaseTensorLayout
{
static constexpr const char* name = "NKDHW";
};
// output tensor
// packed GNKW/GNKHW/GNKDHW
struct GNKW : public BaseTensorLayout
{
static constexpr const char* name = "GNKW";
};
struct GNKHW : public BaseTensorLayout
{
static constexpr const char* name = "GNKHW";
};
struct GNKDHW : public BaseTensorLayout
{
static constexpr const char* name = "GNKDHW";
};
// output tensor
@@ -111,37 +280,54 @@ struct NDHWK : public BaseTensorLayout
};
// output tensor
-// packed NKW/NKHW/NKDHW
-struct NKW : public BaseTensorLayout
// packed GNWK/GNHWK/GNDHWK
struct GNWK : public BaseTensorLayout
{
-static constexpr const char* name = "NKW";
static constexpr const char* name = "GNWK";
};
-struct NKHW : public BaseTensorLayout
struct GNHWK : public BaseTensorLayout
{
-static constexpr const char* name = "NKHW";
static constexpr const char* name = "GNHWK";
};
-struct NKDHW : public BaseTensorLayout
struct GNDHWK : public BaseTensorLayout
{
-static constexpr const char* name = "NKDHW";
static constexpr const char* name = "GNDHWK";
};
// output tensor
// packed NWGK/NHWGK/NDHWGK
struct NWGK : public BaseTensorLayout
{
static constexpr const char* name = "NWGK";
};
struct NHWGK : public BaseTensorLayout
{
static constexpr const char* name = "NHWGK";
};
struct NDHWGK : public BaseTensorLayout
{
static constexpr const char* name = "NDHWGK";
};
// output tensor
// strided layout
-struct NW_K : public BaseTensorLayout
struct G_NW_K : public BaseTensorLayout
{
-static constexpr const char* name = "NW_K";
static constexpr const char* name = "G_NW_K";
};
-struct NHW_K : public BaseTensorLayout
struct G_NHW_K : public BaseTensorLayout
{
-static constexpr const char* name = "NHW_K";
static constexpr const char* name = "G_NHW_K";
};
-struct NDHW_K : public BaseTensorLayout
struct G_NDHW_K : public BaseTensorLayout
{
-static constexpr const char* name = "NDHW_K";
static constexpr const char* name = "G_NDHW_K";
};
} // namespace convolution
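All of the structs above are empty tag types whose only payload is a compile-time name; code selects behavior by branching or specializing on the tag type. A small sketch of how such tags are typically consumed (print_layout is illustrative, not a CK utility):

#include <cstdio>

struct BaseTensorLayout
{
};

struct NHWGC : public BaseTensorLayout
{
    static constexpr const char* name = "NHWGC";
};

// behavior is keyed off the type; the tag carries no runtime data
template <typename Layout>
void print_layout()
{
    std::printf("layout = %s\n", Layout::name);
}

int main()
{
    print_layout<NHWGC>();
}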
......
@@ -30,12 +30,8 @@ namespace host {
// operation.
// @tparam NumDimSpatial Number of spatial dimensions.
//
-// FIXME: only support NDimSpatial = 1 to 3; only support NCHW and NHWC layout.
-// Need to be more general
// tensor descriptor in GNCHW/GKCYX/GNKHW dimensional order
template <ck::index_t NumDimSpatial,
-typename InLayout,
-typename WeiLayout,
-typename OutLayout,
typename InDataType,
typename WeiDataType,
typename OutDataType,
@@ -91,7 +87,6 @@ struct ReferenceConvFwd : public device::BaseOperator
float Run(const Argument& arg)
{
-// tensor descriptor in NCHW/KXYC/NKHW dimensional order
if constexpr(NumDimSpatial == 1)
{
auto func = [&](auto g, auto n, auto k, auto wo) {
......
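With the layout template parameters removed, the reference operator can index every tensor in a fixed GNCHW/GKCYX/GNKHW dimensional order. For orientation, a minimal self-contained 1-D grouped forward convolution in that order, with unit stride and no padding or dilation (a sketch, not the CK ReferenceConvFwd implementation):

#include <vector>

// grouped 1-D forward convolution over GNCW/GKCX/GNKW-ordered host tensors
void conv1d_fwd_ref(const std::vector<float>& in,  // [G][N][C][Wi]
                    const std::vector<float>& wei, // [G][K][C][X]
                    std::vector<float>& out,       // [G][N][K][Wo]
                    int G, int N, int C, int K, int Wi, int X)
{
    const int Wo = Wi - X + 1; // unit stride, no padding
    for(int g = 0; g < G; ++g)
        for(int n = 0; n < N; ++n)
            for(int k = 0; k < K; ++k)
                for(int wo = 0; wo < Wo; ++wo)
                {
                    float acc = 0.f;
                    for(int c = 0; c < C; ++c)
                        for(int x = 0; x < X; ++x)
                            acc += in[((g * N + n) * C + c) * Wi + (wo + x)] *
                                   wei[((g * K + k) * C + c) * X + x];
                    out[((g * N + n) * K + k) * Wo + wo] = acc;
                }
}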
@@ -4,6 +4,7 @@
#pragma once
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
@@ -11,109 +12,238 @@ namespace ck {
namespace utils {
namespace conv {
-template <typename InLayout>
-HostTensorDescriptor get_input_host_tensor_descriptor(const ck::utils::conv::ConvParam& param)
namespace detail {
template <typename OldLayout>
std::vector<std::size_t> get_layout_transpose_gnchw_to_old()
{
-if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::NWC> ||
-ck::is_same_v<InLayout, ck::tensor_layout::convolution::NHWC> ||
-ck::is_same_v<InLayout, ck::tensor_layout::convolution::NDHWC>)
if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNCW> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKCX> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNKW>)
{
return {0, 1, 2, 3};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNCHW> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKCYX> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNKHW>)
{
return {0, 1, 2, 3, 4};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNCDHW> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKCZYX> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNKDHW>)
{
return {0, 1, 2, 3, 4, 5};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNWC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKXC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNWK>)
{
return {0, 1, 3, 2};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNHWC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKYXC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNHWK>)
{
return {0, 1, 4, 2, 3};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNDHWC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GKZYXC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::GNDHWK>)
{
return {0, 1, 5, 2, 3, 4};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NWGC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::KXGC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NWGK>)
{
return {2, 0, 3, 1};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NHWGC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::KYXGC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NHWGK>)
{
return {3, 0, 4, 1, 2};
}
else if constexpr(ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NDHWGC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::KZYXGC> ||
ck::is_same_v<OldLayout, ck::tensor_layout::convolution::NDHWGK>)
{
return {4, 0, 5, 1, 2, 3};
}
else
{
-std::vector<std::size_t> nhwc_lengths{static_cast<std::size_t>(param.N_),
-static_cast<std::size_t>(param.C_)};
printf("%s\n", __func__);
throw std::runtime_error("wrong! unsupported layout");
}
}
-nhwc_lengths.insert(nhwc_lengths.begin() + 1,
-param.input_spatial_lengths_.begin(),
-param.input_spatial_lengths_.end());
} // namespace detail
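Each vector returned above is a new2old map: logical dimension i of the GNCHW-ordered view is read from physical dimension new2old[i] of the stored layout. A quick self-contained check for the NHWGC case:

#include <cassert>
#include <string>
#include <vector>

int main()
{
    // physical dimension names for an NHWGC input tensor
    const std::string physical = "NHWGC";

    // map returned above for NHWGC: logical dim i -> physical dim new2old[i]
    const std::vector<int> new2old{3, 0, 4, 1, 2};

    std::string logical;
    for(int p : new2old)
        logical += physical[p];

    assert(logical == "GNCHW"); // the transposed view is GNCHW-ordered
}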
// make tensor descriptor for packed input tensor, and order the dimension in the order of GNCHW
// regardless of physical layout
template <typename InLayout>
HostTensorDescriptor
make_input_host_tensor_descriptor_packed(const ck::utils::conv::ConvParam& param)
{
std::vector<std::size_t> physical_lengths;
-return HostTensorDescriptor(nhwc_lengths);
if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNCW> ||
ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNCHW> ||
ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNCDHW>)
{
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.N_),
static_cast<std::size_t>(param.C_)};
physical_lengths.insert(physical_lengths.end(),
param.input_spatial_lengths_.begin(),
param.input_spatial_lengths_.begin() + param.num_dim_spatial_);
}
-else if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::NCW> ||
-ck::is_same_v<InLayout, ck::tensor_layout::convolution::NCHW> ||
-ck::is_same_v<InLayout, ck::tensor_layout::convolution::NCDHW>)
else if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNWC> ||
ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNHWC> ||
ck::is_same_v<InLayout, ck::tensor_layout::convolution::GNDHWC>)
{
-std::vector<std::size_t> nchw_lengths{static_cast<std::size_t>(param.N_),
-static_cast<std::size_t>(param.C_)};
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.N_),
static_cast<std::size_t>(param.C_)};
-nchw_lengths.insert(nchw_lengths.end(),
-param.input_spatial_lengths_.begin(),
-param.input_spatial_lengths_.end());
physical_lengths.insert(physical_lengths.begin() + 2,
param.input_spatial_lengths_.begin(),
param.input_spatial_lengths_.begin() + param.num_dim_spatial_);
}
else if constexpr(ck::is_same_v<InLayout, ck::tensor_layout::convolution::NWGC> ||
ck::is_same_v<InLayout, ck::tensor_layout::convolution::NHWGC> ||
ck::is_same_v<InLayout, ck::tensor_layout::convolution::NDHWGC>)
{
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.N_),
static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.C_)};
-return HostTensorDescriptor(nchw_lengths);
physical_lengths.insert(physical_lengths.begin() + 1,
param.input_spatial_lengths_.begin(),
param.input_spatial_lengths_.begin() + param.num_dim_spatial_);
}
else
{
printf("%s\n", __func__);
printf("%s\n", InLayout::name);
throw std::runtime_error("wrong! unsupported layout");
}
return transpose_host_tensor_descriptor_given_new2old(
HostTensorDescriptor(physical_lengths),
detail::get_layout_transpose_gnchw_to_old<InLayout>());
}
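A usage sketch for the function above, assuming a ConvParam already populated with G_, N_, C_, num_dim_spatial_ and input_spatial_lengths_ as in the examples earlier in this commit:

// const auto in_desc =
//     ck::utils::conv::make_input_host_tensor_descriptor_packed<
//         ck::tensor_layout::convolution::NHWGC>(param);
//
// in_desc.GetLengths() is then ordered {G, N, C, Hi, Wi} even though the
// underlying memory stays packed NHWGC.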
// make tensor descriptor for packed weight tensor, and order the dimension in the order of GKCYX
// regardless of physical layout
template <typename WeiLayout>
-HostTensorDescriptor get_weight_host_tensor_descriptor(const ck::utils::conv::ConvParam& param)
HostTensorDescriptor
make_weight_host_tensor_descriptor_packed(const ck::utils::conv::ConvParam& param)
{
-if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KXC> ||
-ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KYXC> ||
-ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KZYXC>)
-{
-std::vector<std::size_t> kyxc_lengths{static_cast<std::size_t>(param.K_),
-static_cast<std::size_t>(param.C_)};
std::vector<std::size_t> physical_lengths;
-kyxc_lengths.insert(kyxc_lengths.begin() + 1,
-param.filter_spatial_lengths_.begin(),
-param.filter_spatial_lengths_.end());
if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKCX> ||
ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKCYX> ||
ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKCZYX>)
{
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.K_),
static_cast<std::size_t>(param.C_)};
-return HostTensorDescriptor(kyxc_lengths);
physical_lengths.insert(physical_lengths.end(),
param.filter_spatial_lengths_.begin(),
param.filter_spatial_lengths_.begin() + param.num_dim_spatial_);
}
-else if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KCX> ||
-ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KCYX> ||
-ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KCZYX>)
else if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKXC> ||
ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKYXC> ||
ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::GKZYXC>)
{
-std::vector<std::size_t> kcyx_lengths{static_cast<std::size_t>(param.K_),
-static_cast<std::size_t>(param.C_)};
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.K_),
static_cast<std::size_t>(param.C_)};
-kcyx_lengths.insert(kcyx_lengths.end(),
-param.filter_spatial_lengths_.begin(),
-param.filter_spatial_lengths_.end());
physical_lengths.insert(physical_lengths.begin() + 2,
param.filter_spatial_lengths_.begin(),
param.filter_spatial_lengths_.begin() + param.num_dim_spatial_);
}
else if constexpr(ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KXGC> ||
ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KYXGC> ||
ck::is_same_v<WeiLayout, ck::tensor_layout::convolution::KZYXGC>)
{
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.K_),
static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.C_)};
-return HostTensorDescriptor(kcyx_lengths);
physical_lengths.insert(physical_lengths.begin() + 1,
param.filter_spatial_lengths_.begin(),
param.filter_spatial_lengths_.begin() + param.num_dim_spatial_);
}
else
{
printf("%s\n", __func__);
throw std::runtime_error("wrong! unsupported layout");
}
return transpose_host_tensor_descriptor_given_new2old(
HostTensorDescriptor(physical_lengths),
detail::get_layout_transpose_gnchw_to_old<WeiLayout>());
}
// make tensor descriptor for packed output tensor, and order the dimension in the order of GNKHW
// regardless of physical layout
template <typename OutLayout>
-HostTensorDescriptor get_output_host_tensor_descriptor(const ck::utils::conv::ConvParam& param)
HostTensorDescriptor
make_output_host_tensor_descriptor_packed(const ck::utils::conv::ConvParam& param)
{
-if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NWK> ||
-ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NHWK> ||
-ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NDHWK>)
-{
-std::vector<std::size_t> nhwk_lengths{static_cast<std::size_t>(param.N_),
-static_cast<std::size_t>(param.K_)};
std::vector<std::size_t> physical_lengths;
-nhwk_lengths.insert(nhwk_lengths.begin() + 1,
-param.output_spatial_lengths_.begin(),
-param.output_spatial_lengths_.end());
if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNKW> ||
ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNKHW> ||
ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNKDHW>)
{
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.N_),
static_cast<std::size_t>(param.K_)};
-return HostTensorDescriptor(nhwk_lengths);
physical_lengths.insert(physical_lengths.end(),
param.output_spatial_lengths_.begin(),
param.output_spatial_lengths_.begin() + param.num_dim_spatial_);
}
-else if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NKW> ||
-ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NKHW> ||
-ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NKDHW>)
else if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNWK> ||
ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNHWK> ||
ck::is_same_v<OutLayout, ck::tensor_layout::convolution::GNDHWK>)
{
-std::vector<std::size_t> nkhw_lengths{static_cast<std::size_t>(param.N_),
-static_cast<std::size_t>(param.K_)};
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.N_),
static_cast<std::size_t>(param.K_)};
-nkhw_lengths.insert(nkhw_lengths.end(),
-param.output_spatial_lengths_.begin(),
-param.output_spatial_lengths_.end());
physical_lengths.insert(physical_lengths.begin() + 2,
param.output_spatial_lengths_.begin(),
param.output_spatial_lengths_.begin() + param.num_dim_spatial_);
}
else if constexpr(ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NWGK> ||
ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NHWGK> ||
ck::is_same_v<OutLayout, ck::tensor_layout::convolution::NDHWGK>)
{
physical_lengths = std::vector<std::size_t>{static_cast<std::size_t>(param.N_),
static_cast<std::size_t>(param.G_),
static_cast<std::size_t>(param.K_)};
-return HostTensorDescriptor(nkhw_lengths);
physical_lengths.insert(physical_lengths.begin() + 1,
param.output_spatial_lengths_.begin(),
param.output_spatial_lengths_.begin() + param.num_dim_spatial_);
}
else
{
printf("%s\n", __func__);
throw std::runtime_error("wrong! unsupported layout");
}
return transpose_host_tensor_descriptor_given_new2old(
HostTensorDescriptor(physical_lengths),
detail::get_layout_transpose_gnchw_to_old<OutLayout>());
}
} // namespace conv
......
@@ -358,6 +358,19 @@ struct Tensor
mDesc.GetLengths()[4])(num_thread);
break;
}
case 6: {
auto f = [&](auto i0, auto i1, auto i2, auto i3, auto i4, auto i5) {
(*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
};
make_ParallelTensorFunctor(f,
mDesc.GetLengths()[0],
mDesc.GetLengths()[1],
mDesc.GetLengths()[2],
mDesc.GetLengths()[3],
mDesc.GetLengths()[4],
mDesc.GetLengths()[5])(num_thread);
break;
}
default: throw std::runtime_error("unsupported dimension");
}
}
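The switch gains one case per supported rank (this commit adds the rank-6 case for the extra G dimension). A rank-generic alternative sketched with plain recursion, shown only as a design comparison, not the CK Tensor/make_ParallelTensorFunctor API:

#include <cstddef>
#include <vector>

// walk the whole N-D index space recursively and hand each fully-formed
// index to the generator g, so no per-rank case is needed
template <typename G>
void generate(const std::vector<std::size_t>& lengths,
              std::vector<std::size_t>& idx,
              std::size_t dim,
              G& g)
{
    if(dim == lengths.size())
    {
        g(idx); // consume one complete N-D index
        return;
    }
    for(std::size_t i = 0; i < lengths[dim]; ++i)
    {
        idx[dim] = i;
        generate(lengths, idx, dim + 1, g);
    }
}

// usage: std::vector<std::size_t> idx(lengths.size()); generate(lengths, idx, 0, g);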
......