Commit 98f349c6 authored by Bartlomiej Kocot
Browse files

Minor stylistic fixes

parent 728b8695
......@@ -11,11 +11,11 @@ using ColumnToImageOp = ck::conv_tensor_rearrange_op::ColumnToImage;
// clang-format off
using DeviceColToImgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1, ColumnToImageOp>;
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
// clang-format on
bool RunColumnToImage(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params)
......
......@@ -11,11 +11,11 @@ using ImageToColumnOp = ck::conv_tensor_rearrange_op::ImageToColumn;
// clang-format off
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1, ImageToColumnOp>;
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
// clang-format on
bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params)
......
......@@ -16,7 +16,9 @@ namespace device {
*
* This Device operator supports conversion of an image ([G, N, Di, Hi, Wi, C]) to
* the gemm problem ([N * Do * Ho * Wo, Z * Y * X * C]) (Image to Column) and
* conversion gemm form to the image (Column to Image). G must be equal to 1.
* conversion of the gemm form back to the image (Column to Image).
*
* Note that G must be equal to 1.
*
* \tparam NDimSpatial Number of spatial dimensions.
* \tparam ImageLayout Input Layout.
......
......@@ -35,12 +35,13 @@ template <index_t NDimSpatial,
index_t KPerBlock,
typename ThreadClusterLengths,
index_t ScalarPerVector,
typename ConvTensorRearrangeOp>
struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial,
typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
struct DeviceColumnToImageImpl
: public DeviceConvTensorRearrange<NDimSpatial,
ImageLayout,
InputDataType,
OutputDataType,
ConvTensorRearrangeOp>
conv_tensor_rearrange_op::ColumnToImage>
{
static constexpr auto I0 = Number<0>{};
......@@ -476,12 +477,7 @@ struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial,
bool IsSupportedArgument(const Argument& arg)
{
using namespace conv_tensor_rearrange_op;
using namespace tensor_layout::convolution;
if constexpr(!std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
{
return false;
}
if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> ||
std::is_same_v<ImageLayout, GNDHWC>))
{
......
......@@ -33,12 +33,13 @@ template <index_t NDimSpatial,
index_t KPerBlock,
typename ThreadClusterLengths,
index_t ScalarPerVector,
typename ConvTensorRearrangeOp>
struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial,
typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
struct DeviceImageToColumnImpl
: public DeviceConvTensorRearrange<NDimSpatial,
ImageLayout,
InputDataType,
OutputDataType,
ConvTensorRearrangeOp>
conv_tensor_rearrange_op::ImageToColumn>
{
static constexpr auto I0 = Number<0>{};
......@@ -249,12 +250,7 @@ struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial,
bool IsSupportedArgument(const Argument& arg)
{
using namespace conv_tensor_rearrange_op;
using namespace tensor_layout::convolution;
if constexpr(!std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
{
return false;
}
if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> ||
std::is_same_v<ImageLayout, GNDHWC>))
{
......
......@@ -21,19 +21,19 @@ using namespace ck::conv_tensor_rearrange_op;
// Image to Column
// nwc, 1d
void add_device_image_to_column_nhwc_1d_bf16_instances(
void add_device_image_to_column_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>&
instances);
void add_device_image_to_column_nhwc_1d_f16_instances(
void add_device_image_to_column_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>&
instances);
void add_device_image_to_column_nhwc_1d_f32_instances(
void add_device_image_to_column_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>&
instances);
void add_device_image_to_column_nhwc_1d_i8_instances(
void add_device_image_to_column_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>&
instances);
// nhwc, 2d
......@@ -54,38 +54,38 @@ void add_device_image_to_column_nhwc_2d_i8_instances(
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ImageToColumn>>>&
instances);
// ndhwc, 3d
void add_device_image_to_column_nhwc_3d_bf16_instances(
void add_device_image_to_column_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>&
instances);
void add_device_image_to_column_nhwc_3d_f16_instances(
void add_device_image_to_column_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>&
instances);
void add_device_image_to_column_nhwc_3d_f32_instances(
void add_device_image_to_column_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>&
instances);
void add_device_image_to_column_nhwc_3d_i8_instances(
void add_device_image_to_column_ndhwc_3d_i8_instances(
std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>&
instances);
// Column to Image
// nwc, 1d
void add_device_column_to_image_nhwc_1d_bf16_instances(
void add_device_column_to_image_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>&
instances);
void add_device_column_to_image_nhwc_1d_f16_instances(
void add_device_column_to_image_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>&
instances);
void add_device_column_to_image_nhwc_1d_f32_instances(
void add_device_column_to_image_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>&
instances);
void add_device_column_to_image_nhwc_1d_i8_instances(
void add_device_column_to_image_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>&
instances);
// nhwc, 2d
......@@ -106,19 +106,19 @@ void add_device_column_to_image_nhwc_2d_i8_instances(
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ColumnToImage>>>&
instances);
// ndhwc, 3d
void add_device_column_to_image_nhwc_3d_bf16_instances(
void add_device_column_to_image_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>&
instances);
void add_device_column_to_image_nhwc_3d_f16_instances(
void add_device_column_to_image_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>&
instances);
void add_device_column_to_image_nhwc_3d_f32_instances(
void add_device_column_to_image_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>&
instances);
void add_device_column_to_image_nhwc_3d_i8_instances(
void add_device_column_to_image_ndhwc_3d_i8_instances(
std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>&
instances);
......@@ -151,20 +151,20 @@ struct DeviceOperationInstanceFactory<
{
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{
add_device_image_to_column_nhwc_1d_f32_instances(op_ptrs);
add_device_image_to_column_nwc_1d_f32_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{
add_device_image_to_column_nhwc_1d_f16_instances(op_ptrs);
add_device_image_to_column_nwc_1d_f16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>)
{
add_device_image_to_column_nhwc_1d_bf16_instances(op_ptrs);
add_device_image_to_column_nwc_1d_bf16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{
add_device_image_to_column_nhwc_1d_i8_instances(op_ptrs);
add_device_image_to_column_nwc_1d_i8_instances(op_ptrs);
}
}
else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>)
......@@ -191,20 +191,20 @@ struct DeviceOperationInstanceFactory<
{
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{
add_device_image_to_column_nhwc_3d_f32_instances(op_ptrs);
add_device_image_to_column_ndhwc_3d_f32_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{
add_device_image_to_column_nhwc_3d_f16_instances(op_ptrs);
add_device_image_to_column_ndhwc_3d_f16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>)
{
add_device_image_to_column_nhwc_3d_bf16_instances(op_ptrs);
add_device_image_to_column_ndhwc_3d_bf16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{
add_device_image_to_column_nhwc_3d_i8_instances(op_ptrs);
add_device_image_to_column_ndhwc_3d_i8_instances(op_ptrs);
}
}
}
......@@ -214,20 +214,20 @@ struct DeviceOperationInstanceFactory<
{
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{
add_device_column_to_image_nhwc_1d_f32_instances(op_ptrs);
add_device_column_to_image_nwc_1d_f32_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{
add_device_column_to_image_nhwc_1d_f16_instances(op_ptrs);
add_device_column_to_image_nwc_1d_f16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>)
{
add_device_column_to_image_nhwc_1d_bf16_instances(op_ptrs);
add_device_column_to_image_nwc_1d_bf16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{
add_device_column_to_image_nhwc_1d_i8_instances(op_ptrs);
add_device_column_to_image_nwc_1d_i8_instances(op_ptrs);
}
}
else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>)
......@@ -254,20 +254,20 @@ struct DeviceOperationInstanceFactory<
{
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{
add_device_column_to_image_nhwc_3d_f32_instances(op_ptrs);
add_device_column_to_image_ndhwc_3d_f32_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{
add_device_column_to_image_nhwc_3d_f16_instances(op_ptrs);
add_device_column_to_image_ndhwc_3d_f16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>)
{
add_device_column_to_image_nhwc_3d_bf16_instances(op_ptrs);
add_device_column_to_image_ndhwc_3d_bf16_instances(op_ptrs);
}
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{
add_device_column_to_image_nhwc_3d_i8_instances(op_ptrs);
add_device_column_to_image_ndhwc_3d_i8_instances(op_ptrs);
}
}
}
......
......@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_bf16_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8, ColumnToImage>
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8>
// clang-format on
>;
template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_f16_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8, ColumnToImage>
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8>
// clang-format on
>;
template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_f32_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4, ColumnToImage>
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4>
// clang-format on
>;
template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_i8_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8, ColumnToImage>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16,ColumnToImage>
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16>
// clang-format on
>;
......
......@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_bf16_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8, ImageToColumn>
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8>
// clang-format on
>;
template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_f16_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8, ImageToColumn>
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8>
// clang-format on
>;
// Tuple of DeviceImageToColumnImpl instantiations with F32 input and output
// data types, templated on the spatial dimension count and the input layout.
// Per the column headers below, the entries vary block size (64/128/256),
// MPerBlock, KPerBlock, thread cluster lengths and scalar-per-vector (1 or 4).
template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_f32_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4, ImageToColumn>
// NOTE(review): the entries above pass a trailing ImageToColumn template
// argument, the entries below do not, and no comma separates the two groups.
// This looks like the old and new lines of a diff shown together -- confirm
// which group is current before relying on this list.
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4>
// clang-format on
>;
// Tuple of DeviceImageToColumnImpl instantiations with int8_t input and output
// data types, templated on the spatial dimension count and the input layout.
// Per the column headers below, the entries vary block size (64/128/256),
// MPerBlock, KPerBlock, thread cluster lengths and scalar-per-vector
// (1, 4, 8 or 16).
template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_i8_instances = std::tuple<
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
//#####################| Spatial| | | | | | | Lengths| Vector| |
//#####################| | | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8, ImageToColumn>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16,ImageToColumn>
// NOTE(review): the entries above pass a trailing ImageToColumn template
// argument, the entries below do not, and no comma separates the two groups.
// This looks like the old and new lines of a diff shown together -- confirm
// which group is current before relying on this list.
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16>
// clang-format on
>;
......
......@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16
void add_device_column_to_image_nhwc_1d_bf16_instances(
void add_device_column_to_image_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>&
instances)
{
......@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_1d_bf16_instances(
#endif
#ifdef CK_ENABLE_FP16
void add_device_column_to_image_nhwc_1d_f16_instances(
void add_device_column_to_image_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>&
instances)
{
......@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_1d_f16_instances(
#endif
#ifdef CK_ENABLE_FP32
void add_device_column_to_image_nhwc_1d_f32_instances(
void add_device_column_to_image_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>&
instances)
{
......@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_1d_f32_instances(
#endif
#ifdef CK_ENABLE_INT8
void add_device_column_to_image_nhwc_1d_i8_instances(
void add_device_column_to_image_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>&
instances)
{
......
......@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16
void add_device_column_to_image_nhwc_3d_bf16_instances(
void add_device_column_to_image_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>&
instances)
{
......@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_3d_bf16_instances(
#endif
#ifdef CK_ENABLE_FP16
void add_device_column_to_image_nhwc_3d_f16_instances(
void add_device_column_to_image_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>&
instances)
{
......@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_3d_f16_instances(
#endif
#ifdef CK_ENABLE_FP32
void add_device_column_to_image_nhwc_3d_f32_instances(
void add_device_column_to_image_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>&
instances)
{
......@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_3d_f32_instances(
#endif
#ifdef CK_ENABLE_INT8
void add_device_column_to_image_nhwc_3d_i8_instances(
void add_device_column_to_image_ndhwc_3d_i8_instances(
std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>&
instances)
......
......@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16
void add_device_image_to_column_nhwc_1d_bf16_instances(
void add_device_image_to_column_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>&
instances)
{
......@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_1d_bf16_instances(
#endif
#ifdef CK_ENABLE_FP16
void add_device_image_to_column_nhwc_1d_f16_instances(
void add_device_image_to_column_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>&
instances)
{
......@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_1d_f16_instances(
#endif
#ifdef CK_ENABLE_FP32
void add_device_image_to_column_nhwc_1d_f32_instances(
void add_device_image_to_column_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>&
instances)
{
......@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_1d_f32_instances(
#endif
#ifdef CK_ENABLE_INT8
void add_device_image_to_column_nhwc_1d_i8_instances(
void add_device_image_to_column_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>&
instances)
{
......
......@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16
void add_device_image_to_column_nhwc_3d_bf16_instances(
void add_device_image_to_column_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>&
instances)
{
......@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_3d_bf16_instances(
#endif
#ifdef CK_ENABLE_FP16
void add_device_image_to_column_nhwc_3d_f16_instances(
void add_device_image_to_column_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>&
instances)
{
......@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_3d_f16_instances(
#endif
#ifdef CK_ENABLE_FP32
void add_device_image_to_column_nhwc_3d_f32_instances(
void add_device_image_to_column_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>&
instances)
{
......@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_3d_f32_instances(
#endif
#ifdef CK_ENABLE_INT8
void add_device_image_to_column_nhwc_3d_i8_instances(
void add_device_image_to_column_ndhwc_3d_i8_instances(
std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>&
instances)
......
......@@ -35,17 +35,17 @@ class TestConvTensorRearrangeInterface : public ::testing::Test
// clang-format off
// Image-to-column device instance type: DataType for both input and output,
// block size 256, MPerBlock = KPerBlock = 128, thread cluster S<16, 16>, and
// ScalarPerVector as the vector width.
// NOTE(review): two template-argument lists follow (one ending in
// ImageToColumn, one without). This looks like the old and new lines of a
// diff shown together -- confirm which one is current.
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
// Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
// Spatial| | | | | | | Lengths| Vector| |
// | | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector, ImageToColumn>;
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
// Column-to-image device instance type: DataType for both input and output,
// block size 256, MPerBlock = KPerBlock = 128, thread cluster S<16, 16>, and
// ScalarPerVector as the vector width.
// NOTE(review): two template-argument lists follow (one ending in
// ColumnToImage, one without). This looks like the old and new lines of a
// diff shown together -- confirm which one is current.
using DeviceColToimgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor|
// Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp|
// Spatial| | | | | | | Lengths| Vector| |
// | | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector, ColumnToImage>;
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
// clang-format on
ck::utils::conv::ConvParam conv_param;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment