Commit 98f349c6 authored by Bartlomiej Kocot's avatar Bartlomiej Kocot
Browse files

Minor stylistic fixes

parent 728b8695
...@@ -11,11 +11,11 @@ using ColumnToImageOp = ck::conv_tensor_rearrange_op::ColumnToImage; ...@@ -11,11 +11,11 @@ using ColumnToImageOp = ck::conv_tensor_rearrange_op::ColumnToImage;
// clang-format off // clang-format off
using DeviceColToImgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl using DeviceColToImgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1, ColumnToImageOp>; < NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
// clang-format on // clang-format on
bool RunColumnToImage(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params) bool RunColumnToImage(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params)
......
...@@ -11,11 +11,11 @@ using ImageToColumnOp = ck::conv_tensor_rearrange_op::ImageToColumn; ...@@ -11,11 +11,11 @@ using ImageToColumnOp = ck::conv_tensor_rearrange_op::ImageToColumn;
// clang-format off // clang-format off
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1, ImageToColumnOp>; < NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
// clang-format on // clang-format on
bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params) bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params)
......
...@@ -16,7 +16,9 @@ namespace device { ...@@ -16,7 +16,9 @@ namespace device {
* *
* This Device operator supports conversion image ([G, N, Di, Hi, Wi, C]) to * This Device operator supports conversion image ([G, N, Di, Hi, Wi, C]) to
* the gemm problem([N * Do * Ho * Wo, Z * Y * X * C]) (Image to Column) and * the gemm problem([N * Do * Ho * Wo, Z * Y * X * C]) (Image to Column) and
* conversion gemm form to the image (Column to Image). G must be equal to 1. * conversion gemm form to the image (Column to Image).
*
* Note that G must be equal to 1.
* *
* \tparam NDimSpatial Number of spatial dimensions. * \tparam NDimSpatial Number of spatial dimensions.
* \tparam ImageLayout Input Layout. * \tparam ImageLayout Input Layout.
......
...@@ -35,12 +35,13 @@ template <index_t NDimSpatial, ...@@ -35,12 +35,13 @@ template <index_t NDimSpatial,
index_t KPerBlock, index_t KPerBlock,
typename ThreadClusterLengths, typename ThreadClusterLengths,
index_t ScalarPerVector, index_t ScalarPerVector,
typename ConvTensorRearrangeOp> typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial, struct DeviceColumnToImageImpl
: public DeviceConvTensorRearrange<NDimSpatial,
ImageLayout, ImageLayout,
InputDataType, InputDataType,
OutputDataType, OutputDataType,
ConvTensorRearrangeOp> conv_tensor_rearrange_op::ColumnToImage>
{ {
static constexpr auto I0 = Number<0>{}; static constexpr auto I0 = Number<0>{};
...@@ -476,12 +477,7 @@ struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial, ...@@ -476,12 +477,7 @@ struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial,
bool IsSupportedArgument(const Argument& arg) bool IsSupportedArgument(const Argument& arg)
{ {
using namespace conv_tensor_rearrange_op;
using namespace tensor_layout::convolution; using namespace tensor_layout::convolution;
if constexpr(!std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
{
return false;
}
if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> || if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> ||
std::is_same_v<ImageLayout, GNDHWC>)) std::is_same_v<ImageLayout, GNDHWC>))
{ {
......
...@@ -33,12 +33,13 @@ template <index_t NDimSpatial, ...@@ -33,12 +33,13 @@ template <index_t NDimSpatial,
index_t KPerBlock, index_t KPerBlock,
typename ThreadClusterLengths, typename ThreadClusterLengths,
index_t ScalarPerVector, index_t ScalarPerVector,
typename ConvTensorRearrangeOp> typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial, struct DeviceImageToColumnImpl
: public DeviceConvTensorRearrange<NDimSpatial,
ImageLayout, ImageLayout,
InputDataType, InputDataType,
OutputDataType, OutputDataType,
ConvTensorRearrangeOp> conv_tensor_rearrange_op::ImageToColumn>
{ {
static constexpr auto I0 = Number<0>{}; static constexpr auto I0 = Number<0>{};
...@@ -249,12 +250,7 @@ struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial, ...@@ -249,12 +250,7 @@ struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial,
bool IsSupportedArgument(const Argument& arg) bool IsSupportedArgument(const Argument& arg)
{ {
using namespace conv_tensor_rearrange_op;
using namespace tensor_layout::convolution; using namespace tensor_layout::convolution;
if constexpr(!std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
{
return false;
}
if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> || if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> ||
std::is_same_v<ImageLayout, GNDHWC>)) std::is_same_v<ImageLayout, GNDHWC>))
{ {
......
...@@ -21,19 +21,19 @@ using namespace ck::conv_tensor_rearrange_op; ...@@ -21,19 +21,19 @@ using namespace ck::conv_tensor_rearrange_op;
// Image to Column // Image to Column
// nhwc, 1d // nhwc, 1d
void add_device_image_to_column_nhwc_1d_bf16_instances( void add_device_image_to_column_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>&
instances); instances);
void add_device_image_to_column_nhwc_1d_f16_instances( void add_device_image_to_column_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>&
instances); instances);
void add_device_image_to_column_nhwc_1d_f32_instances( void add_device_image_to_column_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>&
instances); instances);
void add_device_image_to_column_nhwc_1d_i8_instances( void add_device_image_to_column_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>&
instances); instances);
// nhwc, 2d // nhwc, 2d
...@@ -54,38 +54,38 @@ void add_device_image_to_column_nhwc_2d_i8_instances( ...@@ -54,38 +54,38 @@ void add_device_image_to_column_nhwc_2d_i8_instances(
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ImageToColumn>>>& std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ImageToColumn>>>&
instances); instances);
// nhwc, 3d // nhwc, 3d
void add_device_image_to_column_nhwc_3d_bf16_instances( void add_device_image_to_column_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>&
instances); instances);
void add_device_image_to_column_nhwc_3d_f16_instances( void add_device_image_to_column_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>&
instances); instances);
void add_device_image_to_column_nhwc_3d_f32_instances( void add_device_image_to_column_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>&
instances); instances);
void add_device_image_to_column_nhwc_3d_i8_instances( void add_device_image_to_column_ndhwc_3d_i8_instances(
std::vector< std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>& std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>&
instances); instances);
// Column to Image // Column to Image
// nhwc, 1d // nhwc, 1d
void add_device_column_to_image_nhwc_1d_bf16_instances( void add_device_column_to_image_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>&
instances); instances);
void add_device_column_to_image_nhwc_1d_f16_instances( void add_device_column_to_image_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>&
instances); instances);
void add_device_column_to_image_nhwc_1d_f32_instances( void add_device_column_to_image_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>&
instances); instances);
void add_device_column_to_image_nhwc_1d_i8_instances( void add_device_column_to_image_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>&
instances); instances);
// nhwc, 2d // nhwc, 2d
...@@ -106,19 +106,19 @@ void add_device_column_to_image_nhwc_2d_i8_instances( ...@@ -106,19 +106,19 @@ void add_device_column_to_image_nhwc_2d_i8_instances(
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ColumnToImage>>>& std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ColumnToImage>>>&
instances); instances);
// nhwc, 3d // nhwc, 3d
void add_device_column_to_image_nhwc_3d_bf16_instances( void add_device_column_to_image_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>&
instances); instances);
void add_device_column_to_image_nhwc_3d_f16_instances( void add_device_column_to_image_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>&
instances); instances);
void add_device_column_to_image_nhwc_3d_f32_instances( void add_device_column_to_image_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>&
instances); instances);
void add_device_column_to_image_nhwc_3d_i8_instances( void add_device_column_to_image_ndhwc_3d_i8_instances(
std::vector< std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>& std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>&
instances); instances);
...@@ -151,20 +151,20 @@ struct DeviceOperationInstanceFactory< ...@@ -151,20 +151,20 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>) if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{ {
add_device_image_to_column_nhwc_1d_f32_instances(op_ptrs); add_device_image_to_column_nwc_1d_f32_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>) else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{ {
add_device_image_to_column_nhwc_1d_f16_instances(op_ptrs); add_device_image_to_column_nwc_1d_f16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, ck::bhalf_t> && else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>) is_same_v<OutDataType, ck::bhalf_t>)
{ {
add_device_image_to_column_nhwc_1d_bf16_instances(op_ptrs); add_device_image_to_column_nwc_1d_bf16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>) else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{ {
add_device_image_to_column_nhwc_1d_i8_instances(op_ptrs); add_device_image_to_column_nwc_1d_i8_instances(op_ptrs);
} }
} }
else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>) else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>)
...@@ -191,20 +191,20 @@ struct DeviceOperationInstanceFactory< ...@@ -191,20 +191,20 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>) if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{ {
add_device_image_to_column_nhwc_3d_f32_instances(op_ptrs); add_device_image_to_column_ndhwc_3d_f32_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>) else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{ {
add_device_image_to_column_nhwc_3d_f16_instances(op_ptrs); add_device_image_to_column_ndhwc_3d_f16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, ck::bhalf_t> && else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>) is_same_v<OutDataType, ck::bhalf_t>)
{ {
add_device_image_to_column_nhwc_3d_bf16_instances(op_ptrs); add_device_image_to_column_ndhwc_3d_bf16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>) else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{ {
add_device_image_to_column_nhwc_3d_i8_instances(op_ptrs); add_device_image_to_column_ndhwc_3d_i8_instances(op_ptrs);
} }
} }
} }
...@@ -214,20 +214,20 @@ struct DeviceOperationInstanceFactory< ...@@ -214,20 +214,20 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>) if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{ {
add_device_column_to_image_nhwc_1d_f32_instances(op_ptrs); add_device_column_to_image_nwc_1d_f32_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>) else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{ {
add_device_column_to_image_nhwc_1d_f16_instances(op_ptrs); add_device_column_to_image_nwc_1d_f16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, ck::bhalf_t> && else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>) is_same_v<OutDataType, ck::bhalf_t>)
{ {
add_device_column_to_image_nhwc_1d_bf16_instances(op_ptrs); add_device_column_to_image_nwc_1d_bf16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>) else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{ {
add_device_column_to_image_nhwc_1d_i8_instances(op_ptrs); add_device_column_to_image_nwc_1d_i8_instances(op_ptrs);
} }
} }
else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>) else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>)
...@@ -254,20 +254,20 @@ struct DeviceOperationInstanceFactory< ...@@ -254,20 +254,20 @@ struct DeviceOperationInstanceFactory<
{ {
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>) if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
{ {
add_device_column_to_image_nhwc_3d_f32_instances(op_ptrs); add_device_column_to_image_ndhwc_3d_f32_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>) else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
{ {
add_device_column_to_image_nhwc_3d_f16_instances(op_ptrs); add_device_column_to_image_ndhwc_3d_f16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, ck::bhalf_t> && else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
is_same_v<OutDataType, ck::bhalf_t>) is_same_v<OutDataType, ck::bhalf_t>)
{ {
add_device_column_to_image_nhwc_3d_bf16_instances(op_ptrs); add_device_column_to_image_ndhwc_3d_bf16_instances(op_ptrs);
} }
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>) else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
{ {
add_device_column_to_image_nhwc_3d_i8_instances(op_ptrs); add_device_column_to_image_ndhwc_3d_i8_instances(op_ptrs);
} }
} }
} }
......
...@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>; ...@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_bf16_instances = std::tuple< using device_column_to_image_bf16_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8, ColumnToImage> DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8>
// clang-format on // clang-format on
>; >;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_f16_instances = std::tuple< using device_column_to_image_f16_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8, ColumnToImage> DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8>
// clang-format on // clang-format on
>; >;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_f32_instances = std::tuple< using device_column_to_image_f32_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4, ColumnToImage> DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4>
// clang-format on // clang-format on
>; >;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_column_to_image_i8_instances = std::tuple< using device_column_to_image_i8_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8, ColumnToImage>, DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8>,
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16,ColumnToImage> DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16>
// clang-format on // clang-format on
>; >;
......
...@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>; ...@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_bf16_instances = std::tuple< using device_image_to_column_bf16_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8, ImageToColumn> DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8>
// clang-format on // clang-format on
>; >;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_f16_instances = std::tuple< using device_image_to_column_f16_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8, ImageToColumn> DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8>
// clang-format on // clang-format on
>; >;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_f32_instances = std::tuple< using device_image_to_column_f32_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4, ImageToColumn> DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4>
// clang-format on // clang-format on
>; >;
template <ck::index_t NDimSpatial, typename InLayout> template <ck::index_t NDimSpatial, typename InLayout>
using device_image_to_column_i8_instances = std::tuple< using device_image_to_column_i8_instances = std::tuple<
// clang-format off // clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| //#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| //#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| | //#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | | //#####################| | | | | | | | | |
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8, ImageToColumn>, DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8>,
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16,ImageToColumn> DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16>
// clang-format on // clang-format on
>; >;
......
...@@ -12,7 +12,7 @@ namespace instance { ...@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op; using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16 #ifdef CK_ENABLE_BF16
void add_device_column_to_image_nhwc_1d_bf16_instances( void add_device_column_to_image_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>&
instances) instances)
{ {
...@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_1d_bf16_instances( ...@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_1d_bf16_instances(
#endif #endif
#ifdef CK_ENABLE_FP16 #ifdef CK_ENABLE_FP16
void add_device_column_to_image_nhwc_1d_f16_instances( void add_device_column_to_image_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>&
instances) instances)
{ {
...@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_1d_f16_instances( ...@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_1d_f16_instances(
#endif #endif
#ifdef CK_ENABLE_FP32 #ifdef CK_ENABLE_FP32
void add_device_column_to_image_nhwc_1d_f32_instances( void add_device_column_to_image_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>&
instances) instances)
{ {
...@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_1d_f32_instances( ...@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_1d_f32_instances(
#endif #endif
#ifdef CK_ENABLE_INT8 #ifdef CK_ENABLE_INT8
void add_device_column_to_image_nhwc_1d_i8_instances( void add_device_column_to_image_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>&
instances) instances)
{ {
......
...@@ -12,7 +12,7 @@ namespace instance { ...@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op; using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16 #ifdef CK_ENABLE_BF16
void add_device_column_to_image_nhwc_3d_bf16_instances( void add_device_column_to_image_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>&
instances) instances)
{ {
...@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_3d_bf16_instances( ...@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_3d_bf16_instances(
#endif #endif
#ifdef CK_ENABLE_FP16 #ifdef CK_ENABLE_FP16
void add_device_column_to_image_nhwc_3d_f16_instances( void add_device_column_to_image_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>&
instances) instances)
{ {
...@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_3d_f16_instances( ...@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_3d_f16_instances(
#endif #endif
#ifdef CK_ENABLE_FP32 #ifdef CK_ENABLE_FP32
void add_device_column_to_image_nhwc_3d_f32_instances( void add_device_column_to_image_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>&
instances) instances)
{ {
...@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_3d_f32_instances( ...@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_3d_f32_instances(
#endif #endif
#ifdef CK_ENABLE_INT8 #ifdef CK_ENABLE_INT8
void add_device_column_to_image_nhwc_3d_i8_instances( void add_device_column_to_image_ndhwc_3d_i8_instances(
std::vector< std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>& std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>&
instances) instances)
......
...@@ -12,7 +12,7 @@ namespace instance { ...@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op; using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16 #ifdef CK_ENABLE_BF16
void add_device_image_to_column_nhwc_1d_bf16_instances( void add_device_image_to_column_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>&
instances) instances)
{ {
...@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_1d_bf16_instances( ...@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_1d_bf16_instances(
#endif #endif
#ifdef CK_ENABLE_FP16 #ifdef CK_ENABLE_FP16
void add_device_image_to_column_nhwc_1d_f16_instances( void add_device_image_to_column_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>&
instances) instances)
{ {
...@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_1d_f16_instances( ...@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_1d_f16_instances(
#endif #endif
#ifdef CK_ENABLE_FP32 #ifdef CK_ENABLE_FP32
void add_device_image_to_column_nhwc_1d_f32_instances( void add_device_image_to_column_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>&
instances) instances)
{ {
...@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_1d_f32_instances( ...@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_1d_f32_instances(
#endif #endif
#ifdef CK_ENABLE_INT8 #ifdef CK_ENABLE_INT8
void add_device_image_to_column_nhwc_1d_i8_instances( void add_device_image_to_column_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>&
instances) instances)
{ {
......
...@@ -12,7 +12,7 @@ namespace instance { ...@@ -12,7 +12,7 @@ namespace instance {
using namespace ck::conv_tensor_rearrange_op; using namespace ck::conv_tensor_rearrange_op;
#ifdef CK_ENABLE_BF16 #ifdef CK_ENABLE_BF16
void add_device_image_to_column_nhwc_3d_bf16_instances( void add_device_image_to_column_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>&
instances) instances)
{ {
...@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_3d_bf16_instances( ...@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_3d_bf16_instances(
#endif #endif
#ifdef CK_ENABLE_FP16 #ifdef CK_ENABLE_FP16
void add_device_image_to_column_nhwc_3d_f16_instances( void add_device_image_to_column_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>&
instances) instances)
{ {
...@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_3d_f16_instances( ...@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_3d_f16_instances(
#endif #endif
#ifdef CK_ENABLE_FP32 #ifdef CK_ENABLE_FP32
void add_device_image_to_column_nhwc_3d_f32_instances( void add_device_image_to_column_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>& std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>&
instances) instances)
{ {
...@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_3d_f32_instances( ...@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_3d_f32_instances(
#endif #endif
#ifdef CK_ENABLE_INT8 #ifdef CK_ENABLE_INT8
void add_device_image_to_column_nhwc_3d_i8_instances( void add_device_image_to_column_ndhwc_3d_i8_instances(
std::vector< std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>& std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>&
instances) instances)
......
...@@ -35,17 +35,17 @@ class TestConvTensorRearrangeInterface : public ::testing::Test ...@@ -35,17 +35,17 @@ class TestConvTensorRearrangeInterface : public ::testing::Test
// clang-format off // clang-format off
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| // Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| // Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector| | // Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | | | // | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector, ImageToColumn>; < NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
using DeviceColToimgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl using DeviceColToimgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| ConvTensor| // Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per| RearrangeOp| // Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector| | // Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | | | // | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector, ColumnToImage>; < NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
// clang-format on // clang-format on
ck::utils::conv::ConvParam conv_param; ck::utils::conv::ConvParam conv_param;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment