Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
98f349c6
Commit
98f349c6
authored
Sep 26, 2023
by
Bartlomiej Kocot
Browse files
Minor stylistic fixes
parent
728b8695
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
225 additions
and
231 deletions
+225
-231
example/52_im2col_col2im/column_to_image_f32.cpp
example/52_im2col_col2im/column_to_image_f32.cpp
+5
-5
example/52_im2col_col2im/image_to_column_f32.cpp
example/52_im2col_col2im/image_to_column_f32.cpp
+5
-5
include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp
...sor_operation/gpu/device/device_conv_tensor_rearrange.hpp
+3
-1
include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp
...operation/gpu/device/impl/device_column_to_image_impl.hpp
+7
-11
include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp
...operation/gpu/device/impl/device_image_to_column_impl.hpp
+7
-11
library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp
...y/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp
+32
-32
library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp
...conv_tensor_rearrange/device_column_to_image_instance.hpp
+70
-70
library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp
...conv_tensor_rearrange/device_image_to_column_instance.hpp
+70
-70
library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp
...lumn_to_image/device_column_to_image_nhwc_1d_instance.cpp
+4
-4
library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp
...lumn_to_image/device_column_to_image_nhwc_3d_instance.cpp
+4
-4
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp
...age_to_column/device_image_to_column_nhwc_1d_instance.cpp
+4
-4
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp
...age_to_column/device_image_to_column_nhwc_3d_instance.cpp
+4
-4
test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp
...tensor_rearrange/test_conv_tensor_rearrange_interface.cpp
+10
-10
No files found.
example/52_im2col_col2im/column_to_image_f32.cpp
View file @
98f349c6
...
@@ -11,11 +11,11 @@ using ColumnToImageOp = ck::conv_tensor_rearrange_op::ColumnToImage;
...
@@ -11,11 +11,11 @@ using ColumnToImageOp = ck::conv_tensor_rearrange_op::ColumnToImage;
// clang-format off
// clang-format off
using
DeviceColToImgInstance
=
ck
::
tensor_operation
::
device
::
DeviceColumnToImageImpl
using
DeviceColToImgInstance
=
ck
::
tensor_operation
::
device
::
DeviceColumnToImageImpl
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
<
NDimSpatial
,
ImLayout
,
InDataType
,
OutDataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ColumnToImageOp
>
;
<
NDimSpatial
,
ImLayout
,
InDataType
,
OutDataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
;
// clang-format on
// clang-format on
bool
RunColumnToImage
(
const
ExecutionConfig
&
config
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
)
bool
RunColumnToImage
(
const
ExecutionConfig
&
config
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
)
...
...
example/52_im2col_col2im/image_to_column_f32.cpp
View file @
98f349c6
...
@@ -11,11 +11,11 @@ using ImageToColumnOp = ck::conv_tensor_rearrange_op::ImageToColumn;
...
@@ -11,11 +11,11 @@ using ImageToColumnOp = ck::conv_tensor_rearrange_op::ImageToColumn;
// clang-format off
// clang-format off
using
DeviceImgToColInstance
=
ck
::
tensor_operation
::
device
::
DeviceImageToColumnImpl
using
DeviceImgToColInstance
=
ck
::
tensor_operation
::
device
::
DeviceImageToColumnImpl
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
<
NDimSpatial
,
ImLayout
,
InDataType
,
OutDataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ImageToColumnOp
>
;
<
NDimSpatial
,
ImLayout
,
InDataType
,
OutDataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
;
// clang-format on
// clang-format on
bool
RunImageToColumn
(
const
ExecutionConfig
&
config
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
)
bool
RunImageToColumn
(
const
ExecutionConfig
&
config
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
)
...
...
include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp
View file @
98f349c6
...
@@ -16,7 +16,9 @@ namespace device {
...
@@ -16,7 +16,9 @@ namespace device {
*
*
* This Device operator supports conversion image ([G, N, Di, Hi, Wi, C]) to
* This Device operator supports conversion image ([G, N, Di, Hi, Wi, C]) to
* the gemm problem([N * Do * Ho * Wo, Z * Y * X * C]) (Image to Column) and
* the gemm problem([N * Do * Ho * Wo, Z * Y * X * C]) (Image to Column) and
* conversion gemm form to the image (Column to Image). G must be equal to 1.
* conversion gemm form to the image (Column to Image).
*
* Note that G must be equal to 1.
*
*
* \tparam NDimSpatial Number of spatial dimensions.
* \tparam NDimSpatial Number of spatial dimensions.
* \tparam ImageLayout Input Layout.
* \tparam ImageLayout Input Layout.
...
...
include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp
View file @
98f349c6
...
@@ -35,12 +35,13 @@ template <index_t NDimSpatial,
...
@@ -35,12 +35,13 @@ template <index_t NDimSpatial,
index_t
KPerBlock
,
index_t
KPerBlock
,
typename
ThreadClusterLengths
,
typename
ThreadClusterLengths
,
index_t
ScalarPerVector
,
index_t
ScalarPerVector
,
typename
ConvTensorRearrangeOp
>
typename
std
::
enable_if
<
NDimSpatial
>
=
1
&&
NDimSpatial
<=
3
,
bool
>::
type
=
false
>
struct
DeviceColumnToImageImpl
:
public
DeviceConvTensorRearrange
<
NDimSpatial
,
struct
DeviceColumnToImageImpl
ImageLayout
,
:
public
DeviceConvTensorRearrange
<
NDimSpatial
,
InputDataType
,
ImageLayout
,
OutputDataType
,
InputDataType
,
ConvTensorRearrangeOp
>
OutputDataType
,
conv_tensor_rearrange_op
::
ColumnToImage
>
{
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I0
=
Number
<
0
>
{};
...
@@ -476,12 +477,7 @@ struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial,
...
@@ -476,12 +477,7 @@ struct DeviceColumnToImageImpl : public DeviceConvTensorRearrange<NDimSpatial,
bool
IsSupportedArgument
(
const
Argument
&
arg
)
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
{
using
namespace
conv_tensor_rearrange_op
;
using
namespace
tensor_layout
::
convolution
;
using
namespace
tensor_layout
::
convolution
;
if
constexpr
(
!
std
::
is_same_v
<
ConvTensorRearrangeOp
,
ColumnToImage
>
)
{
return
false
;
}
if
constexpr
(
!
(
std
::
is_same_v
<
ImageLayout
,
GNWC
>
||
std
::
is_same_v
<
ImageLayout
,
GNHWC
>
||
if
constexpr
(
!
(
std
::
is_same_v
<
ImageLayout
,
GNWC
>
||
std
::
is_same_v
<
ImageLayout
,
GNHWC
>
||
std
::
is_same_v
<
ImageLayout
,
GNDHWC
>
))
std
::
is_same_v
<
ImageLayout
,
GNDHWC
>
))
{
{
...
...
include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp
View file @
98f349c6
...
@@ -33,12 +33,13 @@ template <index_t NDimSpatial,
...
@@ -33,12 +33,13 @@ template <index_t NDimSpatial,
index_t
KPerBlock
,
index_t
KPerBlock
,
typename
ThreadClusterLengths
,
typename
ThreadClusterLengths
,
index_t
ScalarPerVector
,
index_t
ScalarPerVector
,
typename
ConvTensorRearrangeOp
>
typename
std
::
enable_if
<
NDimSpatial
>
=
1
&&
NDimSpatial
<=
3
,
bool
>::
type
=
false
>
struct
DeviceImageToColumnImpl
:
public
DeviceConvTensorRearrange
<
NDimSpatial
,
struct
DeviceImageToColumnImpl
ImageLayout
,
:
public
DeviceConvTensorRearrange
<
NDimSpatial
,
InputDataType
,
ImageLayout
,
OutputDataType
,
InputDataType
,
ConvTensorRearrangeOp
>
OutputDataType
,
conv_tensor_rearrange_op
::
ImageToColumn
>
{
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I0
=
Number
<
0
>
{};
...
@@ -249,12 +250,7 @@ struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial,
...
@@ -249,12 +250,7 @@ struct DeviceImageToColumnImpl : public DeviceConvTensorRearrange<NDimSpatial,
bool
IsSupportedArgument
(
const
Argument
&
arg
)
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
{
using
namespace
conv_tensor_rearrange_op
;
using
namespace
tensor_layout
::
convolution
;
using
namespace
tensor_layout
::
convolution
;
if
constexpr
(
!
std
::
is_same_v
<
ConvTensorRearrangeOp
,
ImageToColumn
>
)
{
return
false
;
}
if
constexpr
(
!
(
std
::
is_same_v
<
ImageLayout
,
GNWC
>
||
std
::
is_same_v
<
ImageLayout
,
GNHWC
>
||
if
constexpr
(
!
(
std
::
is_same_v
<
ImageLayout
,
GNWC
>
||
std
::
is_same_v
<
ImageLayout
,
GNHWC
>
||
std
::
is_same_v
<
ImageLayout
,
GNDHWC
>
))
std
::
is_same_v
<
ImageLayout
,
GNDHWC
>
))
{
{
...
...
library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp
View file @
98f349c6
...
@@ -21,19 +21,19 @@ using namespace ck::conv_tensor_rearrange_op;
...
@@ -21,19 +21,19 @@ using namespace ck::conv_tensor_rearrange_op;
// Image to Column
// Image to Column
// nhwc, 1d
// nhwc, 1d
void
add_device_image_to_column_n
h
wc_1d_bf16_instances
(
void
add_device_image_to_column_nwc_1d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
instances
);
instances
);
void
add_device_image_to_column_n
h
wc_1d_f16_instances
(
void
add_device_image_to_column_nwc_1d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ImageToColumn
>>>&
instances
);
instances
);
void
add_device_image_to_column_n
h
wc_1d_f32_instances
(
void
add_device_image_to_column_nwc_1d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ImageToColumn
>>>&
instances
);
instances
);
void
add_device_image_to_column_n
h
wc_1d_i8_instances
(
void
add_device_image_to_column_nwc_1d_i8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
instances
);
instances
);
// nhwc, 2d
// nhwc, 2d
...
@@ -54,38 +54,38 @@ void add_device_image_to_column_nhwc_2d_i8_instances(
...
@@ -54,38 +54,38 @@ void add_device_image_to_column_nhwc_2d_i8_instances(
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
2
,
GNHWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
2
,
GNHWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
instances
);
instances
);
// nhwc, 3d
// nhwc, 3d
void
add_device_image_to_column_nhwc_3d_bf16_instances
(
void
add_device_image_to_column_n
d
hwc_3d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
instances
);
instances
);
void
add_device_image_to_column_nhwc_3d_f16_instances
(
void
add_device_image_to_column_n
d
hwc_3d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ImageToColumn
>>>&
instances
);
instances
);
void
add_device_image_to_column_nhwc_3d_f32_instances
(
void
add_device_image_to_column_n
d
hwc_3d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ImageToColumn
>>>&
instances
);
instances
);
void
add_device_image_to_column_nhwc_3d_i8_instances
(
void
add_device_image_to_column_n
d
hwc_3d_i8_instances
(
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
instances
);
instances
);
// Column to Image
// Column to Image
// nhwc, 1d
// nhwc, 1d
void
add_device_column_to_image_n
h
wc_1d_bf16_instances
(
void
add_device_column_to_image_nwc_1d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
instances
);
instances
);
void
add_device_column_to_image_n
h
wc_1d_f16_instances
(
void
add_device_column_to_image_nwc_1d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ColumnToImage
>>>&
instances
);
instances
);
void
add_device_column_to_image_n
h
wc_1d_f32_instances
(
void
add_device_column_to_image_nwc_1d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ColumnToImage
>>>&
instances
);
instances
);
void
add_device_column_to_image_n
h
wc_1d_i8_instances
(
void
add_device_column_to_image_nwc_1d_i8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
instances
);
instances
);
// nhwc, 2d
// nhwc, 2d
...
@@ -106,19 +106,19 @@ void add_device_column_to_image_nhwc_2d_i8_instances(
...
@@ -106,19 +106,19 @@ void add_device_column_to_image_nhwc_2d_i8_instances(
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
2
,
GNHWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
2
,
GNHWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
instances
);
instances
);
// nhwc, 3d
// nhwc, 3d
void
add_device_column_to_image_nhwc_3d_bf16_instances
(
void
add_device_column_to_image_n
d
hwc_3d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
instances
);
instances
);
void
add_device_column_to_image_nhwc_3d_f16_instances
(
void
add_device_column_to_image_n
d
hwc_3d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ColumnToImage
>>>&
instances
);
instances
);
void
add_device_column_to_image_nhwc_3d_f32_instances
(
void
add_device_column_to_image_n
d
hwc_3d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ColumnToImage
>>>&
instances
);
instances
);
void
add_device_column_to_image_nhwc_3d_i8_instances
(
void
add_device_column_to_image_n
d
hwc_3d_i8_instances
(
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
instances
);
instances
);
...
@@ -151,20 +151,20 @@ struct DeviceOperationInstanceFactory<
...
@@ -151,20 +151,20 @@ struct DeviceOperationInstanceFactory<
{
{
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
{
{
add_device_image_to_column_n
h
wc_1d_f32_instances
(
op_ptrs
);
add_device_image_to_column_nwc_1d_f32_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
{
{
add_device_image_to_column_n
h
wc_1d_f16_instances
(
op_ptrs
);
add_device_image_to_column_nwc_1d_f16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
{
{
add_device_image_to_column_n
h
wc_1d_bf16_instances
(
op_ptrs
);
add_device_image_to_column_nwc_1d_bf16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
{
{
add_device_image_to_column_n
h
wc_1d_i8_instances
(
op_ptrs
);
add_device_image_to_column_nwc_1d_i8_instances
(
op_ptrs
);
}
}
}
}
else
if
constexpr
(
NumDimSpatial
==
2
&&
is_same_v
<
ImageLayout
,
GNHWC
>
)
else
if
constexpr
(
NumDimSpatial
==
2
&&
is_same_v
<
ImageLayout
,
GNHWC
>
)
...
@@ -191,20 +191,20 @@ struct DeviceOperationInstanceFactory<
...
@@ -191,20 +191,20 @@ struct DeviceOperationInstanceFactory<
{
{
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
{
{
add_device_image_to_column_nhwc_3d_f32_instances
(
op_ptrs
);
add_device_image_to_column_n
d
hwc_3d_f32_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
{
{
add_device_image_to_column_nhwc_3d_f16_instances
(
op_ptrs
);
add_device_image_to_column_n
d
hwc_3d_f16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
{
{
add_device_image_to_column_nhwc_3d_bf16_instances
(
op_ptrs
);
add_device_image_to_column_n
d
hwc_3d_bf16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
{
{
add_device_image_to_column_nhwc_3d_i8_instances
(
op_ptrs
);
add_device_image_to_column_n
d
hwc_3d_i8_instances
(
op_ptrs
);
}
}
}
}
}
}
...
@@ -214,20 +214,20 @@ struct DeviceOperationInstanceFactory<
...
@@ -214,20 +214,20 @@ struct DeviceOperationInstanceFactory<
{
{
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
{
{
add_device_column_to_image_n
h
wc_1d_f32_instances
(
op_ptrs
);
add_device_column_to_image_nwc_1d_f32_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
{
{
add_device_column_to_image_n
h
wc_1d_f16_instances
(
op_ptrs
);
add_device_column_to_image_nwc_1d_f16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
{
{
add_device_column_to_image_n
h
wc_1d_bf16_instances
(
op_ptrs
);
add_device_column_to_image_nwc_1d_bf16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
{
{
add_device_column_to_image_n
h
wc_1d_i8_instances
(
op_ptrs
);
add_device_column_to_image_nwc_1d_i8_instances
(
op_ptrs
);
}
}
}
}
else
if
constexpr
(
NumDimSpatial
==
2
&&
is_same_v
<
ImageLayout
,
GNHWC
>
)
else
if
constexpr
(
NumDimSpatial
==
2
&&
is_same_v
<
ImageLayout
,
GNHWC
>
)
...
@@ -254,20 +254,20 @@ struct DeviceOperationInstanceFactory<
...
@@ -254,20 +254,20 @@ struct DeviceOperationInstanceFactory<
{
{
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
if
constexpr
(
is_same_v
<
InDataType
,
float
>
&&
is_same_v
<
OutDataType
,
float
>
)
{
{
add_device_column_to_image_nhwc_3d_f32_instances
(
op_ptrs
);
add_device_column_to_image_n
d
hwc_3d_f32_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
>
)
{
{
add_device_column_to_image_nhwc_3d_f16_instances
(
op_ptrs
);
add_device_column_to_image_n
d
hwc_3d_f16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
is_same_v
<
OutDataType
,
ck
::
bhalf_t
>
)
{
{
add_device_column_to_image_nhwc_3d_bf16_instances
(
op_ptrs
);
add_device_column_to_image_n
d
hwc_3d_bf16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
else
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
{
{
add_device_column_to_image_nhwc_3d_i8_instances
(
op_ptrs
);
add_device_column_to_image_n
d
hwc_3d_i8_instances
(
op_ptrs
);
}
}
}
}
}
}
...
...
library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp
View file @
98f349c6
...
@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
...
@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_column_to_image_bf16_instances
=
std
::
tuple
<
using
device_column_to_image_bf16_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
,
ColumnToImage
>
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
>
// clang-format on
// clang-format on
>
;
>
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_column_to_image_f16_instances
=
std
::
tuple
<
using
device_column_to_image_f16_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
,
ColumnToImage
>
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
>
// clang-format on
// clang-format on
>
;
>
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_column_to_image_f32_instances
=
std
::
tuple
<
using
device_column_to_image_f32_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
// clang-format on
// clang-format on
>
;
>
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_column_to_image_i8_instances
=
std
::
tuple
<
using
device_column_to_image_i8_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
,
ColumnToImage
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
>
,
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
256
,
256
,
S
<
16
,
16
>
,
16
,
ColumnToImage
>
DeviceColumnToImageImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
256
,
256
,
S
<
16
,
16
>
,
16
>
// clang-format on
// clang-format on
>
;
>
;
...
...
library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp
View file @
98f349c6
...
@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
...
@@ -25,94 +25,94 @@ using S = ck::Sequence<Is...>;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_image_to_column_bf16_instances
=
std
::
tuple
<
using
device_image_to_column_bf16_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
,
ImageToColumn
>
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
BF16
,
BF16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
>
// clang-format on
// clang-format on
>
;
>
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_image_to_column_f16_instances
=
std
::
tuple
<
using
device_image_to_column_f16_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
,
ImageToColumn
>
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F16
,
F16
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
>
// clang-format on
// clang-format on
>
;
>
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_image_to_column_f32_instances
=
std
::
tuple
<
using
device_image_to_column_f32_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
F32
,
F32
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
// clang-format on
// clang-format on
>
;
>
;
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
template
<
ck
::
index_t
NDimSpatial
,
typename
InLayout
>
using
device_image_to_column_i8_instances
=
std
::
tuple
<
using
device_image_to_column_i8_instances
=
std
::
tuple
<
// clang-format off
// clang-format off
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
|
//#####################| | | | | | | | | |
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
8
,
8
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
16
,
16
,
S
<
8
,
8
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
32
,
32
,
S
<
8
,
8
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
64
,
64
,
64
,
S
<
8
,
8
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
16
,
16
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
64
,
S
<
8
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
32
,
64
,
S
<
8
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
128
,
64
,
128
,
S
<
8
,
16
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
16
,
16
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
1
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
64
,
64
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
4
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
,
ImageToColumn
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
128
,
128
,
S
<
16
,
16
>
,
8
>
,
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
256
,
256
,
S
<
16
,
16
>
,
16
,
ImageToColumn
>
DeviceImageToColumnImpl
<
NDimSpatial
,
InLayout
,
int8_t
,
int8_t
,
256
,
256
,
256
,
S
<
16
,
16
>
,
16
>
// clang-format on
// clang-format on
>
;
>
;
...
...
library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp
View file @
98f349c6
...
@@ -12,7 +12,7 @@ namespace instance {
...
@@ -12,7 +12,7 @@ namespace instance {
using
namespace
ck
::
conv_tensor_rearrange_op
;
using
namespace
ck
::
conv_tensor_rearrange_op
;
#ifdef CK_ENABLE_BF16
#ifdef CK_ENABLE_BF16
void
add_device_column_to_image_n
h
wc_1d_bf16_instances
(
void
add_device_column_to_image_nwc_1d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_1d_bf16_instances(
...
@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_1d_bf16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP16
#ifdef CK_ENABLE_FP16
void
add_device_column_to_image_n
h
wc_1d_f16_instances
(
void
add_device_column_to_image_nwc_1d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_1d_f16_instances(
...
@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_1d_f16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP32
#ifdef CK_ENABLE_FP32
void
add_device_column_to_image_n
h
wc_1d_f32_instances
(
void
add_device_column_to_image_nwc_1d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_1d_f32_instances(
...
@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_1d_f32_instances(
#endif
#endif
#ifdef CK_ENABLE_INT8
#ifdef CK_ENABLE_INT8
void
add_device_column_to_image_n
h
wc_1d_i8_instances
(
void
add_device_column_to_image_nwc_1d_i8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
...
library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp
View file @
98f349c6
...
@@ -12,7 +12,7 @@ namespace instance {
...
@@ -12,7 +12,7 @@ namespace instance {
using
namespace
ck
::
conv_tensor_rearrange_op
;
using
namespace
ck
::
conv_tensor_rearrange_op
;
#ifdef CK_ENABLE_BF16
#ifdef CK_ENABLE_BF16
void
add_device_column_to_image_nhwc_3d_bf16_instances
(
void
add_device_column_to_image_n
d
hwc_3d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_3d_bf16_instances(
...
@@ -21,7 +21,7 @@ void add_device_column_to_image_nhwc_3d_bf16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP16
#ifdef CK_ENABLE_FP16
void
add_device_column_to_image_nhwc_3d_f16_instances
(
void
add_device_column_to_image_n
d
hwc_3d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_3d_f16_instances(
...
@@ -30,7 +30,7 @@ void add_device_column_to_image_nhwc_3d_f16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP32
#ifdef CK_ENABLE_FP32
void
add_device_column_to_image_nhwc_3d_f32_instances
(
void
add_device_column_to_image_n
d
hwc_3d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ColumnToImage
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ColumnToImage
>>>&
instances
)
instances
)
{
{
...
@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_3d_f32_instances(
...
@@ -39,7 +39,7 @@ void add_device_column_to_image_nhwc_3d_f32_instances(
#endif
#endif
#ifdef CK_ENABLE_INT8
#ifdef CK_ENABLE_INT8
void
add_device_column_to_image_nhwc_3d_i8_instances
(
void
add_device_column_to_image_n
d
hwc_3d_i8_instances
(
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ColumnToImage
>>>&
instances
)
instances
)
...
...
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp
View file @
98f349c6
...
@@ -12,7 +12,7 @@ namespace instance {
...
@@ -12,7 +12,7 @@ namespace instance {
using
namespace
ck
::
conv_tensor_rearrange_op
;
using
namespace
ck
::
conv_tensor_rearrange_op
;
#ifdef CK_ENABLE_BF16
#ifdef CK_ENABLE_BF16
void
add_device_image_to_column_n
h
wc_1d_bf16_instances
(
void
add_device_image_to_column_nwc_1d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_1d_bf16_instances(
...
@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_1d_bf16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP16
#ifdef CK_ENABLE_FP16
void
add_device_image_to_column_n
h
wc_1d_f16_instances
(
void
add_device_image_to_column_nwc_1d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F16
,
F16
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_1d_f16_instances(
...
@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_1d_f16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP32
#ifdef CK_ENABLE_FP32
void
add_device_image_to_column_n
h
wc_1d_f32_instances
(
void
add_device_image_to_column_nwc_1d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
F32
,
F32
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_1d_f32_instances(
...
@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_1d_f32_instances(
#endif
#endif
#ifdef CK_ENABLE_INT8
#ifdef CK_ENABLE_INT8
void
add_device_image_to_column_n
h
wc_1d_i8_instances
(
void
add_device_image_to_column_nwc_1d_i8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
1
,
GNWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
...
library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp
View file @
98f349c6
...
@@ -12,7 +12,7 @@ namespace instance {
...
@@ -12,7 +12,7 @@ namespace instance {
using
namespace
ck
::
conv_tensor_rearrange_op
;
using
namespace
ck
::
conv_tensor_rearrange_op
;
#ifdef CK_ENABLE_BF16
#ifdef CK_ENABLE_BF16
void
add_device_image_to_column_nhwc_3d_bf16_instances
(
void
add_device_image_to_column_n
d
hwc_3d_bf16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
BF16
,
BF16
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_3d_bf16_instances(
...
@@ -21,7 +21,7 @@ void add_device_image_to_column_nhwc_3d_bf16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP16
#ifdef CK_ENABLE_FP16
void
add_device_image_to_column_nhwc_3d_f16_instances
(
void
add_device_image_to_column_n
d
hwc_3d_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F16
,
F16
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_3d_f16_instances(
...
@@ -30,7 +30,7 @@ void add_device_image_to_column_nhwc_3d_f16_instances(
#endif
#endif
#ifdef CK_ENABLE_FP32
#ifdef CK_ENABLE_FP32
void
add_device_image_to_column_nhwc_3d_f32_instances
(
void
add_device_image_to_column_n
d
hwc_3d_f32_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ImageToColumn
>>>&
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
F32
,
F32
,
ImageToColumn
>>>&
instances
)
instances
)
{
{
...
@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_3d_f32_instances(
...
@@ -39,7 +39,7 @@ void add_device_image_to_column_nhwc_3d_f32_instances(
#endif
#endif
#ifdef CK_ENABLE_INT8
#ifdef CK_ENABLE_INT8
void
add_device_image_to_column_nhwc_3d_i8_instances
(
void
add_device_image_to_column_n
d
hwc_3d_i8_instances
(
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
std
::
unique_ptr
<
DeviceConvTensorRearrange
<
3
,
GNDHWC
,
int8_t
,
int8_t
,
ImageToColumn
>>>&
instances
)
instances
)
...
...
test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp
View file @
98f349c6
...
@@ -35,17 +35,17 @@ class TestConvTensorRearrangeInterface : public ::testing::Test
...
@@ -35,17 +35,17 @@ class TestConvTensorRearrangeInterface : public ::testing::Test
// clang-format off
// clang-format off
using
DeviceImgToColInstance
=
ck
::
tensor_operation
::
device
::
DeviceImageToColumnImpl
using
DeviceImgToColInstance
=
ck
::
tensor_operation
::
device
::
DeviceImageToColumnImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
// Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector|
|
// Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | |
|
// | | | | | | | | |
<
NDimSpatial
,
ImLayout
,
DataType
,
DataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
ScalarPerVector
,
ImageToColumn
>
;
<
NDimSpatial
,
ImLayout
,
DataType
,
DataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
ScalarPerVector
>
;
using
DeviceColToimgInstance
=
ck
::
tensor_operation
::
device
::
DeviceColumnToImageImpl
using
DeviceColToimgInstance
=
ck
::
tensor_operation
::
device
::
DeviceColumnToImageImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
ConvTensor|
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per|
RearrangeOp|
// Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector|
|
// Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | |
|
// | | | | | | | | |
<
NDimSpatial
,
ImLayout
,
DataType
,
DataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
ScalarPerVector
,
ColumnToImage
>
;
<
NDimSpatial
,
ImLayout
,
DataType
,
DataType
,
256
,
128
,
128
,
S
<
16
,
16
>
,
ScalarPerVector
>
;
// clang-format on
// clang-format on
ck
::
utils
::
conv
::
ConvParam
conv_param
;
ck
::
utils
::
conv
::
ConvParam
conv_param
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment