Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
8f62b6a5
Commit
8f62b6a5
authored
Aug 30, 2023
by
Bartlomiej Kocot
Browse files
Several fixes of image to column
parent
887967c8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
61 additions
and
37 deletions
+61
-37
example/52_image_to_column/image_to_column_f32.cpp
example/52_image_to_column/image_to_column_f32.cpp
+3
-5
include/ck/tensor_operation/gpu/device/device_image_to_column.hpp
...ck/tensor_operation/gpu/device/device_image_to_column.hpp
+32
-5
include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp
...operation/gpu/device/impl/device_image_to_column_impl.hpp
+12
-10
include/ck/tensor_operation/gpu/grid/gridwise_image_to_column.hpp
...ck/tensor_operation/gpu/grid/gridwise_image_to_column.hpp
+0
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp
...erence_tensor_operation/cpu/reference_image_to_column.hpp
+14
-16
No files found.
example/52_image_to_column/image_to_column_f32.cpp
View file @
8f62b6a5
...
...
@@ -107,11 +107,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
}
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
std
::
size_t
num_btype
=
NDoHoWo
*
CZYX
*
sizeof
(
InDataType
);
std
::
size_t
num_btype
=
NDoHoWo
*
CZYX
*
sizeof
(
OutDataType
)
+
conv_params
.
GetInputByte
<
InputDataType
>
();
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
if
(
config
.
do_verification
)
...
...
@@ -159,7 +157,7 @@ int RunImageToColumnExample(int argc, char* argv[])
if
(
conv_params
.
num_dim_spatial_
!=
NDimSpatial
)
{
std
::
cerr
<<
"unsupported # of spatial
s
dimensions"
<<
std
::
endl
;
std
::
cerr
<<
"unsupported # of spatial dimensions"
<<
std
::
endl
;
return
EXIT_FAILURE
;
}
...
...
include/ck/tensor_operation/gpu/device/device_image_to_column.hpp
View file @
8f62b6a5
...
...
@@ -11,18 +11,45 @@ namespace ck {
namespace
tensor_operation
{
namespace
device
{
// Image to column:
// input : input image [N, Di, Hi, Wi, C],
// output : output image [N * Do * Ho * Wo, Z * Y * X * C]
/**
* \brief Image to column.
*
* This Device operator converts image ([G, N, Di, Hi, Wi, C]) to the gemm
* problem([N * Do * Ho * Wo, Z * Y * X * C]). G must be equal to 1.
*
* \tparam NDimSpatial Number of spatial dimensions.
* \tparam InputLayout Input Layout.
* \tparam InputDataType Input Data Type.
* \tparam OutputDataType Output Data Type.
*/
template
<
index_t
NDimSpatial
,
typename
InputLayout
,
typename
InputDataType
,
typename
OutputDataType
>
struct
DeviceImageToColumn
:
public
BaseOperator
{
/**
* \brief Make argument pointer for image to column.
*
* \param p_in A pointer to the device memory of the input image.
* \param p_out A pointer to the device memory of the output.
* \param N Convolution batch size.
* \param C Convolution number of channels.
* \param input_spatial_lengths Input spatial lengths.
* \param filter_spatial_lengths Filter spatial lengths.
* \param output_spatial_lengths Output spatial lengths.
* \param input_g_n_c_wis_strides Input strides in order [G, N, C, D, H, W].
* \param output_m_k_strides Output strides.
* \param conv_filter_strides Convolution filter strides.
* \param conv_filter_dilations Convolution filter dilations.
* \param input_left_pads Convolution left pads.
* \param input_right_pads Convolution right pads.
* \return Pointer to the argument.
*/
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_in
,
// input image
void
*
p_out
,
// output image
MakeArgumentPointer
(
const
void
*
p_in
,
void
*
p_out
,
const
ck
::
index_t
N
,
const
ck
::
index_t
C
,
const
std
::
array
<
index_t
,
NDimSpatial
>&
input_spatial_lengths
,
...
...
include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp
View file @
8f62b6a5
...
...
@@ -7,9 +7,7 @@
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_image_to_column.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/stream_utility.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
...
...
@@ -284,24 +282,28 @@ struct DeviceImageToColumnImpl
return
false
;
}
const
auto
x
_pad_left
=
arg
.
input_left_pads_
[
NDimSpatial
-
I1
];
const
auto
x
_pad_right
=
arg
.
input_right_pads_
[
NDimSpatial
-
I1
];
const
auto
w
_pad_left
=
arg
.
input_left_pads_
[
NDimSpatial
-
I1
];
const
auto
w
_pad_right
=
arg
.
input_right_pads_
[
NDimSpatial
-
I1
];
const
auto
dilation_x
=
arg
.
conv_filter_dilations_
[
NDimSpatial
-
I1
];
const
auto
stride_x
=
arg
.
conv_filter_strides_
[
NDimSpatial
-
I1
];
bool
is_c_packed
=
arg
.
input_g_n_c_wis_strides_
[
NDimSpatial
+
I2
]
==
arg
.
C_
;
bool
is_x_packed
=
arg
.
input_g_n_c_wis_strides_
[
NDimSpatial
+
I2
]
==
arg
.
C_
;
bool
is_c_packed
=
arg
.
input_g_n_c_wis_strides_
[
I2
]
==
1
;
// check vector acces with c not packed
if
(
!
is_c_packed
&&
ScalarPerVector
!=
1
)
return
false
;
// check vector access of filter window row (only C if C is not packed)
if
(
!
is_
c
_packed
&&
arg
.
C_
%
ScalarPerVector
!=
0
)
if
(
!
is_
x
_packed
&&
arg
.
C_
%
ScalarPerVector
!=
0
)
return
false
;
// check vector access of filter window row (X * C)
if
(
arg
.
X_
*
arg
.
C_
%
ScalarPerVector
!=
0
)
return
false
;
// check vector access of pads (
x
_pad_left/
x
_pad_right * C)
if
(
x
_pad_left
*
arg
.
C_
%
ScalarPerVector
!=
0
||
x
_pad_right
*
arg
.
C_
%
ScalarPerVector
!=
0
)
// check vector access of pads (
w
_pad_left/
w
_pad_right * C)
if
(
w
_pad_left
*
arg
.
C_
%
ScalarPerVector
!=
0
||
w
_pad_right
*
arg
.
C_
%
ScalarPerVector
!=
0
)
return
false
;
// check vector access of with stride and pad
if
((
x
_pad_left
!=
0
||
x
_pad_right
!=
0
)
&&
stride_x
>
1
&&
arg
.
C_
%
ScalarPerVector
!=
0
)
if
((
w
_pad_left
!=
0
||
w
_pad_right
!=
0
)
&&
stride_x
>
1
&&
arg
.
C_
%
ScalarPerVector
!=
0
)
return
false
;
// check vector access of with dilation
if
(
dilation_x
>
1
&&
arg
.
C_
%
ScalarPerVector
!=
0
)
...
...
include/ck/tensor_operation/gpu/grid/gridwise_image_to_column.hpp
View file @
8f62b6a5
...
...
@@ -10,7 +10,6 @@
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp"
#include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp
View file @
8f62b6a5
...
...
@@ -15,19 +15,17 @@ namespace ck {
namespace
tensor_operation
{
namespace
host
{
//
// @brief Reference implementation for image to column.
//
// @paragraph
// Tensor descriptor in NCHW dimensional order
//
// @tparam InDataType Input tensor data type.
// @tparam OutDataType Output tensor data type.
// @tparam NDimSpatial Number of spatial dimensions.
//
// input descriptor in [N, C, Di, Hi, Wi] order
// output descriptor in [N * Do * Ho * Wo, C * Z * Y * X] order
// phyiscal layout is [N, Di, Hi, Wi, C]
/**
* \brief Reference implementation for image to column.
*
* Tensor descriptor has [G, N, C, Di, Hi, Wi] data layout.
* G must be equal to 1. Memory layout is [G, N, Di, Hi, Wi, C].
*
* \tparam NDimSpatial Number of spatial dimensions.
* \tparam InputLayout Input Layout.
* \tparam InDataType Input Data Type.
* \tparam OutDataType Output Data Type.
*/
template
<
ck
::
index_t
NDimSpatial
,
typename
InputLayout
,
typename
InDataType
,
...
...
@@ -242,12 +240,12 @@ struct ReferenceImageToColumn : public device::BaseOperator
{
using
namespace
tensor_layout
::
convolution
;
if
(
!
(
std
::
is_same_v
<
InputLayout
,
GNWC
>
||
std
::
is_same_v
<
InputLayout
,
GNHWC
>
||
std
::
is_same_v
<
InputLayout
,
GNDHWC
>
))
if
constexpr
(
!
(
std
::
is_same_v
<
InputLayout
,
GNWC
>
||
std
::
is_same_v
<
InputLayout
,
GNHWC
>
||
std
::
is_same_v
<
InputLayout
,
GNDHWC
>
))
{
return
false
;
}
if
(
!
(
NDimSpatial
>=
1
&&
NDimSpatial
<=
3
))
if
constexpr
(
!
(
NDimSpatial
>=
1
&&
NDimSpatial
<=
3
))
{
return
false
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment