Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f6922d3f
"...composable_kernel.git" did not exist on "425175241fd0807430c8a5518e0a929369f140c5"
Commit
f6922d3f
authored
Jul 17, 2022
by
Chao Liu
Browse files
fix reference conv bwd data bug; update conv bwd data test
parent
8f722700
Changes
32
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
295 additions
and
959 deletions
+295
-959
example/09_convnd_fwd/convnd_fwd_common.hpp
example/09_convnd_fwd/convnd_fwd_common.hpp
+20
-11
example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
+26
-11
example/20_convnd_bwd_weight/convnd_bwd_weight_common.hpp
example/20_convnd_bwd_weight/convnd_bwd_weight_common.hpp
+21
-12
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
...eference_tensor_operation/cpu/reference_conv_bwd_data.hpp
+36
-40
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp
...erence_tensor_operation/cpu/reference_conv_bwd_weight.hpp
+18
-23
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
...ary/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+18
-23
profiler/include/profile_conv_bwd_weight_impl.hpp
profiler/include/profile_conv_bwd_weight_impl.hpp
+1
-1
test/CMakeLists.txt
test/CMakeLists.txt
+1
-2
test/batched_gemm/CMakeLists.txt
test/batched_gemm/CMakeLists.txt
+1
-1
test/batched_gemm_reduce/CMakeLists.txt
test/batched_gemm_reduce/CMakeLists.txt
+1
-1
test/conv2d_bwd_data/CMakeLists.txt
test/conv2d_bwd_data/CMakeLists.txt
+0
-3
test/conv2d_bwd_data/conv2d_bwd_data.cpp
test/conv2d_bwd_data/conv2d_bwd_data.cpp
+0
-330
test/conv2d_bwd_weight/CMakeLists.txt
test/conv2d_bwd_weight/CMakeLists.txt
+0
-2
test/conv2d_bwd_weight/conv2d_bwd_weight.cpp
test/conv2d_bwd_weight/conv2d_bwd_weight.cpp
+0
-217
test/conv_util/CMakeLists.txt
test/conv_util/CMakeLists.txt
+1
-1
test/conv_util/conv_util.cpp
test/conv_util/conv_util.cpp
+1
-1
test/convnd_bwd_data/CMakeLists.txt
test/convnd_bwd_data/CMakeLists.txt
+1
-1
test/convnd_bwd_data/convnd_bwd_data.cpp
test/convnd_bwd_data/convnd_bwd_data.cpp
+145
-275
test/convnd_bwd_weight/CMakeLists.txt
test/convnd_bwd_weight/CMakeLists.txt
+1
-1
test/convnd_fwd/CMakeLists.txt
test/convnd_fwd/CMakeLists.txt
+3
-3
No files found.
example/09_convnd_fwd/convnd_fwd_common.hpp
View file @
f6922d3f
...
@@ -188,17 +188,26 @@ int run_conv_fwd_nhwc(bool do_verification,
...
@@ -188,17 +188,26 @@ int run_conv_fwd_nhwc(bool do_verification,
if
(
do_verification
)
if
(
do_verification
)
{
{
auto
ref_conv
=
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
NDimSpatial
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
InDataType
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
WeiDataType
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
OutDataType
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
InElementOp
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
WeiElementOp
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
OutElementOp
>
();
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in_n_hi_wi_c
,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in_n_hi_wi_c
,
...
...
example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
View file @
f6922d3f
...
@@ -194,17 +194,28 @@ int run_conv_bwd_data_nhwc(bool do_verification,
...
@@ -194,17 +194,28 @@ int run_conv_bwd_data_nhwc(bool do_verification,
if
(
do_verification
)
if
(
do_verification
)
{
{
auto
ref_conv
=
std
::
cout
<<
"before ref"
<<
std
::
endl
;
ck
::
tensor_operation
::
host
::
ReferenceConvBwdData
<
NDimSpatial
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvBwdData
<
ck
::
tensor_layout
::
convolution
::
KYXC
,
NDimSpatial
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
InDataType
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
WeiDataType
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
OutDataType
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
InElementOp
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
WeiElementOp
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
OutElementOp
>
();
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
...
@@ -219,8 +230,12 @@ int run_conv_bwd_data_nhwc(bool do_verification,
...
@@ -219,8 +230,12 @@ int run_conv_bwd_data_nhwc(bool do_verification,
wei_element_op
,
wei_element_op
,
out_element_op
);
out_element_op
);
std
::
cout
<<
"before ref"
<<
std
::
endl
;
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
std
::
cout
<<
"after ref"
<<
std
::
endl
;
in_device_buf
.
FromDevice
(
in_n_hi_wi_c_device
.
mData
.
data
());
in_device_buf
.
FromDevice
(
in_n_hi_wi_c_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
in_n_hi_wi_c_device
.
mData
,
in_n_hi_wi_c_host
.
mData
)
?
0
:
1
;
return
ck
::
utils
::
check_err
(
in_n_hi_wi_c_device
.
mData
,
in_n_hi_wi_c_host
.
mData
)
?
0
:
1
;
...
...
example/20_convnd_bwd_weight/convnd_bwd_weight_common.hpp
View file @
f6922d3f
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_b
ackwar
d_weight.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_b
w
d_weight.hpp"
void
print_helper_msg
()
void
print_helper_msg
()
{
{
...
@@ -197,17 +197,26 @@ int run_conv_bwd_weight_nhwc(bool do_verification,
...
@@ -197,17 +197,26 @@ int run_conv_bwd_weight_nhwc(bool do_verification,
if
(
do_verification
)
if
(
do_verification
)
{
{
auto
ref_conv
=
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvBwdWeight
<
ck
::
tensor_operation
::
host
::
ReferenceConvBwdWeight
<
2
,
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
InDataType
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
WeiDataType
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
OutDataType
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
InElementOp
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
WeiElementOp
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
OutElementOp
>
{};
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
>
{};
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
View file @
f6922d3f
...
@@ -142,14 +142,14 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -142,14 +142,14 @@ struct ReferenceConvBwdData : public device::BaseOperator
for
(
std
::
size_t
x
=
0
;
x
<
X
;
++
x
)
for
(
std
::
size_t
x
=
0
;
x
<
X
;
++
x
)
{
{
auto
w_tmp
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
wi
)
+
auto
w_tmp
=
static_cas
t
<
ck
::
long_index_t
>
(
wi
)
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
]);
static_cas
t
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
]);
if
(
w_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
if
(
w_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
{
{
auto
wo
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
w_tmp
)
/
auto
wo
=
static_cas
t
<
ck
::
long_index_t
>
(
w_tmp
)
/
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
{
{
...
@@ -209,27 +209,26 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -209,27 +209,26 @@ struct ReferenceConvBwdData : public device::BaseOperator
for
(
std
::
size_t
y
=
0
;
y
<
Y
;
++
y
)
for
(
std
::
size_t
y
=
0
;
y
<
Y
;
++
y
)
{
{
auto
h_tmp
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
hi
)
+
auto
h_tmp
=
static_cas
t
<
ck
::
long_index_t
>
(
hi
)
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
]);
static_cas
t
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
]);
if
(
h_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
if
(
h_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
{
{
auto
ho
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
h_tmp
)
/
auto
ho
=
static_cas
t
<
ck
::
long_index_t
>
(
h_tmp
)
/
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
if
(
ho
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
ho
)
<
Ho
)
if
(
ho
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
ho
)
<
Ho
)
{
{
for
(
std
::
size_t
x
=
0
;
x
<
X
;
++
x
)
for
(
std
::
size_t
x
=
0
;
x
<
X
;
++
x
)
{
{
auto
w_tmp
=
auto
w_tmp
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
wi
)
+
static_cast
<
ck
::
long_index_t
>
(
wi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
static_cast
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
]);
arg
.
conv_dilations_
[
1
]);
if
(
w_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
if
(
w_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
{
{
auto
wo
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
w_tmp
)
/
auto
wo
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
static_cas
t
<
ck
::
long_index_t
>
(
w_tmp
)
/
arg
.
conv_strides_
[
1
]);
static_cast
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
1
]);
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
{
{
for
(
std
::
size_t
k
=
0
;
k
<
K
;
++
k
)
for
(
std
::
size_t
k
=
0
;
k
<
K
;
++
k
)
...
@@ -296,44 +295,41 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -296,44 +295,41 @@ struct ReferenceConvBwdData : public device::BaseOperator
for
(
std
::
size_t
z
=
0
;
z
<
Z
;
++
z
)
for
(
std
::
size_t
z
=
0
;
z
<
Z
;
++
z
)
{
{
auto
d_tmp
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
di
)
+
auto
d_tmp
=
static_cas
t
<
ck
::
long_index_t
>
(
di
)
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
]);
static_cas
t
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
]);
if
(
d_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
if
(
d_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
{
{
auto
do_
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
d_tmp
)
/
auto
do_
=
static_cas
t
<
ck
::
long_index_t
>
(
d_tmp
)
/
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
if
(
do_
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
do_
)
<
Do
)
if
(
do_
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
do_
)
<
Do
)
{
{
for
(
std
::
size_t
y
=
0
;
y
<
Y
;
++
y
)
for
(
std
::
size_t
y
=
0
;
y
<
Y
;
++
y
)
{
{
auto
h_tmp
=
auto
h_tmp
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
hi
)
+
static_cast
<
ck
::
long_index_t
>
(
hi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
static_cast
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
]);
arg
.
conv_dilations_
[
1
]);
if
(
h_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
if
(
h_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
{
{
auto
ho
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
h_tmp
)
/
auto
ho
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
static_cas
t
<
ck
::
long_index_t
>
(
h_tmp
)
/
arg
.
conv_strides_
[
1
]);
static_cast
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
1
]);
if
(
ho
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
ho
)
<
Ho
)
if
(
ho
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
ho
)
<
Ho
)
{
{
for
(
std
::
size_t
x
=
0
;
x
<
X
;
++
x
)
for
(
std
::
size_t
x
=
0
;
x
<
X
;
++
x
)
{
{
auto
w_tmp
=
auto
w_tmp
=
static_cast
<
ck
::
long_index_t
>
(
wi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
wi
)
+
static_cast
<
ck
::
long_index_t
>
(
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
])
-
arg
.
in_left_pads_
[
2
])
-
static_cast
<
ck
::
long_index_t
>
(
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
2
]);
x
*
arg
.
conv_dilations_
[
2
]);
if
(
w_tmp
%
arg
.
conv_strides_
[
2
]
==
0
)
if
(
w_tmp
%
arg
.
conv_strides_
[
2
]
==
0
)
{
{
auto
wo
=
auto
wo
=
static_cast
<
ck
::
long_index_t
>
(
w_tmp
)
/
ck
::
type_convert
<
ck
::
long_index_t
>
(
w_tmp
)
/
static_cast
<
ck
::
long_index_t
>
(
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
2
]);
arg
.
conv_strides_
[
2
]);
if
(
wo
>=
0
&&
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
{
{
...
@@ -381,7 +377,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -381,7 +377,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
arg
.
in_element_op_
(
v_in
,
v_acc
);
arg
.
in_element_op_
(
v_in
,
v_acc
);
// FIXME hacky
// FIXME hacky
arg
.
input_
.
mData
[
in_desc
.
GetOffsetFromMultiIndex
(
n
,
c
,
wi
)]
=
arg
.
input_
.
mData
[
in_desc
.
GetOffsetFromMultiIndex
(
n
,
c
,
di
,
hi
,
wi
)]
=
ck
::
type_convert
<
InDataType
>
(
v_acc
);
ck
::
type_convert
<
InDataType
>
(
v_acc
);
};
};
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_b
ackwar
d_weight.hpp
→
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_b
w
d_weight.hpp
View file @
f6922d3f
...
@@ -139,10 +139,9 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
...
@@ -139,10 +139,9 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
{
{
for
(
std
::
size_t
wo
=
0
;
wo
<
out_desc
.
GetLengths
()[
2
];
++
wo
)
for
(
std
::
size_t
wo
=
0
;
wo
<
out_desc
.
GetLengths
()[
2
];
++
wo
)
{
{
auto
wi
=
auto
wi
=
static_cast
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
0
])
+
static_cast
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
if
(
wi
>=
0
&&
if
(
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
in_desc
.
GetLengths
()[
2
])
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
in_desc
.
GetLengths
()[
2
])
...
@@ -195,17 +194,16 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
...
@@ -195,17 +194,16 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
{
{
for
(
std
::
size_t
ho
=
0
;
ho
<
out_desc
.
GetLengths
()[
2
];
++
ho
)
for
(
std
::
size_t
ho
=
0
;
ho
<
out_desc
.
GetLengths
()[
2
];
++
ho
)
{
{
auto
hi
=
auto
hi
=
static_cast
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
static_cast
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
wo
=
0
;
wo
<
out_desc
.
GetLengths
()[
3
];
++
wo
)
for
(
std
::
size_t
wo
=
0
;
wo
<
out_desc
.
GetLengths
()[
3
];
++
wo
)
{
{
auto
wi
=
auto
wi
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
static_cas
t
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
static_cas
t
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
if
(
hi
>=
0
&&
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
in_desc
.
GetLengths
()[
2
]
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
in_desc
.
GetLengths
()[
2
]
&&
...
@@ -261,24 +259,21 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
...
@@ -261,24 +259,21 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
{
{
for
(
std
::
size_t
do_
=
0
;
do_
<
out_desc
.
GetLengths
()[
2
];
++
do_
)
for
(
std
::
size_t
do_
=
0
;
do_
<
out_desc
.
GetLengths
()[
2
];
++
do_
)
{
{
auto
di
=
auto
di
=
static_cast
<
ck
::
long_index_t
>
(
do_
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
do_
*
arg
.
conv_strides_
[
0
])
+
static_cast
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
ho
=
0
;
ho
<
out_desc
.
GetLengths
()[
3
];
++
ho
)
for
(
std
::
size_t
ho
=
0
;
ho
<
out_desc
.
GetLengths
()[
3
];
++
ho
)
{
{
auto
hi
=
auto
hi
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
1
])
+
static_cas
t
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
1
])
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
])
-
static_cas
t
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
for
(
std
::
size_t
wo
=
0
;
wo
<
out_desc
.
GetLengths
()[
4
];
++
wo
)
for
(
std
::
size_t
wo
=
0
;
wo
<
out_desc
.
GetLengths
()[
4
];
++
wo
)
{
{
auto
wi
=
auto
wi
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
static_cast
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
2
])
+
arg
.
conv_strides_
[
2
])
+
static_cast
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
2
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
]);
arg
.
conv_dilations_
[
2
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
]);
if
(
di
>=
0
&&
if
(
di
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
di
)
<
ck
::
type_convert
<
std
::
size_t
>
(
di
)
<
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
View file @
f6922d3f
...
@@ -157,10 +157,9 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -157,10 +157,9 @@ struct ReferenceConvFwd : public device::BaseOperator
{
{
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
2
];
++
x
)
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
2
];
++
x
)
{
{
auto
wi
=
auto
wi
=
static_cast
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
0
])
+
static_cast
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
if
(
wi
>=
0
&&
if
(
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
in_desc
.
GetLengths
()[
2
])
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
in_desc
.
GetLengths
()[
2
])
...
@@ -213,17 +212,16 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -213,17 +212,16 @@ struct ReferenceConvFwd : public device::BaseOperator
{
{
for
(
std
::
size_t
y
=
0
;
y
<
wei_desc
.
GetLengths
()[
2
];
++
y
)
for
(
std
::
size_t
y
=
0
;
y
<
wei_desc
.
GetLengths
()[
2
];
++
y
)
{
{
auto
hi
=
auto
hi
=
static_cast
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
static_cast
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
3
];
++
x
)
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
3
];
++
x
)
{
{
auto
wi
=
auto
wi
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
static_cas
t
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
static_cas
t
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
if
(
hi
>=
0
&&
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
in_desc
.
GetLengths
()[
2
]
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
in_desc
.
GetLengths
()[
2
]
&&
...
@@ -280,24 +278,21 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -280,24 +278,21 @@ struct ReferenceConvFwd : public device::BaseOperator
{
{
for
(
std
::
size_t
z
=
0
;
z
<
wei_desc
.
GetLengths
()[
2
];
++
z
)
for
(
std
::
size_t
z
=
0
;
z
<
wei_desc
.
GetLengths
()[
2
];
++
z
)
{
{
auto
di
=
auto
di
=
static_cast
<
ck
::
long_index_t
>
(
d_o
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
d_o
*
arg
.
conv_strides_
[
0
])
+
static_cast
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
])
-
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
y
=
0
;
y
<
wei_desc
.
GetLengths
()[
3
];
++
y
)
for
(
std
::
size_t
y
=
0
;
y
<
wei_desc
.
GetLengths
()[
3
];
++
y
)
{
{
auto
hi
=
auto
hi
=
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
1
])
+
static_cas
t
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
1
])
+
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
])
-
static_cas
t
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
])
-
ck
::
type_conver
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
static_cas
t
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
4
];
++
x
)
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
4
];
++
x
)
{
{
auto
wi
=
auto
wi
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
static_cast
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
2
])
+
arg
.
conv_strides_
[
2
])
+
static_cast
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
2
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
static_cast
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
]);
arg
.
conv_dilations_
[
2
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
]);
if
(
di
>=
0
&&
if
(
di
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
di
)
<
ck
::
type_convert
<
std
::
size_t
>
(
di
)
<
in_desc
.
GetLengths
()[
2
]
&&
in_desc
.
GetLengths
()[
2
]
&&
...
...
profiler/include/profile_conv_bwd_weight_impl.hpp
View file @
f6922d3f
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_b
ackwar
d_weight.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_b
w
d_weight.hpp"
namespace
ck
{
namespace
ck
{
namespace
profiler
{
namespace
profiler
{
...
...
test/CMakeLists.txt
View file @
f6922d3f
...
@@ -41,9 +41,8 @@ add_subdirectory(gemm_reduce)
...
@@ -41,9 +41,8 @@ add_subdirectory(gemm_reduce)
add_subdirectory
(
batched_gemm
)
add_subdirectory
(
batched_gemm
)
add_subdirectory
(
batched_gemm_reduce
)
add_subdirectory
(
batched_gemm_reduce
)
add_subdirectory
(
grouped_gemm
)
add_subdirectory
(
grouped_gemm
)
add_subdirectory
(
convnd_fwd
)
add_subdirectory
(
reduce
)
add_subdirectory
(
reduce
)
add_subdirectory
(
conv
2
d_
b
wd
_weight
)
add_subdirectory
(
conv
n
d_
f
wd
)
add_subdirectory
(
convnd_bwd_weight
)
add_subdirectory
(
convnd_bwd_weight
)
add_subdirectory
(
convnd_bwd_data
)
add_subdirectory
(
convnd_bwd_data
)
add_subdirectory
(
block_to_ctile_map
)
add_subdirectory
(
block_to_ctile_map
)
...
...
test/batched_gemm/CMakeLists.txt
View file @
f6922d3f
add_test_executable
(
test_batched_gemm_fp16 batched_gemm_fp16.cpp
)
add_test_executable
(
test_batched_gemm_fp16 batched_gemm_fp16.cpp
)
target_link_libraries
(
test_batched_gemm_fp16 PRIVATE
host_tensor
)
target_link_libraries
(
test_batched_gemm_fp16 PRIVATE
utility
)
target_link_libraries
(
test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance
)
target_link_libraries
(
test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance
)
test/batched_gemm_reduce/CMakeLists.txt
View file @
f6922d3f
add_test_executable
(
test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp
)
add_test_executable
(
test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp
)
target_link_libraries
(
test_batched_gemm_reduce_fp16 PRIVATE
host_tensor
)
target_link_libraries
(
test_batched_gemm_reduce_fp16 PRIVATE
utility
)
target_link_libraries
(
test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance
)
target_link_libraries
(
test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance
)
test/conv2d_bwd_data/CMakeLists.txt
deleted
100644 → 0
View file @
8f722700
add_test_executable
(
test_conv2d_bwd_data conv2d_bwd_data.cpp
)
target_link_libraries
(
test_conv2d_bwd_data PRIVATE host_tensor
)
target_link_libraries
(
test_conv2d_bwd_data PRIVATE device_conv2d_bwd_data_instance
)
test/conv2d_bwd_data/conv2d_bwd_data.cpp
deleted
100644 → 0
View file @
8f722700
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "host_conv.hpp"
#include "tensor_layout.hpp"
#include "device_tensor.hpp"
#include "device_conv_bwd_data.hpp"
#include "element_wise_operation.hpp"
#include "reference_conv_bwd_data.hpp"
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
using
BF16
=
ck
::
bhalf_t
;
using
INT8
=
int8_t
;
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
using
DeviceConvBwdDataNoOpPtr
=
DeviceConvBwdDataPtr
<
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
>
;
void
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances
(
std
::
vector
<
DeviceConvBwdDataNoOpPtr
>&
);
void
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances
(
std
::
vector
<
DeviceConvBwdDataNoOpPtr
>&
);
void
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances
(
std
::
vector
<
DeviceConvBwdDataNoOpPtr
>&
);
void
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances
(
std
::
vector
<
DeviceConvBwdDataNoOpPtr
>&
);
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
template
<
typename
T
>
static
bool
check_out
(
const
Tensor
<
T
>&
ref
,
const
Tensor
<
T
>&
result
)
{
float
max_diff
=
1e-6
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
{
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
if
(
max_diff
<
diff
)
{
return
false
;
}
}
return
true
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
data_type
=
0
;
int
init_method
=
0
;
// Conv shape
ck
::
index_t
N
=
128
;
ck
::
index_t
K
=
256
;
ck
::
index_t
C
=
192
;
ck
::
index_t
Y
=
3
;
ck
::
index_t
X
=
3
;
ck
::
index_t
Hi
=
71
;
ck
::
index_t
Wi
=
71
;
ck
::
index_t
conv_stride_h
=
2
;
ck
::
index_t
conv_stride_w
=
2
;
ck
::
index_t
conv_dilation_h
=
1
;
ck
::
index_t
conv_dilation_w
=
1
;
ck
::
index_t
in_left_pad_h
=
1
;
ck
::
index_t
in_left_pad_w
=
1
;
ck
::
index_t
in_right_pad_h
=
1
;
ck
::
index_t
in_right_pad_w
=
1
;
if
(
argc
==
1
)
{
data_type
=
1
;
init_method
=
1
;
}
else
if
(
argc
==
3
)
{
data_type
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
}
else
if
(
argc
==
18
)
{
data_type
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
N
=
std
::
stoi
(
argv
[
3
]);
K
=
std
::
stoi
(
argv
[
4
]);
C
=
std
::
stoi
(
argv
[
5
]);
Y
=
std
::
stoi
(
argv
[
6
]);
X
=
std
::
stoi
(
argv
[
7
]);
Hi
=
std
::
stoi
(
argv
[
8
]);
Wi
=
std
::
stoi
(
argv
[
9
]);
conv_stride_h
=
std
::
stoi
(
argv
[
10
]);
conv_stride_w
=
std
::
stoi
(
argv
[
11
]);
conv_dilation_h
=
std
::
stoi
(
argv
[
12
]);
conv_dilation_w
=
std
::
stoi
(
argv
[
13
]);
in_left_pad_h
=
std
::
stoi
(
argv
[
14
]);
in_left_pad_w
=
std
::
stoi
(
argv
[
15
]);
in_right_pad_h
=
std
::
stoi
(
argv
[
16
]);
in_right_pad_w
=
std
::
stoi
(
argv
[
17
]);
}
else
{
printf
(
"arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
);
printf
(
"arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
exit
(
1
);
}
auto
Run
=
[
&
](
auto
input_type
,
auto
wei_type
,
auto
out_type
,
auto
acc_type
)
{
using
InDataType
=
decltype
(
input_type
);
using
WeiDataType
=
decltype
(
wei_type
);
using
OutDataType
=
decltype
(
out_type
);
using
AccDataType
=
decltype
(
acc_type
);
using
ReferenceConvBwdInstance
=
ck
::
tensor_operation
::
host
::
ReferenceConvBwdData
<
InDataType
,
WeiDataType
,
OutDataType
,
AccDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
>
;
const
ck
::
index_t
YEff
=
(
Y
-
1
)
*
conv_dilation_h
+
1
;
const
ck
::
index_t
XEff
=
(
X
-
1
)
*
conv_dilation_w
+
1
;
const
ck
::
index_t
Ho
=
(
Hi
+
in_left_pad_h
+
in_right_pad_h
-
YEff
)
/
conv_stride_h
+
1
;
const
ck
::
index_t
Wo
=
(
Wi
+
in_left_pad_w
+
in_right_pad_w
-
XEff
)
/
conv_stride_w
+
1
;
const
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
{{
Hi
,
Wi
}};
const
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
{{
Y
,
X
}};
const
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
{{
Ho
,
Wo
}};
const
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
{{
conv_stride_h
,
conv_stride_w
}};
const
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
{{
conv_dilation_h
,
conv_dilation_w
}};
const
std
::
vector
<
ck
::
index_t
>
input_left_pads
{{
in_left_pad_h
,
in_left_pad_w
}};
const
std
::
vector
<
ck
::
index_t
>
input_right_pads
{{
in_right_pad_h
,
in_right_pad_w
}};
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
N_
,
std
::
size_t
C_
,
std
::
size_t
H
,
std
::
size_t
W
)
{
return
HostTensorDescriptor
(
std
::
vector
<
std
::
size_t
>
({
N_
,
C_
,
H
,
W
}),
std
::
vector
<
std
::
size_t
>
({
C_
*
H
*
W
,
1
,
W
*
C_
,
C_
}));
};
Tensor
<
OutDataType
>
out_n_k_ho_wo
(
f_host_tensor_descriptor
(
N
,
K
,
Ho
,
Wo
));
Tensor
<
WeiDataType
>
wei_k_c_y_x
(
f_host_tensor_descriptor
(
K
,
C
,
Y
,
X
));
Tensor
<
InDataType
>
in_n_c_hi_wi_host_result
(
f_host_tensor_descriptor
(
N
,
C
,
Hi
,
Wi
));
Tensor
<
InDataType
>
in_n_c_hi_wi_device_result
(
f_host_tensor_descriptor
(
N
,
C
,
Hi
,
Wi
));
std
::
cout
<<
"in_n_c_hi_wi: "
<<
in_n_c_hi_wi_host_result
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"wei_k_c_y_x: "
<<
wei_k_c_y_x
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"out_n_k_ho_wo: "
<<
out_n_k_ho_wo
.
mDesc
<<
std
::
endl
;
switch
(
init_method
)
{
case
0
:
break
;
case
1
:
out_n_k_ho_wo
.
GenerateTensorValue
(
GeneratorTensor_2
<
OutDataType
>
{
-
5
,
5
});
wei_k_c_y_x
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
break
;
default:
out_n_k_ho_wo
.
GenerateTensorValue
(
GeneratorTensor_1
<
OutDataType
>
{
1
});
wei_k_c_y_x
.
GenerateTensorValue
(
GeneratorTensor_1
<
WeiDataType
>
{
1
});
}
DeviceMem
in_device_buf
(
sizeof
(
InDataType
)
*
in_n_c_hi_wi_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_device_buf
(
sizeof
(
WeiDataType
)
*
wei_k_c_y_x
.
mDesc
.
GetElementSpace
());
DeviceMem
out_device_buf
(
sizeof
(
OutDataType
)
*
out_n_k_ho_wo
.
mDesc
.
GetElementSpace
());
out_device_buf
.
ToDevice
(
out_n_k_ho_wo
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei_k_c_y_x
.
mData
.
data
());
// reset input to zero
in_n_c_hi_wi_device_result
.
GenerateTensorValue
(
GeneratorTensor_1
<
InDataType
>
{
0
});
in_device_buf
.
ToDevice
(
in_n_c_hi_wi_device_result
.
mData
.
data
());
// get host result
{
auto
ref_conv
=
ReferenceConvBwdInstance
{};
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in_n_c_hi_wi_host_result
,
wei_k_c_y_x
,
out_n_k_ho_wo
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
InElementOp
{},
WeiElementOp
{},
OutElementOp
{});
ref_invoker
.
Run
(
ref_argument
);
}
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
DeviceConvBwdDataNoOpPtr
=
ck
::
tensor_operation
::
device
::
DeviceConvBwdDataPtr
<
PassThrough
,
PassThrough
,
PassThrough
>
;
// add device Conv instances
std
::
vector
<
DeviceConvBwdDataNoOpPtr
>
conv_ptrs
;
if
constexpr
(
ck
::
is_same_v
<
ck
::
remove_cv_t
<
InDataType
>
,
float
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
WeiDataType
>
,
float
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
OutDataType
>
,
float
>
)
{
ck
::
tensor_operation
::
device
::
instance
::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances
(
conv_ptrs
);
}
else
if
constexpr
(
ck
::
is_same_v
<
ck
::
remove_cv_t
<
InDataType
>
,
ck
::
half_t
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
WeiDataType
>
,
ck
::
half_t
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
OutDataType
>
,
ck
::
half_t
>
)
{
ck
::
tensor_operation
::
device
::
instance
::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances
(
conv_ptrs
);
}
else
if
constexpr
(
ck
::
is_same_v
<
ck
::
remove_cv_t
<
InDataType
>
,
ck
::
bhalf_t
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
WeiDataType
>
,
ck
::
bhalf_t
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
OutDataType
>
,
ck
::
bhalf_t
>
)
{
ck
::
tensor_operation
::
device
::
instance
::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances
(
conv_ptrs
);
}
else
if
constexpr
(
ck
::
is_same_v
<
ck
::
remove_cv_t
<
InDataType
>
,
int8_t
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
WeiDataType
>
,
int8_t
>
&&
ck
::
is_same_v
<
ck
::
remove_cv_t
<
OutDataType
>
,
int8_t
>
)
{
ck
::
tensor_operation
::
device
::
instance
::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances
(
conv_ptrs
);
}
if
(
conv_ptrs
.
size
()
<=
0
)
{
throw
std
::
runtime_error
(
"wrong! no device Conv instance found"
);
}
// profile device Conv instances
bool
success
=
true
;
for
(
auto
&
conv_ptr
:
conv_ptrs
)
{
auto
argument_ptr
=
conv_ptr
->
MakeArgumentPointer
(
static_cast
<
InDataType
*>
(
in_device_buf
.
GetDeviceBuffer
()),
static_cast
<
WeiDataType
*>
(
wei_device_buf
.
GetDeviceBuffer
()),
static_cast
<
OutDataType
*>
(
out_device_buf
.
GetDeviceBuffer
()),
N
,
K
,
C
,
input_spatial_lengths
,
filter_spatial_lengths
,
output_spatial_lengths
,
conv_filter_strides
,
conv_filter_dilations
,
input_left_pads
,
input_right_pads
,
InElementOp
{},
WeiElementOp
{},
OutElementOp
{});
if
(
conv_ptr
->
IsSupportedArgument
(
argument_ptr
.
get
()))
{
auto
invoker_ptr
=
conv_ptr
->
MakeInvokerPointer
();
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
1
);
in_device_buf
.
FromDevice
(
in_n_c_hi_wi_device_result
.
mData
.
data
());
if
(
!
check_out
(
in_n_c_hi_wi_host_result
,
in_n_c_hi_wi_device_result
))
{
std
::
cout
<<
"Fail Info: "
<<
conv_ptr
->
GetTypeString
()
<<
std
::
endl
;
success
=
false
;
}
else
{
std
::
cout
<<
"Pass Info: "
<<
conv_ptr
->
GetTypeString
()
<<
std
::
endl
;
}
}
else
{
std
::
cout
<<
"Not support Info: "
<<
conv_ptr
->
GetTypeString
()
<<
std
::
endl
;
}
}
if
(
success
)
{
std
::
cout
<<
"test conv2d bwd : Pass"
<<
std
::
endl
;
return
0
;
}
else
{
std
::
cout
<<
"test conv2d bwd: Fail "
<<
std
::
endl
;
return
-
1
;
}
};
if
(
data_type
==
0
)
{
return
Run
(
F32
(),
F32
(),
F32
(),
F32
());
}
else
if
(
data_type
==
1
)
{
return
Run
(
F16
(),
F16
(),
F16
(),
F32
());
}
else
if
(
data_type
==
2
)
{
return
Run
(
BF16
(),
BF16
(),
BF16
(),
F32
());
}
else
if
(
data_type
==
3
)
{
return
Run
(
INT8
(),
INT8
(),
INT8
(),
int
());
}
else
{
return
1
;
}
}
test/conv2d_bwd_weight/CMakeLists.txt
deleted
100644 → 0
View file @
8f722700
add_test_executable
(
test_conv2d_bwd_weight conv2d_bwd_weight.cpp
)
target_link_libraries
(
test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_util
)
test/conv2d_bwd_weight/conv2d_bwd_weight.cpp
deleted
100644 → 0
View file @
8f722700
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <vector>
#include "test/convnd_fwd/conv_util.hpp"
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
int
test_self
()
{
bool
pass
=
true
;
std
::
vector
<
ck
::
utils
::
conv
::
ConvParams
>
params
;
params
.
push_back
({
2
,
128
,
256
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
params
.
push_back
({
2
,
128
,
256
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
params
.
push_back
({
2
,
128
,
256
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
for
(
auto
&
param
:
params
)
{
// f32
pass
&=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
.
N_
,
param
.
K_
,
param
.
C_
,
param
.
input_spatial_lengths_
,
param
.
filter_spatial_lengths_
,
param
.
GetOutputSpatialLengths
(),
param
.
conv_filter_strides_
,
param
.
conv_filter_dilations_
,
param
.
input_left_pads_
,
param
.
input_right_pads_
,
2
);
// fp16
pass
&=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
.
N_
,
param
.
K_
,
param
.
C_
,
param
.
input_spatial_lengths_
,
param
.
filter_spatial_lengths_
,
param
.
GetOutputSpatialLengths
(),
param
.
conv_filter_strides_
,
param
.
conv_filter_dilations_
,
param
.
input_left_pads_
,
param
.
input_right_pads_
,
2
);
}
return
pass
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
data_type
=
1
;
int
init_method
=
1
;
// Conv shape
ck
::
index_t
N
=
128
;
ck
::
index_t
K
=
256
;
ck
::
index_t
C
=
192
;
ck
::
index_t
Y
=
3
;
ck
::
index_t
X
=
3
;
ck
::
index_t
Hi
=
71
;
ck
::
index_t
Wi
=
71
;
ck
::
index_t
conv_stride_h
=
2
;
ck
::
index_t
conv_stride_w
=
2
;
ck
::
index_t
conv_dilation_h
=
1
;
ck
::
index_t
conv_dilation_w
=
1
;
ck
::
index_t
in_left_pad_h
=
1
;
ck
::
index_t
in_left_pad_w
=
1
;
ck
::
index_t
in_right_pad_h
=
1
;
ck
::
index_t
in_right_pad_w
=
1
;
ck
::
index_t
split_k
=
1
;
bool
pass
=
true
;
if
(
argc
==
1
)
{
pass
=
test_self
();
}
else
{
if
(
argc
==
3
)
{
data_type
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
}
else
if
(
argc
==
19
)
{
data_type
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
N
=
std
::
stoi
(
argv
[
3
]);
K
=
std
::
stoi
(
argv
[
4
]);
C
=
std
::
stoi
(
argv
[
5
]);
Y
=
std
::
stoi
(
argv
[
6
]);
X
=
std
::
stoi
(
argv
[
7
]);
Hi
=
std
::
stoi
(
argv
[
8
]);
Wi
=
std
::
stoi
(
argv
[
9
]);
conv_stride_h
=
std
::
stoi
(
argv
[
10
]);
conv_stride_w
=
std
::
stoi
(
argv
[
11
]);
conv_dilation_h
=
std
::
stoi
(
argv
[
12
]);
conv_dilation_w
=
std
::
stoi
(
argv
[
13
]);
in_left_pad_h
=
std
::
stoi
(
argv
[
14
]);
in_left_pad_w
=
std
::
stoi
(
argv
[
15
]);
in_right_pad_h
=
std
::
stoi
(
argv
[
16
]);
in_right_pad_w
=
std
::
stoi
(
argv
[
17
]);
split_k
=
std
::
stoi
(
argv
[
18
]);
}
else
{
printf
(
"arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
);
printf
(
"arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
exit
(
1
);
}
ck
::
utils
::
conv
::
ConvParams
param
{
2
,
N
,
K
,
C
,
{
Y
,
X
},
{
Hi
,
Wi
},
{
conv_stride_h
,
conv_stride_w
},
{
conv_dilation_h
,
conv_dilation_w
},
{
in_left_pad_h
,
in_left_pad_w
},
{
in_right_pad_h
,
in_right_pad_w
}};
if
(
data_type
==
0
)
{
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
float
,
float
,
float
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
(
true
,
// do_verification
init_method
,
false
,
// do_log
false
,
// time_kernel
param
.
N_
,
param
.
K_
,
param
.
C_
,
param
.
input_spatial_lengths_
,
param
.
filter_spatial_lengths_
,
param
.
GetOutputSpatialLengths
(),
param
.
conv_filter_strides_
,
param
.
conv_filter_dilations_
,
param
.
input_left_pads_
,
param
.
input_right_pads_
,
split_k
);
}
else
if
(
data_type
==
1
)
{
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
(
true
,
// do_verification
init_method
,
false
,
// do_log
false
,
// time_kernel
param
.
N_
,
param
.
K_
,
param
.
C_
,
param
.
input_spatial_lengths_
,
param
.
filter_spatial_lengths_
,
param
.
GetOutputSpatialLengths
(),
param
.
conv_filter_strides_
,
param
.
conv_filter_dilations_
,
param
.
input_left_pads_
,
param
.
input_right_pads_
,
split_k
);
}
else
{
std
::
cout
<<
"Not support data type"
<<
std
::
endl
;
return
1
;
}
}
if
(
pass
)
{
std
::
cout
<<
"test conv2d bwd weight : Pass"
<<
std
::
endl
;
return
0
;
}
else
{
std
::
cout
<<
"test conv2d bwd weight: Fail "
<<
std
::
endl
;
return
-
1
;
}
}
test/conv_util/CMakeLists.txt
View file @
f6922d3f
add_gtest_executable
(
test_conv_util conv_util.cpp
)
add_gtest_executable
(
test_conv_util conv_util.cpp
)
target_link_libraries
(
test_conv_util PRIVATE
host_tensor conv_
util
)
target_link_libraries
(
test_conv_util PRIVATE util
ity
)
test/conv_util/conv_util.cpp
View file @
f6922d3f
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv
_
uti
l
.hpp"
#include "ck/library/utility/conv
ol
uti
on_parameter
.hpp"
namespace
{
namespace
{
...
...
test/convnd_bwd_data/CMakeLists.txt
View file @
f6922d3f
add_test_executable
(
test_convnd_bwd_data convnd_bwd_data.cpp
)
add_test_executable
(
test_convnd_bwd_data convnd_bwd_data.cpp
)
target_link_libraries
(
test_convnd_bwd_data PRIVATE
host_tensor
device_conv
n
d_bwd_data_instance
conv_util
)
target_link_libraries
(
test_convnd_bwd_data PRIVATE
utility device_conv1d_bwd_data_instance
device_conv
2
d_bwd_data_instance
device_conv3d_bwd_data_instance
)
test/convnd_bwd_data/convnd_bwd_data.cpp
View file @
f6922d3f
This diff is collapsed.
Click to expand it.
test/convnd_bwd_weight/CMakeLists.txt
View file @
f6922d3f
add_test_executable
(
test_convnd_bwd_weight convnd_bwd_weight.cpp
)
add_test_executable
(
test_convnd_bwd_weight convnd_bwd_weight.cpp
)
target_link_libraries
(
test_convnd_bwd_weight PRIVATE
host_tensor
device_convnd_bwd_weight_instance
conv_util
)
target_link_libraries
(
test_convnd_bwd_weight PRIVATE
utility
device_convnd_bwd_weight_instance
)
test/convnd_fwd/CMakeLists.txt
View file @
f6922d3f
add_custom_target
(
test_convnd_fwd
)
add_custom_target
(
test_convnd_fwd
)
add_gtest_executable
(
test_conv1d_fwd conv1d_fwd.cpp
)
add_gtest_executable
(
test_conv1d_fwd conv1d_fwd.cpp
)
target_link_libraries
(
test_conv1d_fwd PRIVATE
host_tensor
device_conv1d_fwd_instance
conv_util
)
target_link_libraries
(
test_conv1d_fwd PRIVATE
utility
device_conv1d_fwd_instance
)
add_dependencies
(
test_convnd_fwd test_conv1d_fwd
)
add_dependencies
(
test_convnd_fwd test_conv1d_fwd
)
add_gtest_executable
(
test_conv2d_fwd conv2d_fwd.cpp
)
add_gtest_executable
(
test_conv2d_fwd conv2d_fwd.cpp
)
target_link_libraries
(
test_conv2d_fwd PRIVATE
host_tensor device_conv2d_fwd_instance
device_conv
nd_
2d_fwd_instance
conv_util
)
target_link_libraries
(
test_conv2d_fwd PRIVATE
utility
device_conv2d_fwd_instance
)
add_dependencies
(
test_convnd_fwd test_conv2d_fwd
)
add_dependencies
(
test_convnd_fwd test_conv2d_fwd
)
add_gtest_executable
(
test_conv3d_fwd conv3d_fwd.cpp
)
add_gtest_executable
(
test_conv3d_fwd conv3d_fwd.cpp
)
target_link_libraries
(
test_conv3d_fwd PRIVATE
host_tensor
device_conv3d_fwd_instance
conv_util
)
target_link_libraries
(
test_conv3d_fwd PRIVATE
utility
device_conv3d_fwd_instance
)
add_dependencies
(
test_convnd_fwd test_conv3d_fwd
)
add_dependencies
(
test_convnd_fwd test_conv3d_fwd
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment