Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b8bb1480
Commit
b8bb1480
authored
Jul 22, 2021
by
Jing Zhang
Browse files
3x3 test
parent
40b7d356
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
43 additions
and
27 deletions
+43
-27
composable_kernel/include/driver/driver_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
...tion_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
+8
-0
host/driver_offline/conv_fwd_driver_offline.cpp
host/driver_offline/conv_fwd_driver_offline.cpp
+35
-1
host/driver_offline/include/device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
+0
-26
No files found.
composable_kernel/include/driver/driver_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
View file @
b8bb1480
...
...
@@ -271,6 +271,14 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
index_t
nrepeat
=
100
;
std
::
cout
<<
"conv_v5r1__NCHWc"
<<
K1
<<
"_n"
<<
N
<<
"c"
<<
C
<<
"h"
<<
Hi
<<
"w"
<<
Wi
<<
"-k"
<<
K
<<
"c"
<<
C
<<
"y"
<<
Y
<<
"x"
<<
X
<<
"-u"
<<
conv_strides
[
I0
]
<<
"v"
<<
conv_strides
[
I1
]
<<
"l"
<<
conv_dilations
[
I0
]
<<
"j"
<<
conv_dilations
[
I1
]
<<
"q"
<<
in_left_pads
[
I0
]
<<
"p"
<<
in_right_pads
[
I0
]
<<
std
::
endl
;
std
::
cout
<<
"GridSize = "
<<
GridSize
<<
" BlockSize = "
<<
BlockSize
<<
std
::
endl
;
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
std
::
cout
<<
"Start running "
<<
nrepeat
<<
" times..."
<<
std
::
endl
;
...
...
host/driver_offline/conv_fwd_driver_offline.cpp
View file @
b8bb1480
...
...
@@ -103,13 +103,47 @@ int main(int argc, char* argv[])
const
bool
do_log
=
atoi
(
argv
[
5
]);
const
int
nrepeat
=
atoi
(
argv
[
6
]);
#if 0
constexpr index_t N = 1;
constexpr index_t C = 16;
constexpr index_t Hi = 1080;
constexpr index_t Wi = 1920;
constexpr index_t K = 16;
constexpr index_t Y = 3;
constexpr index_t X = 3;
#elif
0
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
Hi
=
54
4
;
constexpr
index_t
Hi
=
54
0
;
constexpr
index_t
Wi
=
960
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
3
;
constexpr
index_t
X
=
3
;
#elif 0
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
Hi
=
480
;
constexpr
index_t
Wi
=
270
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
3
;
constexpr
index_t
X
=
3
;
#elif 1
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
Hi
=
240
;
constexpr
index_t
Wi
=
135
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
3
;
constexpr
index_t
X
=
3
;
#elif 1
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
Hi
=
1080
;
constexpr
index_t
Wi
=
1920
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
1
;
constexpr
index_t
X
=
1
;
#endif
const
index_t
conv_stride_h
=
1
;
const
index_t
conv_stride_w
=
1
;
...
...
host/driver_offline/include/device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
View file @
b8bb1480
...
...
@@ -92,7 +92,6 @@ void device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
const
auto
out_n_k0_ho_wo_k1_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
N
,
K0
,
Ho
,
Wo
,
K1
));
#if 1
// cdata = 64, BlockSize = 64, 16x8x32x4
constexpr
index_t
BlockSize
=
64
;
...
...
@@ -114,34 +113,9 @@ void device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
constexpr
index_t
BThreadTransferSrcScalarPerVector_W
=
1
;
constexpr
index_t
CThreadTransferDstScalarPerVector_W
=
8
;
static_assert
(
KPerThread
%
CThreadTransferDstScalarPerVector_W
==
0
,
""
);
#else
constexpr
index_t
BlockSize
=
64
;
constexpr
index_t
KPerBlock
=
16
;
constexpr
index_t
HoPerBlock
=
8
;
constexpr
index_t
WoPerBlock
=
32
;
constexpr
index_t
EPerBlock
=
1
;
constexpr
index_t
KPerThread
=
16
;
constexpr
index_t
HoPerThread
=
2
;
constexpr
index_t
WoPerThread
=
2
;
constexpr
index_t
EPerThread
=
EPerBlock
;
using
ABlockTransferThreadSliceLengths_E_K
=
Sequence
<
9
,
1
>
;
using
ABlockTransferThreadClusterLengths_E_K
=
Sequence
<
EPerBlock
,
16
>
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_E
=
1
;
constexpr
index_t
ABlockTransferDstScalarPerVector_K
=
1
;
constexpr
index_t
BThreadTransferSrcScalarPerVector_W
=
1
;
constexpr
index_t
CThreadTransferDstScalarPerVector_W
=
K1
;
static_assert
(
KPerThread
%
CThreadTransferDstScalarPerVector_W
==
0
,
""
);
#endif
constexpr
auto
conv_driver
=
#if 0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment