gaoqiong / composable_kernel · Commits

Commit 27bad50b
authored Oct 29, 2021 by Jing Zhang

add dynamic mode of maxpool

parent 982c3b60
Showing 3 changed files with 28 additions and 31 deletions
host/driver_offline/include/device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp  (+15 -12)
host/driver_offline/include/driver_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp  (+7 -16)
host/driver_offline/src/conv_maxpool_fwd_driver_offline_nchwc.cpp  (+6 -3)
host/driver_offline/include/device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp @ 27bad50b
@@ -73,7 +73,7 @@ void device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1
     bias_k0_k1_device_buf.ToDevice(bias_k0_k1.mData.data());
     max_n_k0_hx_wx_k1_device_buf.ToDevice(max_n_k0_hx_wx_k1.mData.data());

-    constexpr index_t InWeiVectorSize = C1;
+    constexpr index_t InWeiVectorSize = 8;

     if(C1 % InWeiVectorSize != 0)
     {
@@ -108,32 +108,35 @@ void device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1
 #elif 1
     constexpr index_t BlockSize = 64;

-    constexpr index_t KPerBlock = K;
+    constexpr index_t KPerBlock = 8;
     constexpr index_t HoPerBlock = 8;
     constexpr index_t WoPerBlock = 32;

-    constexpr index_t E1 = C0 * Y * X;
-    constexpr index_t E2 = C1 / InWeiVectorSize;
+    constexpr index_t E1 = 2 * 9;
+    constexpr index_t E2 = 1;
     constexpr index_t K2 = 2;

-    constexpr index_t E1PerBlock = C0;
+    constexpr index_t E1PerBlock = 2;

-    constexpr index_t KPerThread = K;
+    constexpr index_t KPerThread = KPerBlock;
     constexpr index_t HoPerThread = 2;
     constexpr index_t WoPerThread = 2;
     constexpr index_t EPerThread = 1;

-    using ABlockTransferThreadSliceLengths_E0_E1_K0_K1_E2 = Sequence<1, Y * X, 1, 1, E2>;
+    using ABlockTransferThreadSliceLengths_E0_E1_K0_K1_E2 = Sequence<1, 9, 1, 1, E2>;
     using ABlockTransferThreadClusterLengths_E0_E1_K0_K1_E2 = Sequence<1, E1PerBlock, 1, KPerBlock, 1>;

-    constexpr index_t ABlockTransferSrcScalarPerVector_E2 = E2;
-    constexpr index_t ABlockTransferDstScalarPerVector_E2 = E2;
+    constexpr index_t ABlockTransferSrcScalarPerVector_E2 = E2;
+    constexpr index_t ABlockTransferDstScalarPerVector_E2 = E2;

     constexpr index_t BThreadTransferSrcScalarPerVector_E2 = E2;

-    constexpr index_t CThreadTransferDstScalarPerVector_K = K1;
+    constexpr index_t CThreadTransferDstScalarPerVector_K = InWeiVectorSize;
 #endif

     if(KPerThread % InWeiVectorSize != 0)
     {
         throw std::runtime_error("wrong! C1 cannot be divided by InWeiVectorSize");
     }

     const auto in_n_c0_hi_wi_c1_desc =
         make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi, E2));
     const auto wei_k_c0_y_x_c1_desc =
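For reference, the constants hard-coded above follow the decomposition this device wrapper uses for the GEMM reduction dimension: E1 = C0 * Y * X and E2 = C1 / InWeiVectorSize. A minimal sketch of that arithmetic, assuming hypothetical sizes C0 = 2, C1 = 8 and a 3x3 filter (Y = X = 3), which reproduce the 2 * 9 and 1 chosen in this commit:

    #include <cassert>
    #include <cstdio>

    using index_t = int; // stand-in for ck::index_t

    int main()
    {
        // Hypothetical shapes, chosen only so the results match the constants above.
        const index_t C0 = 2, C1 = 8; // channels split as C = C0 * C1
        const index_t Y = 3, X = 3;   // filter height / width
        const index_t InWeiVectorSize = 8;

        assert(C1 % InWeiVectorSize == 0); // same guard as the device wrapper

        const index_t E1 = C0 * Y * X;           // 2 * 9 = 18
        const index_t E2 = C1 / InWeiVectorSize; // 8 / 8 = 1

        std::printf("E1 = %d, E2 = %d\n", E1, E2);
        return 0;
    }

With these hypothetical sizes the divisibility guard passes, since C1 is an exact multiple of InWeiVectorSize.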
host/driver_offline/include/driver_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp @ 27bad50b
@@ -85,7 +85,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         const auto ConvDilationH = conv_dilations[I0];
         const auto ConvDilationW = conv_dilations[I1];

-#if 1
+#if CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR
         const auto Hop = Number<(Ho + HoPerBlock - 1) / HoPerBlock * HoPerBlock>{};
         const auto Wop = Number<(Wo + WoPerBlock - 1) / WoPerBlock * WoPerBlock>{};
 #else
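The Hop/Wop expressions above round the convolution output up to a multiple of the block tile, so the grid decomposes into whole HoPerBlock x WoPerBlock tiles. A minimal sketch of that integer round-up, with hypothetical values Ho = 28 and HoPerBlock = 8 (not taken from the driver):

    #include <cstdio>

    using index_t = int; // stand-in for ck::index_t

    // The (x + m - 1) / m * m idiom used for Hop/Wop: round x up to a multiple of m.
    index_t round_up_to_multiple(index_t x, index_t m) { return (x + m - 1) / m * m; }

    int main()
    {
        const index_t Ho = 28, HoPerBlock = 8; // hypothetical sizes
        std::printf("Hop = %d\n", round_up_to_multiple(Ho, HoPerBlock)); // 32
        return 0;
    }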
@@ -190,8 +190,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
             make_naive_tensor_descriptor_packed(make_tuple(N, K0, Hx, Wx, K1)),
             make_tuple(make_merge_transform(make_tuple(K0, K1)),
                        make_pass_through_transform(N),
-                       make_pad_transform(Hx, I0, Number<OutRightPadHx>{}),
-                       make_pad_transform(Wx, I0, Number<OutRightPadWx>{})),
+                       make_pad_transform(Hx, I0, OutRightPadHx),
+                       make_pad_transform(Wx, I0, OutRightPadWx)),
             make_tuple(Sequence<1, 4>{}, Sequence<0>{}, Sequence<2>{}, Sequence<3>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
@@ -293,10 +293,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         // clang-format on

-        static_assert(a_e0_e1_k_e2_grid_desc.IsKnownAtCompileTime(), "");
-        static_assert(b_e0_e1_n_ho_wo_e2_grid_desc.IsKnownAtCompileTime(), "");
-        static_assert(c_k_n_hop_wop_grid_desc.IsKnownAtCompileTime(), "");
-
         // GEMM
         using GridwiseGemm = GridwiseGemmDlops_km_kn_mn_v3<BlockSize,
@@ -371,16 +367,11 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         float ave_time = 0;

-        static_assert(a_e0_e1_k0_k1_e2_grid_desc.IsKnownAtCompileTime(), "");
-        static_assert(b_e0_e1_n_h0_h1_h2_w0_w1_w2_e2_grid_desc.IsKnownAtCompileTime(), "");
-        static_assert(c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.IsKnownAtCompileTime(), "");
-        static_assert(c_blockid_to_k_n_h_w_block_cluster_adaptor.IsKnownAtCompileTime(), "");
-
 #if CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VALUE
         if(has_main_e0_block_loop)
         {
-            const auto kernel = kernel_gemm_dlops_v2_maxpool<
+            const auto kernel = kernel_gemm_dlops_v3_maxpool<
                 GridwiseGemm,
                 FloatAB,
                 FloatC,
@@ -409,7 +400,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         }
         else
         {
-            const auto kernel = kernel_gemm_dlops_v2_maxpool<
+            const auto kernel = kernel_gemm_dlops_v3_maxpool<
                 GridwiseGemm,
                 FloatAB,
                 FloatC,
@@ -461,7 +452,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         if(has_main_e0_block_loop)
         {
-            const auto kernel = kernel_gemm_dlops_v2_maxpool<
+            const auto kernel = kernel_gemm_dlops_v3_maxpool<
                 GridwiseGemm,
                 FloatAB,
                 FloatC,
@@ -497,7 +488,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
         else
         {
-            const auto kernel = kernel_gemm_dlops_v2_maxpool<
+            const auto kernel = kernel_gemm_dlops_v3_maxpool<
                 GridwiseGemm,
                 FloatAB,
                 FloatC,
host/driver_offline/src/conv_maxpool_fwd_driver_offline_nchwc.cpp @ 27bad50b
@@ -15,7 +15,7 @@
 #include "device_tensor.hpp"
 #include "device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp"

-#define USE_DYNAMIC_MODE 0
+#define USE_DYNAMIC_MODE 1
 #define USE_CONV_FWD_V5R1_NCHWC 1

 enum ConvForwardAlgo
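Switching USE_DYNAMIC_MODE from 0 to 1 routes the driver through the runtime-shape path shown in the hunks below (sizes parsed from the command line, Ho/Wo computed at run time) instead of the compile-time constants of static mode. A minimal, self-contained sketch of that pattern; the argument position and the size are hypothetical, and the real driver wraps static sizes in ck's Number<> for compile-time tensor descriptors:

    #include <cstdio>
    #include <cstdlib>

    using index_t = int; // stand-in for ck::index_t

    #define USE_DYNAMIC_MODE 1 // the flag this commit flips from 0 to 1

    int main(int argc, char* argv[])
    {
    #if USE_DYNAMIC_MODE
        // dynamic mode: shapes arrive at run time (hypothetical argv position)
        const index_t Hi = argc > 1 ? std::atoi(argv[1]) : 28;
    #else
        // static mode: shapes are compile-time constants
        // (the real driver wraps them in ck's Number<> descriptors)
        constexpr index_t Hi = 28;
    #endif
        std::printf("Hi = %d\n", Hi);
        return 0;
    }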
@@ -46,7 +46,7 @@ int main(int argc, char* argv[])
         exit(1);
     }

-    constexpr index_t activ_type = 0;
+    constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;

     const ConvForwardAlgo algo = static_cast<ConvForwardAlgo>(std::stoi(argv[1]));
     const bool do_verification = std::stoi(argv[2]);
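The activation selector changes from a bare integer to the ck::ActivTypeEnum_t enum, set here to LeakyRelu. Neither the enum definition nor the kernel-side activation is part of this diff; purely as a reminder of the math, a leaky ReLU with a hypothetical negative slope looks like:

    #include <cstdio>

    // Hypothetical negative slope; the value composable_kernel uses is not shown in this diff.
    float leaky_relu(float x, float negative_slope = 0.3f)
    {
        return x >= 0.0f ? x : negative_slope * x;
    }

    int main()
    {
        std::printf("%.2f %.2f\n", leaky_relu(2.0f), leaky_relu(-2.0f)); // 2.00 -0.60
        return 0;
    }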
@@ -78,6 +78,9 @@ int main(int argc, char* argv[])
     const index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
     const index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;

+    const index_t Ho_2 = Ho / 2;
+    const index_t Wo_2 = Wo / 2;
+
 #else
     // static mode
     if(argc < 6)
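In dynamic mode the convolution output sizes Ho and Wo come from the usual padded-convolution formula evaluated at run time, and the new Ho_2/Wo_2 halve them for the fused maxpool output (the division by 2 suggests a 2x2 window with stride 2; the pooling parameters themselves are not in this hunk). A small worked example with hypothetical sizes:

    #include <cstdio>

    using index_t = int; // stand-in for ck::index_t

    int main()
    {
        // Hypothetical convolution geometry (not taken from the driver).
        const index_t Hi = 28, Wi = 28;                       // input H/W
        const index_t Y = 3, X = 3;                           // filter H/W
        const index_t conv_stride_h = 1, conv_stride_w = 1;
        const index_t conv_dilation_h = 1, conv_dilation_w = 1;
        const index_t in_left_pad_h = 1, in_right_pad_h = 1;
        const index_t in_left_pad_w = 1, in_right_pad_w = 1;

        // Effective filter extents with dilation.
        const index_t YEff = (Y - 1) * conv_dilation_h + 1;
        const index_t XEff = (X - 1) * conv_dilation_w + 1;

        // Same formulas as the driver's dynamic mode.
        const index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1; // 28
        const index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1; // 28

        // Pooled output, assuming the implied 2x2 / stride-2 maxpool.
        const index_t Ho_2 = Ho / 2; // 14
        const index_t Wo_2 = Wo / 2; // 14

        std::printf("Ho=%d Wo=%d Ho_2=%d Wo_2=%d\n", Ho, Wo, Ho_2, Wo_2);
        return 0;
    }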
@@ -93,7 +96,7 @@ int main(int argc, char* argv[])
     const bool do_log = std::stoi(argv[4]);
     const int nrepeat = std::stoi(argv[5]);

-    constexpr index_t activ_type = 1;
+    constexpr ck::ActivTypeEnum_t activ_type = ActivTypeEnum_t::LeakyRelu;

 #if 0
     constexpr auto N = Number<1>{};