Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
d1842890
Commit
d1842890
authored
Aug 09, 2021
by
Chao Liu
Browse files
tidy
parent
f885c131
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
21 additions
and
44 deletions
+21
-44
CMakeLists.txt
CMakeLists.txt
+4
-0
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp
...de/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp
+0
-6
host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk.hpp
...ackward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk.hpp
+1
-9
host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp
...kward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp
+1
-6
host/driver_offline/src/conv_bwd_driver_offline.cpp
host/driver_offline/src/conv_bwd_driver_offline.cpp
+0
-2
host/driver_offline/src/conv_fwd_driver_offline.cpp
host/driver_offline/src/conv_fwd_driver_offline.cpp
+4
-4
host/host_tensor/CMakeLists.txt
host/host_tensor/CMakeLists.txt
+2
-0
host/host_tensor/include/host_conv_bwd_data.hpp
host/host_tensor/include/host_conv_bwd_data.hpp
+9
-17
No files found.
CMakeLists.txt
View file @
d1842890
...
...
@@ -38,6 +38,10 @@ link_libraries(${OpenMP_pthread_LIBRARY})
find_package
(
HIP REQUIRED
)
message
(
STATUS
"Build with HIP
${
hip_VERSION
}
"
)
## half
#find_path(HALF_INCLUDE_DIR half.hpp)
#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
## tidy
include
(
EnableCompilerWarnings
)
set
(
MIOPEN_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
...
...
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp
View file @
d1842890
...
...
@@ -203,9 +203,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
__host__
__device__
static
constexpr
auto
MakeCM0M1M2NGridDescriptor
(
const
CMNGridDesc
&
c_m_n_grid_desc
)
{
const
auto
M
=
c_m_n_grid_desc
.
GetLength
(
I0
);
const
auto
N
=
c_m_n_grid_desc
.
GetLength
(
I1
);
constexpr
auto
xdlops_gemm
=
XdlopsGemm
<
FloatAB
,
MPerWave
,
NPerWave
,
K1
>
{};
constexpr
auto
CLayout
=
xdlops_gemm
.
GetCLayout
();
...
...
@@ -217,7 +214,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
constexpr
index_t
MWaves
=
MPerBlock
/
(
MPerWave
*
MRepeat
);
constexpr
index_t
NWaves
=
NPerBlock
/
(
NPerWave
*
NRepeat
);
constexpr
auto
N0
=
Number
<
CLayout
.
N1
()
>
{};
constexpr
auto
N1
=
Number
<
CLayout
.
N0
()
>
{};
const
auto
c_m0_m1_m2_n_grid_desc
=
transform_dynamic_tensor_descriptor
(
...
...
@@ -277,8 +273,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
p_c_grid
,
c_m0_m1_m2_n_grid_desc
.
GetElementSpaceSize
());
const
auto
K0
=
a_k0_m_k1_grid_desc
.
GetLength
(
I0
);
const
auto
M
=
a_k0_m_k1_grid_desc
.
GetLength
(
I1
);
const
auto
N
=
b_k0_n_k1_grid_desc
.
GetLength
(
I1
);
// divide block work by [M, N]
const
auto
block_work_idx
=
...
...
host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk.hpp
View file @
d1842890
...
...
@@ -35,11 +35,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyx
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
I4
=
Number
<
4
>
{};
constexpr
auto
I5
=
Number
<
5
>
{};
constexpr
auto
I6
=
Number
<
6
>
{};
constexpr
auto
I7
=
Number
<
7
>
{};
constexpr
auto
I8
=
Number
<
8
>
{};
DeviceMem
in_n_hi_wi_c_device_buf
(
sizeof
(
TInWei
)
*
in_n_hi_wi_c
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_k_y_x_c_device_buf
(
sizeof
(
TInWei
)
*
wei_k_y_x_c
.
mDesc
.
GetElementSpace
());
...
...
@@ -319,16 +314,13 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyx
const
auto
K
=
out_n_ho_wo_k_lengths
[
I3
];
const
auto
C
=
wei_k_y_x_c_lengths
[
I3
];
const
auto
Hi
=
in_n_hi_wi_c_lengths
[
I1
];
const
auto
Wi
=
in_n_hi_wi_c_lengths
[
I2
];
const
auto
Ho
=
out_n_ho_wo_k_lengths
[
I1
];
const
auto
Wo
=
out_n_ho_wo_k_lengths
[
I2
];
const
auto
Y
=
wei_k_y_x_c_lengths
[
I1
];
const
auto
X
=
wei_k_y_x_c_lengths
[
I2
];
float
perf
=
(
float
)
(
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
)
/
float
perf
=
static_cast
<
float
>
(
(
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
)
)
/
(
std
::
size_t
(
1000
)
*
1000
*
1000
)
/
ave_time
;
std
::
cout
<<
"Average time : "
<<
ave_time
<<
" ms, "
<<
perf
<<
" TFlop/s"
...
...
host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp
View file @
d1842890
...
...
@@ -35,11 +35,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
I4
=
Number
<
4
>
{};
constexpr
auto
I5
=
Number
<
5
>
{};
constexpr
auto
I6
=
Number
<
6
>
{};
constexpr
auto
I7
=
Number
<
7
>
{};
constexpr
auto
I8
=
Number
<
8
>
{};
DeviceMem
in_n_hi_wi_c_device_buf
(
sizeof
(
TInWei
)
*
in_n_hi_wi_c
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_k_y_x_c_device_buf
(
sizeof
(
TInWei
)
*
wei_k_y_x_c
.
mDesc
.
GetElementSpace
());
...
...
@@ -304,7 +299,7 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
const
auto
Y
=
wei_k_y_x_c_lengths
[
I1
];
const
auto
X
=
wei_k_y_x_c_lengths
[
I2
];
float
perf
=
(
float
)
(
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
)
/
float
perf
=
static_cast
<
float
>
(
(
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
)
)
/
(
std
::
size_t
(
1000
)
*
1000
*
1000
)
/
ave_time
;
std
::
cout
<<
"Average time : "
<<
ave_time
<<
" ms, "
<<
perf
<<
" TFlop/s"
...
...
host/driver_offline/src/conv_bwd_driver_offline.cpp
View file @
d1842890
...
...
@@ -277,8 +277,6 @@ int main(int argc, char* argv[])
in_right_pads_dev
);
};
const
auto
nhwc_desc
=
f_make_for_device_nhwc
();
#if USE_CONV_BWD_V4R1_XDL_NHWC
if
(
algo
==
ConvBackwardDataAlgo
::
V4R1XDLNHWC
)
{
...
...
host/driver_offline/src/conv_fwd_driver_offline.cpp
View file @
d1842890
...
...
@@ -20,12 +20,12 @@
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
#define USE_DYNAMIC_MODE 1
#define USE_CONV_FWD_V4R4_NCHW
1
#define USE_CONV_FWD_V4R4R2_NHWC
1
#define USE_CONV_FWD_V6R1_NCHW
1
#define USE_CONV_FWD_V4R4_NCHW
0
#define USE_CONV_FWD_V4R4R2_NHWC
0
#define USE_CONV_FWD_V6R1_NCHW
0
#define USE_CONV_FWD_V5R1_NCHW 0
#define USE_CONV_FWD_V4R4R2_XDL_NCHW 0
#define USE_CONV_FWD_V4R4R4_XDL_NHWC
0
#define USE_CONV_FWD_V4R4R4_XDL_NHWC
1
enum
ConvForwardAlgo
{
...
...
host/host_tensor/CMakeLists.txt
View file @
d1842890
...
...
@@ -10,6 +10,8 @@ set(HOST_TENSOR_SOURCE
## the library target
add_library
(
host_tensor SHARED
${
HOST_TENSOR_SOURCE
}
)
#target_include_directories(host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
target_link_libraries
(
host_tensor PRIVATE hip::device
)
target_link_libraries
(
host_tensor INTERFACE hip::host
)
...
...
host/host_tensor/include/host_conv_bwd_data.hpp
View file @
d1842890
...
...
@@ -14,7 +14,7 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
const
ConvStrides
&
conv_strides
,
const
ConvDilations
&
conv_dilations
,
const
InLeftPads
&
in_left_pads
,
const
InRightPads
&
in_right_pads
,
const
InRightPads
&
/*
in_right_pads
*/
,
const
ConvTensorLayout
layout
=
ConvTensorLayout
::
NCHW
)
{
using
namespace
ck
;
...
...
@@ -25,11 +25,6 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
constexpr
auto
I3
=
Number
<
3
>
{};
auto
f_nchw
=
[
&
](
auto
n
,
auto
c
,
auto
hi
,
auto
wi
)
{
std
::
size_t
N
=
in
.
mDesc
.
GetLengths
()[
I0
];
std
::
size_t
C
=
in
.
mDesc
.
GetLengths
()[
I1
];
std
::
size_t
Hi
=
in
.
mDesc
.
GetLengths
()[
I2
];
std
::
size_t
Wi
=
in
.
mDesc
.
GetLengths
()[
I3
];
std
::
size_t
K
=
wei
.
mDesc
.
GetLengths
()[
I0
];
std
::
size_t
Y
=
wei
.
mDesc
.
GetLengths
()[
I2
];
std
::
size_t
X
=
wei
.
mDesc
.
GetLengths
()[
I3
];
...
...
@@ -74,11 +69,6 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
};
auto
f_nhwc
=
[
&
](
auto
n
,
auto
hi
,
auto
wi
,
auto
c
)
{
std
::
size_t
N
=
in
.
mDesc
.
GetLengths
()[
I0
];
std
::
size_t
Hi
=
in
.
mDesc
.
GetLengths
()[
I1
];
std
::
size_t
Wi
=
in
.
mDesc
.
GetLengths
()[
I2
];
std
::
size_t
C
=
in
.
mDesc
.
GetLengths
()[
I3
];
std
::
size_t
K
=
wei
.
mDesc
.
GetLengths
()[
I0
];
std
::
size_t
Y
=
wei
.
mDesc
.
GetLengths
()[
I1
];
std
::
size_t
X
=
wei
.
mDesc
.
GetLengths
()[
I2
];
...
...
@@ -122,22 +112,24 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
in
(
n
,
hi
,
wi
,
c
)
=
v
;
};
switch
(
layout
)
if
(
layout
==
ConvTensorLayout
::
NCHW
)
{
case
ConvTensorLayout
::
NCHW
:
make_ParallelTensorFunctor
(
f_nchw
,
in
.
mDesc
.
GetLengths
()[
0
],
in
.
mDesc
.
GetLengths
()[
1
],
in
.
mDesc
.
GetLengths
()[
2
],
in
.
mDesc
.
GetLengths
()[
3
])(
std
::
thread
::
hardware_concurrency
());
break
;
case
ConvTensorLayout
::
NHWC
:
}
else
if
(
layout
==
ConvTensorLayout
::
NHWC
)
{
make_ParallelTensorFunctor
(
f_nhwc
,
in
.
mDesc
.
GetLengths
()[
0
],
in
.
mDesc
.
GetLengths
()[
1
],
in
.
mDesc
.
GetLengths
()[
2
],
in
.
mDesc
.
GetLengths
()[
3
])(
std
::
thread
::
hardware_concurrency
());
break
;
default:
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
}
else
{
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment