Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
3567bf79
"...composable_kernel_rocm.git" did not exist on "73a076eee1cdc035de176f6061f4f1f5bfc1bd02"
Commit
3567bf79
authored
Jun 08, 2021
by
Jing Zhang
Browse files
clean code
parent
e8f5ca1a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
61 deletions
+65
-61
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
...include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
+3
-3
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
...lude/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
+3
-3
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
...tion_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
+59
-55
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
View file @
3567bf79
...
...
@@ -530,9 +530,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
decltype
(
c_m0_m1_m2_n_thread_desc
),
decltype
(
c_m0_m1_m2_n_global_desc
),
Sequence
<
M0
,
1
,
M2
,
1
>
,
Sequence
<
0
,
1
,
2
,
3
>
,
//
CThreadTransferSrcDstAccessOrder,
3
,
//
CThreadTransferSrcDstVectorDim,
1
,
//
CThreadTransferDstScalarPerVector,
CThreadTransferSrcDstAccessOrder
,
CThreadTransferSrcDstVectorDim
,
CThreadTransferDstScalarPerVector
,
CGlobalMemoryDataOperation
,
1
,
true
>
{
c_m0_m1_m2_n_global_desc
,
...
...
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
View file @
3567bf79
...
...
@@ -445,9 +445,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
decltype
(
c_m0_m1_m2_n_thread_desc
),
decltype
(
c_m0_m1_m2_n_global_desc
),
Sequence
<
M0
,
1
,
M2
,
1
>
,
Sequence
<
0
,
1
,
2
,
3
>
,
//
CThreadTransferSrcDstAccessOrder,
3
,
//
CThreadTransferSrcDstVectorDim,
1
,
//
CThreadTransferDstScalarPerVector,
CThreadTransferSrcDstAccessOrder
,
CThreadTransferSrcDstVectorDim
,
CThreadTransferDstScalarPerVector
,
CGlobalMemoryDataOperation
,
1
,
true
>
{
c_m0_m1_m2_n_global_desc
,
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
View file @
3567bf79
...
...
@@ -152,61 +152,65 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
float
ave_time
=
launch_kernel_dynamic_gemm_xdlops_v2
<
BlockSize
,
TInWei
,
TAcc
,
TOut
,
InMemoryDataOperation
::
Set
,
decltype
(
descs
[
I0
]),
decltype
(
descs
[
I1
]),
decltype
(
descs
[
I2
]),
decltype
(
descs
[
I3
]),
GemmMPerBlock
,
GemmNPerBlock
,
GemmKPerBlock
,
GemmMPerWave
,
GemmNPerWave
,
GemmKPack
,
MRepeat
,
NRepeat
,
GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1
,
GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1
,
Sequence
<
1
,
0
,
2
>
,
Sequence
<
1
,
0
,
2
>
,
2
,
GemmABlockTransferSrcScalarPerVector_GemmK
,
GemmABlockTransferDstScalarPerVector_KPack
,
false
,
// don't move back src coordinate after threadwise copy
GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1
,
GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1
,
Sequence
<
0
,
2
,
1
>
,
Sequence
<
1
,
0
,
2
>
,
1
,
GemmBBlockTransferSrcScalarPerVector_GemmN
,
GemmBBlockTransferDstScalarPerVector_KPack
,
false
,
// don't move back src coordinate after threadwise copy, which will be fused with
// MoveSrcSliceWindow() to save addr computation
Sequence
<
2
,
3
,
0
,
1
>
,
3
,
GemmCThreadTransferDstScalarPerVector_GemmN1
,
decltype
(
descs
[
I4
]),
decltype
(
descs
[
I5
]),
decltype
(
descs
[
I6
]),
decltype
(
descs
[
I7
]),
decltype
(
descs
[
I8
])
>
(
static_cast
<
TInWei
*>
(
wei_k_c_y_x_device_buf
.
GetDeviceBuffer
()),
static_cast
<
TInWei
*>
(
in_n_c_hi_wi_device_buf
.
GetDeviceBuffer
()),
static_cast
<
TOut
*>
(
out_n_k_ho_wo_device_buf
.
GetDeviceBuffer
()),
descs
[
I0
],
descs
[
I1
],
descs
[
I2
],
descs
[
I3
],
descs
[
I4
],
descs
[
I5
],
descs
[
I6
],
descs
[
I7
],
descs
[
I8
],
nrepeat
);
#if 0
float ave_time = launch_kernel_dynamic_gemm_xdlops_v1
#else
float
ave_time
=
launch_kernel_dynamic_gemm_xdlops_v2
#endif
<
BlockSize
,
TInWei
,
TAcc
,
TOut
,
InMemoryDataOperation
::
Set
,
decltype
(
descs
[
I0
]),
decltype
(
descs
[
I1
]),
decltype
(
descs
[
I2
]),
decltype
(
descs
[
I3
]),
GemmMPerBlock
,
GemmNPerBlock
,
GemmKPerBlock
,
GemmMPerWave
,
GemmNPerWave
,
GemmKPack
,
MRepeat
,
NRepeat
,
GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1
,
GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1
,
Sequence
<
1
,
0
,
2
>
,
Sequence
<
1
,
0
,
2
>
,
2
,
GemmABlockTransferSrcScalarPerVector_GemmK
,
GemmABlockTransferDstScalarPerVector_KPack
,
false
,
// don't move back src coordinate after threadwise copy
GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1
,
GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1
,
Sequence
<
0
,
2
,
1
>
,
Sequence
<
1
,
0
,
2
>
,
1
,
GemmBBlockTransferSrcScalarPerVector_GemmN
,
GemmBBlockTransferDstScalarPerVector_KPack
,
false
,
// don't move back src coordinate after threadwise copy, which will be fused
// with MoveSrcSliceWindow() to save addr computation
Sequence
<
2
,
3
,
0
,
1
>
,
3
,
GemmCThreadTransferDstScalarPerVector_GemmN1
,
decltype
(
descs
[
I4
]),
decltype
(
descs
[
I5
]),
decltype
(
descs
[
I6
]),
decltype
(
descs
[
I7
]),
decltype
(
descs
[
I8
])
>
(
static_cast
<
TInWei
*>
(
wei_k_c_y_x_device_buf
.
GetDeviceBuffer
()),
static_cast
<
TInWei
*>
(
in_n_c_hi_wi_device_buf
.
GetDeviceBuffer
()),
static_cast
<
TOut
*>
(
out_n_k_ho_wo_device_buf
.
GetDeviceBuffer
()),
descs
[
I0
],
descs
[
I1
],
descs
[
I2
],
descs
[
I3
],
descs
[
I4
],
descs
[
I5
],
descs
[
I6
],
descs
[
I7
],
descs
[
I8
],
nrepeat
);
float
perf
=
(
float
)
calculate_convolution_flops
(
in_n_c_hi_wi_desc
,
wei_k_c_y_x_desc
,
out_n_k_ho_wo_desc
)
/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment