Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
3567bf79
Commit
3567bf79
authored
Jun 08, 2021
by
Jing Zhang
Browse files
clean code
parent
e8f5ca1a
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
61 deletions
+65
-61
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
...include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
+3
-3
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
...lude/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
+3
-3
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
...tion_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
+59
-55
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
View file @
3567bf79
...
@@ -530,9 +530,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
...
@@ -530,9 +530,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
decltype
(
c_m0_m1_m2_n_thread_desc
),
decltype
(
c_m0_m1_m2_n_thread_desc
),
decltype
(
c_m0_m1_m2_n_global_desc
),
decltype
(
c_m0_m1_m2_n_global_desc
),
Sequence
<
M0
,
1
,
M2
,
1
>
,
Sequence
<
M0
,
1
,
M2
,
1
>
,
Sequence
<
0
,
1
,
2
,
3
>
,
//
CThreadTransferSrcDstAccessOrder,
CThreadTransferSrcDstAccessOrder
,
3
,
//
CThreadTransferSrcDstVectorDim,
CThreadTransferSrcDstVectorDim
,
1
,
//
CThreadTransferDstScalarPerVector,
CThreadTransferDstScalarPerVector
,
CGlobalMemoryDataOperation
,
CGlobalMemoryDataOperation
,
1
,
1
,
true
>
{
c_m0_m1_m2_n_global_desc
,
true
>
{
c_m0_m1_m2_n_global_desc
,
...
...
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
View file @
3567bf79
...
@@ -445,9 +445,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
...
@@ -445,9 +445,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
decltype
(
c_m0_m1_m2_n_thread_desc
),
decltype
(
c_m0_m1_m2_n_thread_desc
),
decltype
(
c_m0_m1_m2_n_global_desc
),
decltype
(
c_m0_m1_m2_n_global_desc
),
Sequence
<
M0
,
1
,
M2
,
1
>
,
Sequence
<
M0
,
1
,
M2
,
1
>
,
Sequence
<
0
,
1
,
2
,
3
>
,
//
CThreadTransferSrcDstAccessOrder,
CThreadTransferSrcDstAccessOrder
,
3
,
//
CThreadTransferSrcDstVectorDim,
CThreadTransferSrcDstVectorDim
,
1
,
//
CThreadTransferDstScalarPerVector,
CThreadTransferDstScalarPerVector
,
CGlobalMemoryDataOperation
,
CGlobalMemoryDataOperation
,
1
,
1
,
true
>
{
c_m0_m1_m2_n_global_desc
,
true
>
{
c_m0_m1_m2_n_global_desc
,
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
View file @
3567bf79
...
@@ -152,8 +152,12 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -152,8 +152,12 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
{
float
ave_time
=
launch_kernel_dynamic_gemm_xdlops_v2
<
#if 0
BlockSize
,
float ave_time = launch_kernel_dynamic_gemm_xdlops_v1
#else
float
ave_time
=
launch_kernel_dynamic_gemm_xdlops_v2
#endif
<
BlockSize
,
TInWei
,
TInWei
,
TAcc
,
TAcc
,
TOut
,
TOut
,
...
@@ -185,8 +189,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -185,8 +189,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
1
,
1
,
GemmBBlockTransferSrcScalarPerVector_GemmN
,
GemmBBlockTransferSrcScalarPerVector_GemmN
,
GemmBBlockTransferDstScalarPerVector_KPack
,
GemmBBlockTransferDstScalarPerVector_KPack
,
false
,
// don't move back src coordinate after threadwise copy, which will be fused
with
false
,
// don't move back src coordinate after threadwise copy, which will be fused
//
MoveSrcSliceWindow() to save addr computation
// with
MoveSrcSliceWindow() to save addr computation
Sequence
<
2
,
3
,
0
,
1
>
,
Sequence
<
2
,
3
,
0
,
1
>
,
3
,
3
,
GemmCThreadTransferDstScalarPerVector_GemmN1
,
GemmCThreadTransferDstScalarPerVector_GemmN1
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment