Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
3c71fe87
Commit
3c71fe87
authored
Jun 09, 2021
by
Jing Zhang
Browse files
tweak
parent
3567bf79
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
17 deletions
+21
-17
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
...lude/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
+7
-3
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
...tion_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
+14
-14
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2.hpp
View file @
3c71fe87
...
@@ -367,8 +367,10 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
...
@@ -367,8 +367,10 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
}
}
// main body
// main body
for
(
index_t
k_block_data_begin
=
0
;
k_block_data_begin
<
K0
-
KPerBlock
;
// for(index_t k_block_data_begin = 0; k_block_data_begin < K0 - KPerBlock;
k_block_data_begin
+=
KPerBlock
)
// k_block_data_begin += KPerBlock)
int
k_block_data_begin
=
0
;
do
{
{
a_blockwise_copy
.
MoveSrcSliceWindow
(
a_k0_m_k1_global_desc
,
a_blockwise_copy
.
MoveSrcSliceWindow
(
a_k0_m_k1_global_desc
,
a_block_slice_copy_step
,
a_block_slice_copy_step
,
...
@@ -390,7 +392,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
...
@@ -390,7 +392,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
a_blockwise_copy
.
RunWrite
(
a_k0_m_k1_block_desc
,
a_block_buf
);
a_blockwise_copy
.
RunWrite
(
a_k0_m_k1_block_desc
,
a_block_buf
);
b_blockwise_copy
.
RunWrite
(
b_k0_n_k1_block_desc
,
b_block_buf
);
b_blockwise_copy
.
RunWrite
(
b_k0_n_k1_block_desc
,
b_block_buf
);
}
k_block_data_begin
+=
KPerBlock
;
}
while
(
k_block_data_begin
<
(
K0
-
KPerBlock
));
// tail
// tail
{
{
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp
View file @
3c71fe87
...
@@ -79,7 +79,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -79,7 +79,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
const
auto
in_right_pads
=
sequence_to_tuple_of_number
(
InRightPads
{});
const
auto
in_right_pads
=
sequence_to_tuple_of_number
(
InRightPads
{});
#endif
#endif
#if
1
#if
0
constexpr index_t BlockSize = 256;
constexpr index_t BlockSize = 256;
constexpr index_t GemmMPerBlock = 128;
constexpr index_t GemmMPerBlock = 128;
...
@@ -109,28 +109,28 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -109,28 +109,28 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
#else
#else
constexpr
index_t
BlockSize
=
256
;
constexpr
index_t
BlockSize
=
256
;
constexpr
index_t
GemmMPerBlock
=
128
;
constexpr
index_t
GemmMPerBlock
=
256
;
constexpr
index_t
GemmNPerBlock
=
256
;
constexpr
index_t
GemmNPerBlock
=
128
;
constexpr
index_t
GemmKPerBlock
=
16
;
constexpr
index_t
GemmKPerBlock
=
4
;
constexpr
index_t
GemmMPerWave
=
64
;
constexpr
index_t
GemmMPerWave
=
64
;
constexpr
index_t
GemmNPerWave
=
64
;
constexpr
index_t
GemmNPerWave
=
64
;
constexpr
index_t
GemmKPack
=
1
;
constexpr
index_t
GemmKPack
=
8
;
constexpr
index_t
MRepeat
=
1
;
constexpr
index_t
MRepeat
=
2
;
constexpr
index_t
NRepeat
=
2
;
constexpr
index_t
NRepeat
=
1
;
using
GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1
=
Sequence
<
4
,
2
,
1
>
;
using
GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1
=
Sequence
<
1
,
4
,
8
>
;
using
GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1
=
Sequence
<
4
,
64
,
1
>
;
using
GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1
=
Sequence
<
4
,
64
,
1
>
;
constexpr
index_t
GemmABlockTransferSrcScalarPerVector_GemmK
=
1
;
constexpr
index_t
GemmABlockTransferSrcScalarPerVector_GemmK
=
8
;
constexpr
index_t
GemmABlockTransferDstScalarPerVector_KPack
=
1
;
constexpr
index_t
GemmABlockTransferDstScalarPerVector_KPack
=
8
;
using
GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1
=
Sequence
<
4
,
4
,
1
>
;
using
GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1
=
Sequence
<
1
,
4
,
4
>
;
using
GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1
=
Sequence
<
4
,
64
,
1
>
;
using
GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1
=
Sequence
<
4
,
32
,
2
>
;
constexpr
index_t
GemmBBlockTransferSrcScalarPerVector_GemmN
=
1
;
constexpr
index_t
GemmBBlockTransferSrcScalarPerVector_GemmN
=
4
;
constexpr
index_t
GemmBBlockTransferDstScalarPerVector_KPack
=
1
;
constexpr
index_t
GemmBBlockTransferDstScalarPerVector_KPack
=
4
;
constexpr
index_t
GemmCThreadTransferDstScalarPerVector_GemmN1
=
1
;
constexpr
index_t
GemmCThreadTransferDstScalarPerVector_GemmN1
=
1
;
#endif
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment