Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b6116d2f
Commit
b6116d2f
authored
Dec 08, 2021
by
Jing Zhang
Browse files
clean
parent
abd9c245
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
36 deletions
+9
-36
host/driver_offline/include/device_gemm_xdlops_mk_kn_mn.hpp
host/driver_offline/include/device_gemm_xdlops_mk_kn_mn.hpp
+9
-36
No files found.
host/driver_offline/include/device_gemm_xdlops_mk_kn_mn.hpp
View file @
b6116d2f
...
@@ -331,37 +331,10 @@ void device_gemm_xdlops_mk_kn_mn(const Tensor<ABType>& a_m_k,
...
@@ -331,37 +331,10 @@ void device_gemm_xdlops_mk_kn_mn(const Tensor<ABType>& a_m_k,
constexpr
index_t
CThreadTransferDstScalarPerVector
=
1
;
constexpr
index_t
CThreadTransferDstScalarPerVector
=
1
;
#elif 1
#elif 1
constexpr
index_t
BlockSize
=
64
;
constexpr
index_t
BlockSize
=
256
;
constexpr
index_t
MPerBlock
=
48
;
constexpr
index_t
NPerBlock
=
32
;
constexpr
index_t
KPerBlock
=
4
;
constexpr
index_t
MPerXDL
=
16
;
constexpr
index_t
NPerXDL
=
16
;
constexpr
index_t
K1
=
8
;
constexpr
index_t
MRepeat
=
3
;
constexpr
index_t
NRepeat
=
2
;
using
ABlockTransferThreadSliceLengths_K0_M_K1
=
Sequence
<
4
,
1
,
8
>
;
using
ABlockTransferThreadClusterLengths_K0_M_K1
=
Sequence
<
1
,
48
,
1
>
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_K1
=
8
;
constexpr
index_t
ABlockTransferDstScalarPerVector_K1
=
8
;
using
BBlockTransferThreadSliceLengths_K0_N_K1
=
Sequence
<
4
,
1
,
8
>
;
using
BBlockTransferThreadClusterLengths_K0_N_K1
=
Sequence
<
1
,
32
,
1
>
;
constexpr
index_t
BBlockTransferSrcScalarPerVector_N
=
1
;
constexpr
index_t
BBlockTransferDstScalarPerVector_K1
=
8
;
constexpr
index_t
CThreadTransferDstScalarPerVector
=
1
;
#elif 1
constexpr
index_t
BlockSize
=
64
;
constexpr
index_t
MPerBlock
=
48
;
constexpr
index_t
MPerBlock
=
96
;
constexpr
index_t
NPerBlock
=
16
;
constexpr
index_t
NPerBlock
=
128
;
constexpr
index_t
KPerBlock
=
4
;
constexpr
index_t
KPerBlock
=
4
;
constexpr
index_t
MPerXDL
=
16
;
constexpr
index_t
MPerXDL
=
16
;
...
@@ -369,18 +342,18 @@ void device_gemm_xdlops_mk_kn_mn(const Tensor<ABType>& a_m_k,
...
@@ -369,18 +342,18 @@ void device_gemm_xdlops_mk_kn_mn(const Tensor<ABType>& a_m_k,
constexpr
index_t
K1
=
8
;
constexpr
index_t
K1
=
8
;
constexpr
index_t
MRepeat
=
3
;
constexpr
index_t
MRepeat
=
3
;
constexpr
index_t
NRepeat
=
1
;
constexpr
index_t
NRepeat
=
4
;
using
ABlockTransferThreadSliceLengths_K0_M_K1
=
Sequence
<
4
,
1
,
8
>
;
using
ABlockTransferThreadSliceLengths_K0_M_K1
=
Sequence
<
1
,
3
,
8
>
;
using
ABlockTransferThreadClusterLengths_K0_M_K1
=
Sequence
<
1
,
48
,
1
>
;
using
ABlockTransferThreadClusterLengths_K0_M_K1
=
Sequence
<
4
,
32
,
1
>
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_K1
=
8
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_K1
=
8
;
constexpr
index_t
ABlockTransferDstScalarPerVector_K1
=
8
;
constexpr
index_t
ABlockTransferDstScalarPerVector_K1
=
8
;
using
BBlockTransferThreadSliceLengths_K0_N_K1
=
Sequence
<
4
,
1
,
8
>
;
using
BBlockTransferThreadSliceLengths_K0_N_K1
=
Sequence
<
1
,
2
,
8
>
;
using
BBlockTransferThreadClusterLengths_K0_N_K1
=
Sequence
<
1
,
16
,
1
>
;
using
BBlockTransferThreadClusterLengths_K0_N_K1
=
Sequence
<
4
,
64
,
1
>
;
constexpr
index_t
BBlockTransferSrcScalarPerVector_N
=
1
;
constexpr
index_t
BBlockTransferSrcScalarPerVector_N
=
2
;
constexpr
index_t
BBlockTransferDstScalarPerVector_K1
=
8
;
constexpr
index_t
BBlockTransferDstScalarPerVector_K1
=
8
;
constexpr
index_t
CThreadTransferDstScalarPerVector
=
1
;
constexpr
index_t
CThreadTransferDstScalarPerVector
=
1
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment