Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cbf68933
Commit
cbf68933
authored
Sep 14, 2022
by
Anthony Chang
Browse files
set up inter-wave configuration
parent
64af1c84
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
13 deletions
+17
-13
include/ck/ck.hpp
include/ck/ck.hpp
+1
-1
include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp
...e/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp
+4
-0
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
+12
-12
No files found.
include/ck/ck.hpp
View file @
cbf68933
...
@@ -126,7 +126,7 @@
...
@@ -126,7 +126,7 @@
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST 1
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST 1
// experimental feature: optimize for inter-wave scheduling policy
// experimental feature: optimize for inter-wave scheduling policy
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING
0
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING
1
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS 1
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS 1
// hack: have underlying assumption that need to be satsified, otherwise it's a bug
// hack: have underlying assumption that need to be satsified, otherwise it's a bug
...
...
include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp
View file @
cbf68933
...
@@ -557,6 +557,10 @@ struct BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
...
@@ -557,6 +557,10 @@ struct BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
AThreadCopy
a_thread_copy_
{
CalculateAThreadOriginDataIndex
()};
AThreadCopy
a_thread_copy_
{
CalculateAThreadOriginDataIndex
()};
BThreadCopy
b_thread_copy_
{
CalculateBThreadOriginDataIndex
()};
BThreadCopy
b_thread_copy_
{
CalculateBThreadOriginDataIndex
()};
#else
static_assert
(
false
,
"Configuration error: using BlockwiseGemmXdlopsInterwave without defining "
"macro CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING=1"
);
#endif // #if CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING
#endif // #if CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING
};
};
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
View file @
cbf68933
...
@@ -426,18 +426,18 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
...
@@ -426,18 +426,18 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
// c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in
// c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in
// register
// register
// sanity check
// sanity check
auto
blockwise_gemm
=
BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector
<
auto
blockwise_gemm
=
BlockSize
,
BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1
<
BlockSize
,
FloatAB
,
FloatA
B
,
FloatA
cc
,
FloatAcc
,
decltype
(
a_block_desc_k0_m_k1
)
,
decltype
(
a
_block_desc_k0_
m
_k1
),
decltype
(
b
_block_desc_k0_
n
_k1
),
decltype
(
b_block_desc_k0_n_k1
)
,
MPerXDL
,
M
PerXDL
,
N
PerXDL
,
NPerXDL
,
MXdlPerWave
,
M
XdlPerWave
,
N
XdlPerWave
,
NXdlPerWave
,
K1
,
K1
>
{}
;
LoopSched
>
()
;
auto
c_thread_buf
=
blockwise_gemm
.
GetCThreadBuffer
();
auto
c_thread_buf
=
blockwise_gemm
.
GetCThreadBuffer
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment