Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a9f57b62
"instance_gen/AIT_impl/generation/xx.cpp" did not exist on "d821d1e54f6ce8131070a1253dfc4dd6662d3d85"
Commit
a9f57b62
authored
May 25, 2023
by
danyao12
Browse files
modify comment
parent
9438a118
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
include/ck/tensor_operation/gpu/grid/gridwise_batched_multihead_attention_backward_xdl_cshuffle_pt4.hpp
...batched_multihead_attention_backward_xdl_cshuffle_pt4.hpp
+4
-4
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_batched_multihead_attention_backward_xdl_cshuffle_pt4.hpp
View file @
a9f57b62
...
...
@@ -792,13 +792,13 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
__host__
__device__
static
constexpr
auto
GetABlockSliceLengths_M0_K0_M1_K1_M2_K2
()
{
// perform manual unmerge: m -> m_repeat, m_waves, m_per_xdl
// perform manual unmerge: n -> n_repeat, n_waves, n_per_xdl
constexpr
index_t
k
=
Gemm2Params
::
Sum_K
-
1
;
constexpr
index_t
k2
=
k
%
NPerXdl
;
constexpr
index_t
k1
=
k
/
NPerXdl
%
Gemm0NWaves
;
constexpr
index_t
k0
=
k
/
NPerXdl
/
Gemm0NWaves
%
NXdlPerWave
;
// perform manual unmerge: n -> n_repeat, n_waves, n_per_xdl
// perform manual unmerge: m -> m_repeat, m_waves, m_per_xdl
constexpr
index_t
m
=
Gemm2Params
::
Gemm2_M
-
1
;
constexpr
index_t
m2
=
m
%
MPerXdl
;
constexpr
index_t
m1
=
m
/
MPerXdl
%
Gemm0MWaves
;
...
...
@@ -1284,7 +1284,7 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
__builtin_amdgcn_readfirstlane
(
block_work_idx_n
*
NPerBlock
);
// 6 GEMM operations are categorized into 3 buckets. SizeK == SizeO == head_dim
// S_MNK / dP_MNO Gemm (Gemm0 rcr)
// S_MNK / dP_MNO Gemm (Gemm0 rcc)
// dV_NOM / dK_NKM Gemm (Gemm1 rrr)
// Y_MON / dQ_MKN Gemm (Gemm2 crr)
...
...
@@ -1347,7 +1347,7 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
tensor_operation
::
element_wise
::
PassThrough
{});
//
// set up S / dP Gemm (type 1 rcr)
// set up S / dP Gemm (type 1 rcc)
//
// S: blockwise gemm
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment