Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
36a1c7c9
Commit
36a1c7c9
authored
Dec 29, 2024
by
Po Yen Chen
Browse files
Use vector load if paged-vcache is in column major (async pipeline)
parent
65bbe6ea
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
1 deletion
+40
-1
include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_splitkv_pipeline_qr_ks_vs_async.hpp
...peline/block_fmha_fwd_splitkv_pipeline_qr_ks_vs_async.hpp
+40
-1
No files found.
include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_splitkv_pipeline_qr_ks_vs_async.hpp
View file @
36a1c7c9
...
...
@@ -67,7 +67,9 @@ struct BlockFmhaFwdSplitKVPipelineQRKSVSAsync
if
constexpr
(
std
::
is_same_v
<
VLayout
,
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
return
Policy
::
template
GetAlignmentV
<
Problem
>();
else
return
kPadSeqLenK
?
1
:
Policy
::
template
GetAlignmentV
<
Problem
>();
return
kIsPagedKV
?
Policy
::
template
GetAlignmentV
<
Problem
>()
:
kPadSeqLenK
?
1
:
Policy
::
template
GetAlignmentV
<
Problem
>();
}();
static
constexpr
index_t
kAlignmentOacc
=
kPadHeadDimV
?
1
:
Policy
::
template
GetAlignmentOacc
<
Problem
>();
...
...
@@ -555,6 +557,24 @@ struct BlockFmhaFwdSplitKVPipelineQRKSVSAsync
}
else
{
// Override data points which are located outside [0, seqlen_k) to 0.0
if
constexpr
(
kIsPagedKV
&&
kPadSeqLenK
)
{
if
(
v_page_block_navigator
.
is_last_block
(
i_page_block_v
))
{
const
auto
v_origin
=
v_page_block_navigator
.
to_global_window_origin
(
i_page_block_v
,
v_dram_window
.
get_window_origin
());
set_tile_if
(
v_buf
,
type_convert
<
VDataType
>
(
0.0
),
[
&
,
physical_seqlen_k_end_
=
physical_seqlen_k_end
](
auto
tile_idx
)
{
const
auto
col
=
v_origin
.
at
(
number
<
1
>
{})
+
tile_idx
.
at
(
number
<
1
>
{});
return
physical_seqlen_k_end_
<=
col
;
});
}
}
auto
v_lds_window_tmp
=
get_slice_tile
(
v_lds_window
,
sequence
<
(
LdsSeq
.
at
(
number
<
k0_loops
>
{}))
*
kN1
,
0
>
{},
...
...
@@ -691,6 +711,25 @@ struct BlockFmhaFwdSplitKVPipelineQRKSVSAsync
}
else
{
// Override data points which are located outside [0, seqlen_k) to 0.0
if
constexpr
(
kIsPagedKV
&&
kPadSeqLenK
)
{
if
(
v_page_block_navigator
.
is_last_block
(
i_page_block_v_
))
{
const
auto
v_origin
=
v_page_block_navigator
.
to_global_window_origin
(
i_page_block_v_
,
v_dram_window_
.
get_window_origin
());
set_tile_if
(
v_buf
,
type_convert
<
VDataType
>
(
0.0
),
[
&
,
physical_seqlen_k_end_
=
physical_seqlen_k_end
](
auto
tile_idx
)
{
const
auto
col
=
v_origin
.
at
(
number
<
1
>
{})
+
tile_idx
.
at
(
number
<
1
>
{});
return
physical_seqlen_k_end_
<=
col
;
});
}
}
auto
v_lds_window_tmp
=
get_slice_tile
(
v_lds_window
,
sequence
<
(
LdsSeq
.
at
(
number
<
k0_loops
+
i_k1
+
1
>
{}))
*
kN1
,
0
>
{},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment