Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
d362410d
Commit
d362410d
authored
Feb 09, 2025
by
Qianfeng Zhang
Browse files
Use NumPrefetchV to separate from NumVLdsBuffers
parent
2e612c02
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
10 deletions
+20
-10
include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async.hpp
.../ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async.hpp
+11
-10
include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async_default_policy.hpp
...ine/block_fmha_pipeline_qr_ks_vs_async_default_policy.hpp
+9
-0
No files found.
include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async.hpp
View file @
d362410d
...
...
@@ -173,6 +173,7 @@ struct BlockFmhaPipelineQRKSVSAsync
constexpr
auto
NumKLdsBuffers
=
Policy
::
template
GetNumKLdsBuffers
<
Problem
>();
constexpr
auto
NumVLdsBuffers
=
Policy
::
template
GetNumVLdsBuffers
<
Problem
>();
constexpr
auto
NumPrefetchV
=
Policy
::
template
GetNumPrefetchV
<
Problem
>();
static_assert
(
NumKLdsBuffers
>=
2
);
...
...
@@ -250,7 +251,7 @@ struct BlockFmhaPipelineQRKSVSAsync
using
v_tile_type
=
decltype
(
load_tile
(
v_dram_window
));
statically_indexed_array
<
v_tile_type
,
Num
VLdsBuffers
>
v_tiles
;
statically_indexed_array
<
v_tile_type
,
Num
PrefetchV
>
v_tiles
;
using
v_lds_window_type
=
decltype
(
get_slice_tile
(
v_lds_window
,
sequence
<
0
,
0
>
{},
sequence
<
kN1
,
kK1
>
{}));
...
...
@@ -468,7 +469,7 @@ struct BlockFmhaPipelineQRKSVSAsync
const
auto
bias_tile
=
load_tile
(
bias_dram_window
);
// load bias tile
static_for
<
0
,
Num
VLdsBuffers
,
1
>
{}([
&
](
auto
i_buf
)
{
static_for
<
0
,
Num
PrefetchV
,
1
>
{}([
&
](
auto
i_buf
)
{
v_tiles
[
i_buf
]
=
load_tile
(
v_dram_window
);
move_tile_window
(
v_dram_window
,
{
0
,
kK1
});
});
...
...
@@ -704,8 +705,8 @@ struct BlockFmhaPipelineQRKSVSAsync
else
{
static_for
<
0
,
k1_loops
-
1
,
1
>
{}([
&
](
auto
i_k1
)
{
if
constexpr
(
i_k1
<
k1_loops
-
Num
VLdsBuffers
)
v_tiles
[
number
<
i_k1
%
Num
VLdsBuffers
>
{}]
=
load_tile
(
v_dram_window
);
if
constexpr
(
i_k1
<
k1_loops
-
Num
PrefetchV
)
v_tiles
[
number
<
i_k1
%
Num
PrefetchV
>
{}]
=
load_tile
(
v_dram_window
);
block_sync_lds
();
gemm_1
(
o_acc
,
...
...
@@ -719,19 +720,19 @@ struct BlockFmhaPipelineQRKSVSAsync
auto
v_shuffle_tmp
=
make_static_distributed_tensor
<
VDataType
>
(
Policy
::
template
MakeShuffledVRegBlockDescriptor
<
Problem
>());
shuffle_tile
(
v_shuffle_tmp
,
v_tiles
[
number
<
(
i_k1
+
1
)
%
Num
VLdsBuffers
>
{}]);
v_tiles
[
number
<
(
i_k1
+
1
)
%
Num
PrefetchV
>
{}]);
store_tile
(
v_lds_windows
[
number
<
(
i_k1
+
1
)
%
NumVLdsBuffers
>
{}],
tile_elementwise_in
(
v_element_func
,
v_shuffle_tmp
));
}
else
{
store_tile
(
v_lds_windows
[
number
<
(
i_k1
+
1
)
%
NumVLdsBuffers
>
{}],
tile_elementwise_in
(
v_element_func
,
v_tiles
[
number
<
(
i_k1
+
1
)
%
Num
VLdsBuffers
>
{}]));
store_tile
(
v_lds_windows
[
number
<
(
i_k1
+
1
)
%
NumVLdsBuffers
>
{}],
tile_elementwise_in
(
v_element_func
,
v_tiles
[
number
<
(
i_k1
+
1
)
%
Num
PrefetchV
>
{}]));
}
if
constexpr
(
i_k1
<
k1_loops
-
Num
VLdsBuffers
)
if
constexpr
(
i_k1
<
k1_loops
-
Num
PrefetchV
)
move_tile_window
(
v_dram_window
,
{
0
,
kK1
});
});
}
...
...
include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async_default_policy.hpp
View file @
d362410d
...
...
@@ -97,6 +97,15 @@ struct BlockFmhaPipelineQRKSVSAsyncDefaultPolicy
return
false
;
};
template
<
typename
Problem
>
CK_TILE_HOST_DEVICE
static
constexpr
ck_tile
::
index_t
GetNumPrefetchV
()
{
if
constexpr
(
IsPreloadWholeNextIterationK
<
Problem
>
())
return
GetNumVLdsBuffers
<
Problem
>
();
else
return
min
(
2
,
GetNumVLdsBuffers
<
Problem
>
());
};
template
<
typename
Problem
>
CK_TILE_HOST_DEVICE
static
constexpr
ck_tile
::
index_t
GetExclusiveKLdsBytes
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment