Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
256749c9
Commit
256749c9
authored
Mar 21, 2026
by
liuchy5
Browse files
feat:flash_mla,q去掉pad
parent
adbd3d7b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
10 deletions
+10
-10
vllm/v1/attention/backends/mla/flashmla_sparse.py
vllm/v1/attention/backends/mla/flashmla_sparse.py
+10
-10
No files found.
vllm/v1/attention/backends/mla/flashmla_sparse.py
View file @
256749c9
...
...
@@ -924,14 +924,14 @@ class FlashMLASparseImpl(MLACommonBaseImpl[FlashMLASparseMetadata]):
padded_num_heads
=
self
.
fp8_decode_padded_heads
# Pad query if needed (kernel only supports h_q = 64 or 128)
if
actual_num_heads
<
padded_num_heads
:
logger
.
warning_once
(
f
"Padding num_heads from
{
actual_num_heads
}
to "
f
"
{
padded_num_heads
}
for FP8 sparse decode kernel"
)
q_padded
=
q
.
new_zeros
((
q
.
size
(
0
),
q
.
size
(
1
),
padded_num_heads
,
q
.
size
(
3
)))
q_padded
[:,
:,
:
actual_num_heads
,
:]
=
q
q
=
q_padded
#
if actual_num_heads < padded_num_heads:
#
logger.warning_once(
#
f"Padding num_heads from {actual_num_heads} to "
#
f"{padded_num_heads} for FP8 sparse decode kernel"
#
)
#
q_padded = q.new_zeros((q.size(0), q.size(1), padded_num_heads, q.size(3)))
#
q_padded[:, :, :actual_num_heads, :] = q
#
q = q_padded
out
,
lse
=
flash_mla_with_kvcache
(
q
=
q
,
...
...
@@ -946,8 +946,8 @@ class FlashMLASparseImpl(MLACommonBaseImpl[FlashMLASparseMetadata]):
)
# Slice output back to actual head count if we padded
if
actual_num_heads
<
padded_num_heads
:
out
=
out
[:,
:,
:
actual_num_heads
,
:]
#
if actual_num_heads < padded_num_heads:
#
out = out[:, :, :actual_num_heads, :]
return
out
,
lse
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment