Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4137c5df
Unverified
Commit
4137c5df
authored
Feb 13, 2026
by
haosdent
Committed by
GitHub
Feb 13, 2026
Browse files
[Bug Fix] Fix MambaManager.cache_blocks() crash on null blocks in align mode (#34418)
Signed-off-by:
haosdent
<
haosdent@gmail.com
>
parent
7a8a46dd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
0 deletions
+48
-0
tests/v1/core/test_prefix_caching.py
tests/v1/core/test_prefix_caching.py
+46
-0
vllm/v1/core/single_type_kv_cache_manager.py
vllm/v1/core/single_type_kv_cache_manager.py
+2
-0
No files found.
tests/v1/core/test_prefix_caching.py
View file @
4137c5df
...
...
@@ -744,6 +744,12 @@ def _make_hybrid_kv_cache_config(
shapes
=
(
1
,
1
),
dtypes
=
(
torch
.
float32
,),
),
"mamba_align"
:
lambda
:
MambaSpec
(
block_size
=
block_size
,
shapes
=
(
1
,
1
),
dtypes
=
(
torch
.
float32
,),
mamba_cache_mode
=
"align"
,
),
}
kv_cache_groups
=
[
...
...
@@ -962,6 +968,46 @@ def test_prefill_hybrid_model_combinations_eagle(
manager
.
free
(
req1
)
def test_prefill_hybrid_model_mamba_align():
    """Check that allocating slots works when the mamba group is in align mode.

    Regression test for https://github.com/vllm-project/vllm/issues/34361.

    With mamba_cache_mode="align", allocate_new_blocks() pads req_to_blocks
    with null blocks. cache_full_blocks() already skips those entries;
    MambaManager.cache_blocks() has to skip them as well while it records
    cached_blocks_this_step, otherwise its block-hash assertion fires.
    """
    block_size = 16
    num_blocks = 30

    # Hybrid layout: one full-attention group plus one align-mode mamba group.
    group_types = ["full", "mamba_align"]
    config = _make_hybrid_kv_cache_config(block_size, num_blocks, group_types)
    manager = KVCacheManager(
        config,
        max_model_len=8192,
        enable_caching=True,
        hash_block_size=block_size,
    )
    hash_fn = sha256

    # Prompt layout: 3 full blocks (48 tokens) followed by 7 tokens of a
    # partial block, i.e. 55 tokens in total.
    all_token_ids = []
    for block_idx in range(3):
        all_token_ids.extend([block_idx] * block_size)
    all_token_ids.extend([3] * 7)

    # First request: allocate_slots must not trip the assertion inside
    # MambaManager.cache_blocks() when null blocks are present.
    req0 = make_request("0", all_token_ids, block_size, hash_fn)
    cached = manager.get_computed_blocks(req0)
    computed_blocks, num_computed_tokens = cached
    assert num_computed_tokens == 0

    blocks = manager.allocate_slots(
        req0,
        55,
        num_computed_tokens,
        computed_blocks,
    )
    assert blocks is not None
    # One entry per KV cache group: full_attn + mamba.
    group_block_ids = blocks.get_block_ids()
    assert len(group_block_ids) == 2

    manager.free(req0)
def
test_prefill_plp
():
"""Test prefill with APC and some prompt logprobs (plp) requests.
...
...
vllm/v1/core/single_type_kv_cache_manager.py
View file @
4137c5df
...
...
@@ -1000,6 +1000,8 @@ class MambaManager(SingleTypeKVCacheManager):
for
block
in
self
.
req_to_blocks
[
request
.
request_id
][
num_cached_blocks_before
:
num_cached_blocks_after
]:
if
block
.
is_null
:
continue
assert
block
.
block_hash
is
not
None
self
.
cached_blocks_this_step
.
add
(
block
.
block_hash
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment