Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8fb2c135
Unverified
Commit
8fb2c135
authored
Jan 12, 2026
by
Asaf Joseph Gardin
Committed by
GitHub
Jan 12, 2026
Browse files
[Bugfix] Fix stale SSM state for new Mamba requests scheduled as decode (#32118)
Signed-off-by: Josephasafg <ajgard7@gmail.com>
parent
8863c2b2
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 24 additions and 3 deletions
+24
-3
tests/v1/attention/test_batch_reordering.py
tests/v1/attention/test_batch_reordering.py
+21
-0
vllm/v1/attention/backends/utils.py
vllm/v1/attention/backends/utils.py
+3
-3
No files found.
tests/v1/attention/test_batch_reordering.py
View file @
8fb2c135
...
...
@@ -98,6 +98,27 @@ REORDER_TEST_CASES = {
         expected_order=[0, 1, 6, 8, 4, 3, 2, 7, 5],
         expected_modified=True,
     ),
+    "new_request_single_token_prefill": ReorderTestCase(
+        requests=[
+            (100, 0),
+            (1, 0),  # New request with only 1 token (STILL prefill)
+            (50, 100),
+            (1, 10),
+        ],
+        # Only index 3 is a true decode (has num_computed_tokens > 0)
+        expected_order=[3, 2, 0, 1],
+        expected_modified=True,
+    ),
+    "multiple_new_requests_single_token_prefill": ReorderTestCase(
+        requests=[
+            (1, 0),  # New prefill (1 token, no computed)
+            (1, 0),  # New prefill (1 token, no computed)
+            (1, 50),
+            (200, 0),
+        ],
+        expected_order=[2, 1, 0, 3],
+        expected_modified=True,
+    ),
 }
...
...
vllm/v1/attention/backends/utils.py
View file @
8fb2c135
...
...
@@ -1040,9 +1040,9 @@ def reorder_batch_to_split_decodes_and_prefills(
     num_scheduled_tokens_np = np.array(num_scheduled_tokens)
     num_computed_tokens_np = input_batch.num_computed_tokens_cpu[:num_reqs]
-    is_decode = num_scheduled_tokens_np <= decode_threshold
-    is_extend = (~is_decode) & (num_computed_tokens_np > 0)
-    is_prefill = (~is_decode) & (num_computed_tokens_np == 0)
+    is_prefill = num_computed_tokens_np == 0
+    is_decode = (num_scheduled_tokens_np <= decode_threshold) & (~is_prefill)
+    is_extend = (num_scheduled_tokens_np > decode_threshold) & (~is_prefill)
     # Desired order: decode → extend → prefill
     req_regions = np.zeros(is_decode.shape, dtype=np.int32)  # 0 = decode by default
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment