Unverified commit 8fb2c135, authored by Asaf Joseph Gardin, committed by GitHub
Browse files

[Bugfix] Fix stale SSM state for new Mamba requests scheduled as decode (#32118)


Signed-off-by: Josephasafg <ajgard7@gmail.com>
parent 8863c2b2
...@@ -98,6 +98,27 @@ REORDER_TEST_CASES = { ...@@ -98,6 +98,27 @@ REORDER_TEST_CASES = {
expected_order=[0, 1, 6, 8, 4, 3, 2, 7, 5], expected_order=[0, 1, 6, 8, 4, 3, 2, 7, 5],
expected_modified=True, expected_modified=True,
), ),
"new_request_single_token_prefill": ReorderTestCase(
requests=[
(100, 0),
(1, 0), # New request with only 1 token (STILL prefill)
(50, 100),
(1, 10),
],
# Only index 3 is a true decode (single scheduled token AND num_computed_tokens > 0); index 2 is an extend
expected_order=[3, 2, 0, 1],
expected_modified=True,
),
"multiple_new_requests_single_token_prefill": ReorderTestCase(
requests=[
(1, 0), # New prefill (1 token, no computed)
(1, 0), # New prefill (1 token, no computed)
(1, 50),
(200, 0),
],
expected_order=[2, 1, 0, 3],
expected_modified=True,
),
} }
......
...@@ -1040,9 +1040,9 @@ def reorder_batch_to_split_decodes_and_prefills( ...@@ -1040,9 +1040,9 @@ def reorder_batch_to_split_decodes_and_prefills(
num_scheduled_tokens_np = np.array(num_scheduled_tokens) num_scheduled_tokens_np = np.array(num_scheduled_tokens)
num_computed_tokens_np = input_batch.num_computed_tokens_cpu[:num_reqs] num_computed_tokens_np = input_batch.num_computed_tokens_cpu[:num_reqs]
is_decode = num_scheduled_tokens_np <= decode_threshold is_prefill = num_computed_tokens_np == 0
is_extend = (~is_decode) & (num_computed_tokens_np > 0) is_decode = (num_scheduled_tokens_np <= decode_threshold) & (~is_prefill)
is_prefill = (~is_decode) & (num_computed_tokens_np == 0) is_extend = (num_scheduled_tokens_np > decode_threshold) & (~is_prefill)
# Desired order: decode → extend → prefill # Desired order: decode → extend → prefill
req_regions = np.zeros(is_decode.shape, dtype=np.int32) # 0 = decode by default req_regions = np.zeros(is_decode.shape, dtype=np.int32) # 0 = decode by default
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment