[WIP][CI][Bugfix] Fix `test_run_eagle_dp` (#38584)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>

[WIP][CI][Bugfix] Fix `test_run_eagle_dp` (#38584)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
7d654635 · Matthew Bonanni · GitHub · 8278825b · 7d654635 · 7d654635
Unverified Commit 7d654635 authored Mar 31, 2026 by Matthew Bonanni Committed by GitHub Mar 31, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 5 deletions

tests/v1/distributed/test_eagle_dp.py tests/v1/distributed/test_eagle_dp.py +1 -3

vllm/v1/attention/backends/flash_attn.py vllm/v1/attention/backends/flash_attn.py +5 -2

No files found.
--- a/tests/v1/distributed/test_eagle_dp.py
+++ b/tests/v1/distributed/test_eagle_dp.py
@@ -69,9 +69,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
    )
    prompt = "This is a test of data parallel with eagle"
-    # This test might be flaky, see
-    # https://github.com/vllm-project/vllm/issues/31913
-    num_expected_tokens = 20
+    num_expected_tokens = 100
    sampling_params = SamplingParams(
        max_tokens=num_expected_tokens,
        ignore_eos=True,

--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -389,8 +389,11 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad
        slot_mapping = common_attn_metadata.slot_mapping
        causal = common_attn_metadata.causal
-        # the overhead of the aot schedule is not worth it for spec-decode
-        aot_schedule = self.aot_schedule and not fast_build
+        # Disable AOT schedule for spec-decode proposer (not worth the overhead)
+        # and for batch invariance (schedule varies with max_seqlen_q/k).
+        aot_schedule = (
+            self.aot_schedule and not fast_build and not envs.VLLM_BATCH_INVARIANT
+        )
        if self.aot_sliding_window is None:
            self.aot_sliding_window = (-1, -1)