[Bugfix] Fix eagle dp tests on A100 (#31241)

Signed-off-by: Richard Zou <zou3519@gmail.com>

[Bugfix] Fix eagle dp tests on A100 (#31241)
Signed-off-by: Richard Zou <zou3519@gmail.com>
254f6b98 · Richard Zou · GitHub · bc5ef333 · 254f6b98
Unverified commit 254f6b98, authored Dec 24, 2025 by Richard Zou; committed by GitHub on Dec 25, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 1 deletion

tests/v1/distributed/test_eagle_dp.py → tests/v1/distributed/test_eagle_dp.py (+7 -1)

No files found.
--- a/tests/v1/distributed/test_eagle_dp.py
+++ b/tests/v1/distributed/test_eagle_dp.py
@@ -16,7 +16,12 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
 @pytest.mark.asyncio
-async def test_run_eagle_dp():
+async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch):
+    # This test checks that running a model with and without eagle
+    # leads to identical tokens. This is only true in batch invariant mode
+    # (because the target model verifies all draft tokens in one big forward pass)
+    monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1")
    target_model = "meta-llama/Llama-3.1-8B-Instruct"
    draft_model = "yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
@@ -29,6 +34,7 @@ async def test_run_eagle_dp():
        data_parallel_backend="mp",  # ray takes more time
        trust_remote_code=True,
        max_model_len=16384,
+        attention_config={"backend": "FLASH_ATTN"},
    )
    eagle_engine_args = replace(