Unverified commit 254f6b98, authored by Richard Zou, committed by GitHub
Browse files

[Bugfix] Fix eagle dp tests on A100 (#31241)


Signed-off-by: Richard Zou <zou3519@gmail.com>
parent bc5ef333
...@@ -16,7 +16,12 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2)) ...@@ -16,7 +16,12 @@ DP_SIZE = int(os.getenv("DP_SIZE", 2))
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_run_eagle_dp(): async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch):
# This test checks that running a model with and without eagle
# leads to identical tokens. This is only true in batch invariant mode
# (because the target model verifies all draft tokens in one big forward pass)
monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1")
target_model = "meta-llama/Llama-3.1-8B-Instruct" target_model = "meta-llama/Llama-3.1-8B-Instruct"
draft_model = "yuhuili/EAGLE-LLaMA3.1-Instruct-8B" draft_model = "yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
...@@ -29,6 +34,7 @@ async def test_run_eagle_dp(): ...@@ -29,6 +34,7 @@ async def test_run_eagle_dp():
data_parallel_backend="mp", # ray takes more time data_parallel_backend="mp", # ray takes more time
trust_remote_code=True, trust_remote_code=True,
max_model_len=16384, max_model_len=16384,
attention_config={"backend": "FLASH_ATTN"},
) )
eagle_engine_args = replace( eagle_engine_args = replace(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment