Unverified commit 7d654635, authored by Matthew Bonanni, committed by GitHub
Browse files

[WIP][CI][Bugfix] Fix `test_run_eagle_dp` (#38584)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent 8278825b
...@@ -69,9 +69,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str): ...@@ -69,9 +69,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
) )
prompt = "This is a test of data parallel with eagle" prompt = "This is a test of data parallel with eagle"
# This test might be flaky, see num_expected_tokens = 100
# https://github.com/vllm-project/vllm/issues/31913
num_expected_tokens = 20
sampling_params = SamplingParams( sampling_params = SamplingParams(
max_tokens=num_expected_tokens, max_tokens=num_expected_tokens,
ignore_eos=True, ignore_eos=True,
......
...@@ -389,8 +389,11 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad ...@@ -389,8 +389,11 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad
slot_mapping = common_attn_metadata.slot_mapping slot_mapping = common_attn_metadata.slot_mapping
causal = common_attn_metadata.causal causal = common_attn_metadata.causal
# the overhead of the aot schedule is not worth it for spec-decode # Disable AOT schedule for spec-decode proposer (not worth the overhead)
aot_schedule = self.aot_schedule and not fast_build # and for batch invariance (schedule varies with max_seqlen_q/k).
aot_schedule = (
self.aot_schedule and not fast_build and not envs.VLLM_BATCH_INVARIANT
)
if self.aot_sliding_window is None: if self.aot_sliding_window is None:
self.aot_sliding_window = (-1, -1) self.aot_sliding_window = (-1, -1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment