[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec...

[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (#8975)

[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec...
[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (#8975)
bce32448 · Lily Liu · GitHub · 1425a1bc · bce32448 · bce32448
Commit bce32448 (unverified): authored Sep 30, 2024 by Lily Liu; committed by GitHub on Oct 01, 2024.
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 3 deletions

.buildkite/test-pipeline.yaml (+0 -2)

tests/spec_decode/test_multi_step_worker.py (+4 -1)

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,8 +207,6 @@ steps:
  - vllm/spec_decode
  - tests/spec_decode
  commands:
-    # See https://github.com/vllm-project/vllm/issues/5152
-    - export VLLM_ATTENTION_BACKEND=XFORMERS
    - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
    - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py

--- a/tests/spec_decode/test_multi_step_worker.py
+++ b/tests/spec_decode/test_multi_step_worker.py
@@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step():
    worker.model_runner._gpu_advance_step.side_effect = ValueError(
        exception_secret)
-    seq_group_metadata_list, _, _ = create_batch(batch_size, k)
+    seq_group_metadata_list, _, _ = create_batch(batch_size,
+                                                 k,
+                                                 block_size=block_size,
+                                                 num_gpu_blocks=num_gpu_blocks)
    # Fallback (should not call) when num_steps=1.
    execute_model_req = ExecuteModelRequest(