Unverified Commit 3a6ae1d3 authored by Simon Mo's avatar Simon Mo Committed by GitHub
Browse files

[CI] Disable flash_attn backend for spec decode (#5286)

parent 8f1729b8
......@@ -45,7 +45,7 @@ steps:
- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py
- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py
- pytest -v -s spec_decode/e2e/test_integration_dist.py
- pytest -v -s spec_decode/e2e/test_integration_dist.py
- label: Distributed Tests (Multiple Groups)
#mirror_hardwares: [amd]
......@@ -124,7 +124,10 @@ steps:
- label: Speculative decoding tests
#mirror_hardwares: [amd]
command: pytest -v -s spec_decode
commands:
# See https://github.com/vllm-project/vllm/issues/5152
- export VLLM_ATTENTION_BACKEND=XFORMERS
- pytest -v -s spec_decode
- label: LoRA Test %N
#mirror_hardwares: [amd]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment