Unverified commit d65668b4, authored by Concurrensee, committed by GitHub
Browse files

Adding "AMD: Multi-step Tests" to amdproduction. (#19508)


Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent aafbbd98
...@@ -675,7 +675,7 @@ steps: ...@@ -675,7 +675,7 @@ steps:
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
- label: Multi-step Tests (4 GPUs) # 36min - label: Multi-step Tests (4 GPUs) # 36min
mirror_hardwares: [amdexperimental] mirror_hardwares: [amdexperimental, amdproduction]
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
num_gpus: 4 num_gpus: 4
source_file_dependencies: source_file_dependencies:
......
...@@ -8,6 +8,7 @@ from typing import Optional ...@@ -8,6 +8,7 @@ from typing import Optional
import pytest import pytest
from vllm.platforms import current_platform
from vllm.utils import STR_BACKEND_ENV_VAR from vllm.utils import STR_BACKEND_ENV_VAR
from ..models.utils import check_logprobs_close, check_outputs_equal from ..models.utils import check_logprobs_close, check_outputs_equal
...@@ -71,6 +72,12 @@ def test_multi_step_llm( ...@@ -71,6 +72,12 @@ def test_multi_step_llm(
num_logprobs: corresponds to the `logprobs` argument to the OpenAI num_logprobs: corresponds to the `logprobs` argument to the OpenAI
completions endpoint; `None` -> 1 logprob returned. completions endpoint; `None` -> 1 logprob returned.
""" """
if current_platform.is_rocm() and \
(attention_backend == "FLASHINFER" or enable_chunked_prefill):
pytest.skip(
"Multi-Step with FLASHINFER or Chunked-Prefill is not supported"
"on ROCm")
with monkeypatch.context() as m: with monkeypatch.context() as m:
m.setenv(STR_BACKEND_ENV_VAR, attention_backend) m.setenv(STR_BACKEND_ENV_VAR, attention_backend)
...@@ -221,6 +228,9 @@ def test_multi_step_llm_w_prompt_logprobs( ...@@ -221,6 +228,9 @@ def test_multi_step_llm_w_prompt_logprobs(
@pytest.mark.parametrize("num_prompts", NUM_PROMPTS) @pytest.mark.parametrize("num_prompts", NUM_PROMPTS)
@pytest.mark.parametrize("num_logprobs", [None, 5]) @pytest.mark.parametrize("num_logprobs", [None, 5])
@pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"]) @pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"])
@pytest.mark.skipif(
current_platform.is_rocm(),
reason="Multi-Step + Chunked-Prefill not supported on ROCm")
def test_multi_step_llm_chunked_prefill_prefix_cache( def test_multi_step_llm_chunked_prefill_prefix_cache(
vllm_runner, vllm_runner,
example_prompts, example_prompts,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment