Adding "AMD: Multi-step Tests" to amdproduction. (#19508)

Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>

Adding "AMD: Multi-step Tests" to amdproduction. (#19508)
Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
d65668b4 · Concurrensee · GitHub · aafbbd98 · d65668b4 · d65668b4
Unverified Commit d65668b4 authored Jun 13, 2025 by Concurrensee Committed by GitHub Jun 13, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 11 additions and 1 deletion

.buildkite/test-pipeline.yaml .buildkite/test-pipeline.yaml +1 -1

tests/multi_step/test_correctness_llm.py tests/multi_step/test_correctness_llm.py +10 -0

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -675,7 +675,7 @@ steps:
  - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
 - label: Multi-step Tests (4 GPUs) # 36min
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:

--- a/tests/multi_step/test_correctness_llm.py
+++ b/tests/multi_step/test_correctness_llm.py
@@ -8,6 +8,7 @@ from typing import Optional
 import pytest
+from vllm.platforms import current_platform
 from vllm.utils import STR_BACKEND_ENV_VAR
 from ..models.utils import check_logprobs_close, check_outputs_equal
@@ -71,6 +72,12 @@ def test_multi_step_llm(
      num_logprobs: corresponds to the `logprobs` argument to the OpenAI
                    completions endpoint; `None` -> 1 logprob returned.
    """
+    if current_platform.is_rocm() and \
+        (attention_backend == "FLASHINFER" or enable_chunked_prefill):
+        pytest.skip(
+            "Multi-Step with FLASHINFER or Chunked-Prefill is not supported"
+            "on ROCm")
    with monkeypatch.context() as m:
        m.setenv(STR_BACKEND_ENV_VAR, attention_backend)
@@ -221,6 +228,9 @@ def test_multi_step_llm_w_prompt_logprobs(
 @pytest.mark.parametrize("num_prompts", NUM_PROMPTS)
 @pytest.mark.parametrize("num_logprobs", [None, 5])
 @pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"])
+@pytest.mark.skipif(
+    current_platform.is_rocm(),
+    reason="Multi-Step + Chunked-Prefill not supported on ROCm")
 def test_multi_step_llm_chunked_prefill_prefix_cache(
    vllm_runner,
    example_prompts,