[ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970)

Signed-off-by: Sage Moore <sage@neuralmagic.com>

[ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970)
Signed-off-by: Sage Moore <sage@neuralmagic.com>
38acae6e · Sage Moore · GitHub · a2dd48c3 · 38acae6e · 38acae6e
Unverified commit 38acae6e, authored Feb 27, 2025 by Sage Moore; committed by GitHub on Feb 27, 2025
3 changed files
--- a/.buildkite/run-amd-test.sh
+++ b/.buildkite/run-amd-test.sh
@@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
  --ignore=kernels/test_moe.py \
  --ignore=kernels/test_prefix_prefill.py \
  --ignore=kernels/test_rand.py \
-  --ignore=kernels/test_sampler.py"
+  --ignore=kernels/test_sampler.py \
+  --ignore=kernels/test_cascade_flash_attn.py \
+  --ignore=kernels/test_mamba_mixer2.py"
 fi

 #ignore certain Entrypoints tests

--- a/tests/core/block/e2e/test_correctness_sliding_window.py
+++ b/tests/core/block/e2e/test_correctness_sliding_window.py
@@ -7,6 +7,7 @@ import pytest

 from tests.kernels.utils import override_backend_env_variable
 from vllm import LLM, SamplingParams
+from vllm.platforms import current_platform

 from .conftest import get_text_from_llm_generator

@@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,

    Additionally, we compare the results of the v1 and v2 managers.
    """
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
+
    override_backend_env_variable(monkeypatch, backend)

    sampling_params = SamplingParams(
@@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
    The results with and without chunked prefill are not the same due to
    numerical instabilities.
    """
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
    override_backend_env_variable(monkeypatch, backend)

    sampling_params = SamplingParams(

--- a/tests/prefix_caching/test_prefix_caching.py
+++ b/tests/prefix_caching/test_prefix_caching.py
@@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable
 from vllm import SamplingParams, TokensPrompt
 from vllm.core.scheduler import Scheduler
 from vllm.engine.llm_engine import LLMEngine
+from vllm.platforms import current_platform

 from ..models.utils import check_outputs_equal

@@ -53,6 +54,10 @@ def test_mixed_requests(
    and the others don't. The cached position determines where
    the sequence is at among the batch of prefills.
    """
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
    override_backend_env_variable(monkeypatch, backend)

    with hf_runner(model, dtype=dtype) as hf_model:
@@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
    backend: str,
    monkeypatch,
 ) -> None:
+
+    if backend == "FLASHINFER" and current_platform.is_rocm():
+        pytest.skip("Flashinfer does not support ROCm/HIP.")
+    if backend == "XFORMERS" and current_platform.is_rocm():
+        pytest.skip("Xformers does not support ROCm/HIP.")
    override_backend_env_variable(monkeypatch, backend)

    with vllm_runner(