Unverified commit 38acae6e, authored by Sage Moore and committed by GitHub
Browse files

[ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970)


Signed-off-by: Sage Moore <sage@neuralmagic.com>
parent a2dd48c3
...@@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then ...@@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
--ignore=kernels/test_moe.py \ --ignore=kernels/test_moe.py \
--ignore=kernels/test_prefix_prefill.py \ --ignore=kernels/test_prefix_prefill.py \
--ignore=kernels/test_rand.py \ --ignore=kernels/test_rand.py \
--ignore=kernels/test_sampler.py" --ignore=kernels/test_sampler.py \
--ignore=kernels/test_cascade_flash_attn.py \
--ignore=kernels/test_mamba_mixer2.py"
fi fi
#ignore certain Entrypoints tests #ignore certain Entrypoints tests
......
...@@ -7,6 +7,7 @@ import pytest ...@@ -7,6 +7,7 @@ import pytest
from tests.kernels.utils import override_backend_env_variable from tests.kernels.utils import override_backend_env_variable
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.platforms import current_platform
from .conftest import get_text_from_llm_generator from .conftest import get_text_from_llm_generator
...@@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator, ...@@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
Additionally, we compare the results of the v1 and v2 managers. Additionally, we compare the results of the v1 and v2 managers.
""" """
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
sampling_params = SamplingParams( sampling_params = SamplingParams(
...@@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed, ...@@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
The results with and without chunked prefill are not the same due to The results with and without chunked prefill are not the same due to
numerical instabilities. numerical instabilities.
""" """
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
sampling_params = SamplingParams( sampling_params = SamplingParams(
......
...@@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable ...@@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable
from vllm import SamplingParams, TokensPrompt from vllm import SamplingParams, TokensPrompt
from vllm.core.scheduler import Scheduler from vllm.core.scheduler import Scheduler
from vllm.engine.llm_engine import LLMEngine from vllm.engine.llm_engine import LLMEngine
from vllm.platforms import current_platform
from ..models.utils import check_outputs_equal from ..models.utils import check_outputs_equal
...@@ -53,6 +54,10 @@ def test_mixed_requests( ...@@ -53,6 +54,10 @@ def test_mixed_requests(
and the others don't. The cached position determines where and the others don't. The cached position determines where
the sequence is at among the batch of prefills. the sequence is at among the batch of prefills.
""" """
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
with hf_runner(model, dtype=dtype) as hf_model: with hf_runner(model, dtype=dtype) as hf_model:
...@@ -103,6 +108,11 @@ def test_unstable_prompt_sequence( ...@@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
backend: str, backend: str,
monkeypatch, monkeypatch,
) -> None: ) -> None:
if backend == "FLASHINFER" and current_platform.is_rocm():
pytest.skip("Flashinfer does not support ROCm/HIP.")
if backend == "XFORMERS" and current_platform.is_rocm():
pytest.skip("Xformers does not support ROCm/HIP.")
override_backend_env_variable(monkeypatch, backend) override_backend_env_variable(monkeypatch, backend)
with vllm_runner( with vllm_runner(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment