Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
38acae6e
Unverified
Commit
38acae6e
authored
Feb 27, 2025
by
Sage Moore
Committed by
GitHub
Feb 27, 2025
Browse files
[ROCm] Fix the Kernels, Core, and Prefix Caching AMD CI groups (#13970)
Signed-off-by:
Sage Moore
<
sage@neuralmagic.com
>
parent
a2dd48c3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
1 deletion
+23
-1
.buildkite/run-amd-test.sh
.buildkite/run-amd-test.sh
+3
-1
tests/core/block/e2e/test_correctness_sliding_window.py
tests/core/block/e2e/test_correctness_sliding_window.py
+10
-0
tests/prefix_caching/test_prefix_caching.py
tests/prefix_caching/test_prefix_caching.py
+10
-0
No files found.
.buildkite/run-amd-test.sh
View file @
38acae6e
...
@@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
...
@@ -92,7 +92,9 @@ if [[ $commands == *" kernels "* ]]; then
--ignore=kernels/test_moe.py
\
--ignore=kernels/test_moe.py
\
--ignore=kernels/test_prefix_prefill.py
\
--ignore=kernels/test_prefix_prefill.py
\
--ignore=kernels/test_rand.py
\
--ignore=kernels/test_rand.py
\
--ignore=kernels/test_sampler.py"
--ignore=kernels/test_sampler.py
\
--ignore=kernels/test_cascade_flash_attn.py
\
--ignore=kernels/test_mamba_mixer2.py"
fi
fi
#ignore certain Entrypoints tests
#ignore certain Entrypoints tests
...
...
tests/core/block/e2e/test_correctness_sliding_window.py
View file @
38acae6e
...
@@ -7,6 +7,7 @@ import pytest
...
@@ -7,6 +7,7 @@ import pytest
from
tests.kernels.utils
import
override_backend_env_variable
from
tests.kernels.utils
import
override_backend_env_variable
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
vllm.platforms
import
current_platform
from
.conftest
import
get_text_from_llm_generator
from
.conftest
import
get_text_from_llm_generator
...
@@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
...
@@ -42,6 +43,11 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
Additionally, we compare the results of the v1 and v2 managers.
Additionally, we compare the results of the v1 and v2 managers.
"""
"""
if
backend
==
"FLASHINFER"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Flashinfer does not support ROCm/HIP."
)
if
backend
==
"XFORMERS"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Xformers does not support ROCm/HIP."
)
override_backend_env_variable
(
monkeypatch
,
backend
)
override_backend_env_variable
(
monkeypatch
,
backend
)
sampling_params
=
SamplingParams
(
sampling_params
=
SamplingParams
(
...
@@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
...
@@ -101,6 +107,10 @@ def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
The results with and without chunked prefill are not the same due to
The results with and without chunked prefill are not the same due to
numerical instabilities.
numerical instabilities.
"""
"""
if
backend
==
"FLASHINFER"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Flashinfer does not support ROCm/HIP."
)
if
backend
==
"XFORMERS"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Xformers does not support ROCm/HIP."
)
override_backend_env_variable
(
monkeypatch
,
backend
)
override_backend_env_variable
(
monkeypatch
,
backend
)
sampling_params
=
SamplingParams
(
sampling_params
=
SamplingParams
(
...
...
tests/prefix_caching/test_prefix_caching.py
View file @
38acae6e
...
@@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable
...
@@ -12,6 +12,7 @@ from tests.kernels.utils import override_backend_env_variable
from
vllm
import
SamplingParams
,
TokensPrompt
from
vllm
import
SamplingParams
,
TokensPrompt
from
vllm.core.scheduler
import
Scheduler
from
vllm.core.scheduler
import
Scheduler
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.platforms
import
current_platform
from
..models.utils
import
check_outputs_equal
from
..models.utils
import
check_outputs_equal
...
@@ -53,6 +54,10 @@ def test_mixed_requests(
...
@@ -53,6 +54,10 @@ def test_mixed_requests(
and the others don't. The cached position determines where
and the others don't. The cached position determines where
the sequence is at among the batch of prefills.
the sequence is at among the batch of prefills.
"""
"""
if
backend
==
"FLASHINFER"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Flashinfer does not support ROCm/HIP."
)
if
backend
==
"XFORMERS"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Xformers does not support ROCm/HIP."
)
override_backend_env_variable
(
monkeypatch
,
backend
)
override_backend_env_variable
(
monkeypatch
,
backend
)
with
hf_runner
(
model
,
dtype
=
dtype
)
as
hf_model
:
with
hf_runner
(
model
,
dtype
=
dtype
)
as
hf_model
:
...
@@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
...
@@ -103,6 +108,11 @@ def test_unstable_prompt_sequence(
backend
:
str
,
backend
:
str
,
monkeypatch
,
monkeypatch
,
)
->
None
:
)
->
None
:
if
backend
==
"FLASHINFER"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Flashinfer does not support ROCm/HIP."
)
if
backend
==
"XFORMERS"
and
current_platform
.
is_rocm
():
pytest
.
skip
(
"Xformers does not support ROCm/HIP."
)
override_backend_env_variable
(
monkeypatch
,
backend
)
override_backend_env_variable
(
monkeypatch
,
backend
)
with
vllm_runner
(
with
vllm_runner
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment