Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
47194606
Unverified
Commit
47194606
authored
Jun 19, 2025
by
Alexei-V-Ivanov-AMD
Committed by
GitHub
Jun 19, 2025
Browse files
Fixing Chunked Prefill Test. (#19762)
Signed-off-by:
Alexei V. Ivanov
<
alexei.ivanov@amd.com
>
parent
466166dc
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
3 deletions
+17
-3
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+1
-1
tests/basic_correctness/test_chunked_prefill.py
tests/basic_correctness/test_chunked_prefill.py
+16
-2
No files found.
.buildkite/test-pipeline.yaml
View file @
47194606
...
@@ -89,7 +89,7 @@ steps:
...
@@ -89,7 +89,7 @@ steps:
-
VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
-
VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
-
label
:
Chunked Prefill Test
-
label
:
Chunked Prefill Test
mirror_hardwares
:
[
amdexperimental
]
mirror_hardwares
:
[
amdexperimental
,
amdproduction
]
source_file_dependencies
:
source_file_dependencies
:
-
vllm/
-
vllm/
-
tests/basic_correctness/test_chunked_prefill
-
tests/basic_correctness/test_chunked_prefill
...
...
tests/basic_correctness/test_chunked_prefill.py
View file @
47194606
...
@@ -49,7 +49,13 @@ def use_v0_only(monkeypatch: pytest.MonkeyPatch):
...
@@ -49,7 +49,13 @@ def use_v0_only(monkeypatch: pytest.MonkeyPatch):
# NOTE: Increasing this in this suite will fail CI because we currently cannot
# NOTE: Increasing this in this suite will fail CI because we currently cannot
# reset distributed env properly. Use a value > 1 just when you test.
# reset distributed env properly. Use a value > 1 just when you test.
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
[
1
])
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
[
1
])
@
pytest
.
mark
.
parametrize
(
"attention_backend"
,
[
"FLASHINFER"
,
"FLASH_ATTN"
])
@
pytest
.
mark
.
parametrize
(
"attention_backend"
,
[
pytest
.
param
(
"FLASHINFER"
,
marks
=
pytest
.
mark
.
skipif
(
current_platform
.
is_rocm
(),
reason
=
"FLASHINFER isn't supported on ROCm"
)),
"FLASH_ATTN"
])
def
test_models
(
def
test_models
(
hf_runner
:
HfRunner
,
hf_runner
:
HfRunner
,
vllm_runner
:
VllmRunner
,
vllm_runner
:
VllmRunner
,
...
@@ -99,7 +105,13 @@ def test_models(
...
@@ -99,7 +105,13 @@ def test_models(
@
multi_gpu_test
(
num_gpus
=
2
)
@
multi_gpu_test
(
num_gpus
=
2
)
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"attention_backend"
,
[
"FLASHINFER"
,
"FLASH_ATTN"
])
@
pytest
.
mark
.
parametrize
(
"attention_backend"
,
[
pytest
.
param
(
"FLASHINFER"
,
marks
=
pytest
.
mark
.
skipif
(
current_platform
.
is_rocm
(),
reason
=
"FLASHINFER isn't supported on ROCm"
)),
"FLASH_ATTN"
])
def
test_models_distributed
(
def
test_models_distributed
(
hf_runner
:
HfRunner
,
hf_runner
:
HfRunner
,
vllm_runner
:
VllmRunner
,
vllm_runner
:
VllmRunner
,
...
@@ -172,6 +184,8 @@ def test_models_distributed(
...
@@ -172,6 +184,8 @@ def test_models_distributed(
# Due to low-precision numerical divergence, this test is too sensitive to
# Due to low-precision numerical divergence, this test is too sensitive to
# the async postprocessor
# the async postprocessor
@
pytest
.
mark
.
parametrize
(
"disable_async_output_proc"
,
[
True
])
@
pytest
.
mark
.
parametrize
(
"disable_async_output_proc"
,
[
True
])
@
pytest
.
mark
.
skipif
(
current_platform
.
is_rocm
(),
reason
=
"machete_prepack_B isn't supported on ROCm"
)
def
test_models_with_fp8_kv_cache
(
def
test_models_with_fp8_kv_cache
(
vllm_runner
:
VllmRunner
,
vllm_runner
:
VllmRunner
,
example_prompts
,
example_prompts
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment