Unverified Commit 4f2ed5fd authored by Andreas Karatzas, committed by GitHub
Browse files

[ROCm][CI] Enable hybrid chunked prefill test (#38317)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent d28d86e8
......@@ -1801,6 +1801,19 @@ steps:
- tests/v1/e2e
commands:
- pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"
- label: V1 e2e (4xH100-4xMI325) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
agent_pool: mi325_4
optional: true
source_file_dependencies:
- vllm/v1/attention/backends/utils.py
- vllm/v1/worker/gpu_model_runner.py
- tests/v1/e2e/test_hybrid_chunked_prefill.py
commands:
- pytest -v -s v1/e2e/test_hybrid_chunked_prefill.py
- label: V1 Spec Decode # TBD
......
......@@ -36,14 +36,20 @@ MESSAGES = [
]
@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA not available")
@pytest.mark.parametrize(
"model_name",
[
pytest.param("Qwen/Qwen3.5-4B", marks=[large_gpu_mark(min_gb=40)]),
pytest.param(
"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
marks=[large_gpu_mark(min_gb=80)] + multi_gpu_marks(num_gpus=4),
marks=[large_gpu_mark(min_gb=80)]
+ multi_gpu_marks(num_gpus=4)
+ [
pytest.mark.skipif(
not current_platform.is_cuda(),
reason="modelopt quantization is supported only on CUDA",
)
],
),
],
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment