"vscode:/vscode.git/clone" did not exist on "8279201ce6ab178131fedff211a5539dc3ef2710"
Unverified commit ec27b36b, authored by Andreas Karatzas, committed by GitHub
Browse files

[CI] Defining extended V1 e2e + engine tests (#35580)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent 3fd1d4ec
......@@ -388,9 +388,7 @@ steps:
- label: V1 Test e2e + engine # 65min
timeout_in_minutes: 90
mirror_hardwares: [amdexperimental, amdproduction]
# The test uses 4 GPUs, but we schedule it on 8-GPU machines for stability.
# See discussion here: https://github.com/vllm-project/vllm/pull/31040
agent_pool: mi325_8
agent_pool: mi325_1
optional: true
# grade: Blocking
source_file_dependencies:
......@@ -402,6 +400,34 @@ steps:
- pytest -v -s v1/e2e
- pytest -v -s v1/engine
# Optional step for the amdexperimental/amdproduction mirrors, scheduled on
# the mi325_2 agent pool. It runs only the tests in
# v1/e2e/test_spec_decode.py whose names match "tensor_parallelism".
# NOTE(review): the "65min" estimate and the 90-minute timeout are identical
# to those on the "V1 Test e2e + engine" step — confirm they fit this
# smaller test selection.
- label: V1 Test e2e (2 GPUs) # 65min
timeout_in_minutes: 90
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_2
optional: true
# grade: Blocking
source_file_dependencies:
- vllm/
- tests/v1
commands:
# Only run tests that need exactly 2 GPUs; the -k expression below is what
# selects them.
- pytest -v -s v1/e2e/test_spec_decode.py -k "tensor_parallelism"
# Optional step for the amdexperimental/amdproduction mirrors, scheduled on
# the mi325_4 agent pool. It runs only the tests in
# v1/e2e/test_spec_decode.py whose names match "eagle_correctness_heavy".
# NOTE(review): the "65min" estimate and the 90-minute timeout are identical
# to those on the "V1 Test e2e + engine" step — confirm they fit this
# smaller test selection.
- label: V1 Test e2e (4 GPUs) # 65min
timeout_in_minutes: 90
mirror_hardwares: [amdexperimental, amdproduction]
# NOTE(review): the note previously here read "The test uses 4 GPUs, but we
# schedule it on 8-GPU machines for stability", yet agent_pool below is
# mi325_4, not mi325_8. Confirm which pool is intended and whether the
# stability rationale from the linked discussion still applies.
# See discussion here: https://github.com/vllm-project/vllm/pull/31040
agent_pool: mi325_4
optional: true
# grade: Blocking
source_file_dependencies:
- vllm/
- tests/v1
commands:
# Only run tests that need 4 GPUs; the -k expression below is what selects
# them.
- pytest -v -s v1/e2e/test_spec_decode.py -k "eagle_correctness_heavy"
- label: V1 Test entrypoints # 35min
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental, amdproduction, amdtentative]
......
......@@ -14,7 +14,7 @@ steps:
commands:
- pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
- label: V1 e2e + engine
- label: V1 e2e + engine (1 GPU)
timeout_in_minutes: 45
source_file_dependencies:
- vllm/
......@@ -36,3 +36,35 @@ steps:
commands:
- pytest -v -s v1/e2e
- pytest -v -s v1/engine
# Optional step that requests 2 devices and runs only the tests in
# v1/e2e/test_spec_decode.py whose names match "tensor_parallelism".
# Triggered by changes under vllm/ or tests/v1/e2e.
# The mirror section names AMD device mi325_2 and lists image-build-amd
# under depends_on.
# NOTE(review): indentation is not visible in this view, so whether
# depends_on belongs to the amd mirror or to the step itself must be
# confirmed against the real file.
- label: V1 e2e (2 GPUs)
timeout_in_minutes: 60 # TODO: Fix timeout after we have more confidence in the test stability
optional: true
num_devices: 2
source_file_dependencies:
- vllm/
- tests/v1/e2e
commands:
# Only run tests that need exactly 2 GPUs; the -k expression below is what
# selects them.
- pytest -v -s v1/e2e/test_spec_decode.py -k "tensor_parallelism"
mirror:
amd:
device: mi325_2
depends_on:
- image-build-amd
# Optional step that requests 4 devices and runs only the tests in
# v1/e2e/test_spec_decode.py whose names match "eagle_correctness_heavy".
# Triggered by changes under vllm/ or tests/v1/e2e.
# The mirror section names AMD device mi325_4 and lists image-build-amd
# under depends_on.
# NOTE(review): indentation is not visible in this view, so whether
# depends_on belongs to the amd mirror or to the step itself must be
# confirmed against the real file.
- label: V1 e2e (4 GPUs)
timeout_in_minutes: 60 # TODO: Fix timeout after we have more confidence in the test stability
optional: true
num_devices: 4
source_file_dependencies:
- vllm/
- tests/v1/e2e
commands:
# Only run tests that need 4 GPUs; the -k expression below is what selects
# them.
- pytest -v -s v1/e2e/test_spec_decode.py -k "eagle_correctness_heavy"
mirror:
amd:
device: mi325_4
depends_on:
- image-build-amd
......@@ -630,7 +630,7 @@ def test_eagle_correctness_medium(
False,
"auto",
0.8,
marks=multi_gpu_marks(num_gpus=4),
marks=[*multi_gpu_marks(num_gpus=4), large_gpu_mark(min_gb=40)],
id="llama4_eagle",
),
pytest.param(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment