[CI] Defining extended V1 e2e + engine tests (#35580)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>

[CI] Defining extended V1 e2e + engine tests (#35580)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
ec27b36b · Andreas Karatzas · GitHub · 3fd1d4ec · ec27b36b · ec27b36b
Unverified commit ec27b36b, authored Mar 02, 2026 by Andreas Karatzas; committed by GitHub on Mar 02, 2026
Showing with 63 additions and 5 deletions

.buildkite/test-amd.yaml .buildkite/test-amd.yaml +29 -3

.buildkite/test_areas/engine.yaml .buildkite/test_areas/engine.yaml +33 -1

tests/v1/e2e/test_spec_decode.py tests/v1/e2e/test_spec_decode.py +1 -1

No files found.
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -388,9 +388,7 @@ steps:
 - label: V1 Test e2e + engine # 65min
  timeout_in_minutes: 90
  mirror_hardwares: [amdexperimental, amdproduction]
-  # The test uses 4 GPUs, but we schedule it on 8-GPU machines for stability.
-  # See discussion here: https://github.com/vllm-project/vllm/pull/31040
-  agent_pool: mi325_8
+  agent_pool: mi325_1
  optional: true
  # grade: Blocking
  source_file_dependencies:
@@ -402,6 +400,34 @@ steps:
    - pytest -v -s v1/e2e
    - pytest -v -s v1/engine

+- label: V1 Test e2e (2 GPUs) # 65min
+  timeout_in_minutes: 90
+  mirror_hardwares: [amdexperimental, amdproduction]
+  agent_pool: mi325_2
+  optional: true
+  # grade: Blocking
+  source_file_dependencies:
+    - vllm/
+    - tests/v1
+  commands:
+    # Only run tests that need exactly 2 GPUs
+    - pytest -v -s v1/e2e/test_spec_decode.py -k "tensor_parallelism"
+
+- label: V1 Test e2e (4 GPUs) # 65min
+  timeout_in_minutes: 90
+  mirror_hardwares: [amdexperimental, amdproduction]
+  # The test uses 4 GPUs. It was previously scheduled on 8-GPU machines for stability;
+  # see discussion here: https://github.com/vllm-project/vllm/pull/31040
+  agent_pool: mi325_4
+  optional: true
+  # grade: Blocking
+  source_file_dependencies:
+    - vllm/
+    - tests/v1
+  commands:
+    # Only run tests that need 4 GPUs
+    - pytest -v -s v1/e2e/test_spec_decode.py -k "eagle_correctness_heavy"
+
 - label: V1 Test entrypoints # 35min
  timeout_in_minutes: 50
  mirror_hardwares: [amdexperimental, amdproduction, amdtentative]

--- a/.buildkite/test_areas/engine.yaml
+++ b/.buildkite/test_areas/engine.yaml
@@ -14,7 +14,7 @@ steps:
  commands:
  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py

- label: V1 e2e + engine
+- label: V1 e2e + engine (1 GPU)
  timeout_in_minutes: 45
  source_file_dependencies:
    - vllm/
@@ -36,3 +36,35 @@ steps:
      commands:
      - pytest -v -s v1/e2e
      - pytest -v -s v1/engine
+
+- label: V1 e2e (2 GPUs)
+  timeout_in_minutes: 60 # TODO: Fix timeout after we have more confidence in the test stability
+  optional: true
+  num_devices: 2
+  source_file_dependencies:
+    - vllm/
+    - tests/v1/e2e
+  commands:
+    # Only run tests that need exactly 2 GPUs
+    - pytest -v -s v1/e2e/test_spec_decode.py -k "tensor_parallelism"
+  mirror:
+    amd:
+      device: mi325_2
+      depends_on:
+      - image-build-amd
+
+- label: V1 e2e (4 GPUs)
+  timeout_in_minutes: 60 # TODO: Fix timeout after we have more confidence in the test stability
+  optional: true
+  num_devices: 4
+  source_file_dependencies:
+    - vllm/
+    - tests/v1/e2e
+  commands:
+    # Only run tests that need 4 GPUs
+    - pytest -v -s v1/e2e/test_spec_decode.py -k "eagle_correctness_heavy"
+  mirror:
+    amd:
+      device: mi325_4
+      depends_on:
+      - image-build-amd
--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -630,7 +630,7 @@ def test_eagle_correctness_medium(
            False,
            "auto",
            0.8,
-            marks=multi_gpu_marks(num_gpus=4),
+            marks=[*multi_gpu_marks(num_gpus=4), large_gpu_mark(min_gb=40)],
            id="llama4_eagle",
        ),
        pytest.param(