[ci] Switch some CI jobs to H200 MIG slices (#38956)

56de443d · Kevin H. Luu · GitHub · 4dd49b06 · 56de443d · 56de443d
Unverified commit 56de443d, authored Apr 05, 2026 by Kevin H. Luu; committed by GitHub Apr 05, 2026.
14 changed files
--- a/.buildkite/test_areas/basic_correctness.yaml
+++ b/.buildkite/test_areas/basic_correctness.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Basic Correctness
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/basic_correctness/test_basic_correctness

--- a/.buildkite/test_areas/benchmarks.yaml
+++ b/.buildkite/test_areas/benchmarks.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Benchmarks CLI Test
  timeout_in_minutes: 20
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/benchmarks/

--- a/.buildkite/test_areas/cuda.yaml
+++ b/.buildkite/test_areas/cuda.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Platform Tests (CUDA)
  timeout_in_minutes: 15
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/cuda

--- a/.buildkite/test_areas/engine.yaml
+++ b/.buildkite/test_areas/engine.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Engine
  timeout_in_minutes: 15
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/engine
@@ -25,6 +26,7 @@ steps:
 - label: e2e Scheduling (1 GPU)
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
    - vllm/v1/
    - tests/v1/e2e/general/

--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -61,6 +61,7 @@ steps:
 - label: Entrypoints Integration (API Server openai - Part 3)
  timeout_in_minutes: 50
+  device: h200_18gb
  working_dir: "/vllm-workspace/tests"
  source_file_dependencies:
  - vllm/
@@ -105,6 +106,7 @@ steps:
 - label: OpenAI API Correctness
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
  - csrc/
  - vllm/entrypoints/openai/

--- a/.buildkite/test_areas/expert_parallelism.yaml
+++ b/.buildkite/test_areas/expert_parallelism.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: EPLB Algorithm
  timeout_in_minutes: 15
+  device: h200_18gb
  working_dir: "/vllm-workspace/tests"
  source_file_dependencies:
  - vllm/distributed/eplb

--- a/.buildkite/test_areas/kernels.yaml
+++ b/.buildkite/test_areas/kernels.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: vLLM IR Tests
  timeout_in_minutes: 10
+  device: h200_18gb
  working_dir: "/vllm-workspace/"
  source_file_dependencies:
    - vllm/ir

--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -19,6 +19,7 @@ steps:
 - label: V1 Sample + Logits
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
    - vllm/
    - tests/v1/sample
@@ -86,6 +87,7 @@ steps:
 - label: Regression
  timeout_in_minutes: 20
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/test_regression

--- a/.buildkite/test_areas/models_basic.yaml
+++ b/.buildkite/test_areas/models_basic.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Basic Models Tests (Initialization)
  timeout_in_minutes: 45
+  device: h200_18gb
  torch_nightly: true
  source_file_dependencies:
  - vllm/

--- a/.buildkite/test_areas/models_language.yaml
+++ b/.buildkite/test_areas/models_language.yaml
@@ -67,6 +67,7 @@ steps:
 - label: Language Models Test (PPL)
  timeout_in_minutes: 110
+  device: h200_18gb
  optional: true
  source_file_dependencies:
  - vllm/
@@ -90,6 +91,7 @@ steps:
 - label: Language Models Test (MTEB)
  timeout_in_minutes: 110
+  device: h200_18gb
  optional: true
  source_file_dependencies:
  - vllm/

--- a/.buildkite/test_areas/models_multimodal.yaml
+++ b/.buildkite/test_areas/models_multimodal.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: "Multi-Modal Models (Standard) 1: qwen2"
  timeout_in_minutes: 45
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
@@ -19,6 +20,7 @@ steps:
 - label: "Multi-Modal Models (Standard) 2: qwen3 + gemma"
  timeout_in_minutes: 45
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
@@ -77,6 +79,7 @@ steps:
 - label: Multi-Modal Processor # 44min
  timeout_in_minutes: 60
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
@@ -131,6 +134,7 @@ steps:
 - label: Multi-Modal Models (Extended Pooling)
  optional: true
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal/pooling

--- a/.buildkite/test_areas/pytorch.yaml
+++ b/.buildkite/test_areas/pytorch.yaml
@@ -49,6 +49,7 @@ steps:
 - label: PyTorch Fullgraph
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/compile
@@ -60,6 +61,7 @@ steps:
  # if this test fails, it means the nightly torch version is not compatible with some
  # of the dependencies. Please check the error message and add the package to whitelist
  # in /vllm/tools/pre_commit/generate_nightly_torch_test.py
+  device: h200_18gb
  soft_fail: true
  source_file_dependencies:
  - requirements/nightly_torch_test.txt

--- a/.buildkite/test_areas/ray_compat.yaml
+++ b/.buildkite/test_areas/ray_compat.yaml
@@ -7,6 +7,7 @@ steps:
  # If this fails, it means the PR introduces a dependency that
  # conflicts with Ray's dependency constraints.
  # See https://github.com/vllm-project/vllm/issues/33599
+  device: h200_18gb
  soft_fail: true
  timeout_in_minutes: 10
  source_file_dependencies:

--- a/.buildkite/test_areas/spec_decode.yaml
+++ b/.buildkite/test_areas/spec_decode.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Spec Decode Eagle
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
    - vllm/v1/spec_decode/
    - vllm/v1/worker/gpu/spec_decode/
@@ -13,6 +14,7 @@ steps:
 - label: Spec Decode Speculators + MTP
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
    - vllm/v1/spec_decode/
    - vllm/v1/worker/gpu/spec_decode/
@@ -23,6 +25,7 @@ steps:
 - label: Spec Decode Ngram + Suffix
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
    - vllm/v1/spec_decode/
    - vllm/v1/worker/gpu/spec_decode/
@@ -32,6 +35,7 @@ steps:
 - label: Spec Decode Draft Model
  timeout_in_minutes: 30
+  device: h200_18gb
  source_file_dependencies:
    - vllm/v1/spec_decode/
    - vllm/v1/worker/gpu/spec_decode/