Unverified Commit 56de443d authored by Kevin H. Luu's avatar Kevin H. Luu Committed by GitHub
Browse files

[ci] Switch some CI jobs to H200 MIG slices (#38956)

parent 4dd49b06
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: Basic Correctness - label: Basic Correctness
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/basic_correctness/test_basic_correctness - tests/basic_correctness/test_basic_correctness
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: Benchmarks CLI Test - label: Benchmarks CLI Test
timeout_in_minutes: 20 timeout_in_minutes: 20
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/benchmarks/ - tests/benchmarks/
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: Platform Tests (CUDA) - label: Platform Tests (CUDA)
timeout_in_minutes: 15 timeout_in_minutes: 15
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/cuda - tests/cuda
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: Engine - label: Engine
timeout_in_minutes: 15 timeout_in_minutes: 15
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/engine - tests/engine
...@@ -25,6 +26,7 @@ steps: ...@@ -25,6 +26,7 @@ steps:
- label: e2e Scheduling (1 GPU) - label: e2e Scheduling (1 GPU)
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/v1/ - vllm/v1/
- tests/v1/e2e/general/ - tests/v1/e2e/general/
......
...@@ -61,6 +61,7 @@ steps: ...@@ -61,6 +61,7 @@ steps:
- label: Entrypoints Integration (API Server openai - Part 3) - label: Entrypoints Integration (API Server openai - Part 3)
timeout_in_minutes: 50 timeout_in_minutes: 50
device: h200_18gb
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
...@@ -105,6 +106,7 @@ steps: ...@@ -105,6 +106,7 @@ steps:
- label: OpenAI API Correctness - label: OpenAI API Correctness
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- csrc/ - csrc/
- vllm/entrypoints/openai/ - vllm/entrypoints/openai/
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: EPLB Algorithm - label: EPLB Algorithm
timeout_in_minutes: 15 timeout_in_minutes: 15
device: h200_18gb
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
source_file_dependencies: source_file_dependencies:
- vllm/distributed/eplb - vllm/distributed/eplb
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: vLLM IR Tests - label: vLLM IR Tests
timeout_in_minutes: 10 timeout_in_minutes: 10
device: h200_18gb
working_dir: "/vllm-workspace/" working_dir: "/vllm-workspace/"
source_file_dependencies: source_file_dependencies:
- vllm/ir - vllm/ir
......
...@@ -19,6 +19,7 @@ steps: ...@@ -19,6 +19,7 @@ steps:
- label: V1 Sample + Logits - label: V1 Sample + Logits
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/v1/sample - tests/v1/sample
...@@ -86,6 +87,7 @@ steps: ...@@ -86,6 +87,7 @@ steps:
- label: Regression - label: Regression
timeout_in_minutes: 20 timeout_in_minutes: 20
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/test_regression - tests/test_regression
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: Basic Models Tests (Initialization) - label: Basic Models Tests (Initialization)
timeout_in_minutes: 45 timeout_in_minutes: 45
device: h200_18gb
torch_nightly: true torch_nightly: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
......
...@@ -67,6 +67,7 @@ steps: ...@@ -67,6 +67,7 @@ steps:
- label: Language Models Test (PPL) - label: Language Models Test (PPL)
timeout_in_minutes: 110 timeout_in_minutes: 110
device: h200_18gb
optional: true optional: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
...@@ -90,6 +91,7 @@ steps: ...@@ -90,6 +91,7 @@ steps:
- label: Language Models Test (MTEB) - label: Language Models Test (MTEB)
timeout_in_minutes: 110 timeout_in_minutes: 110
device: h200_18gb
optional: true optional: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: "Multi-Modal Models (Standard) 1: qwen2" - label: "Multi-Modal Models (Standard) 1: qwen2"
timeout_in_minutes: 45 timeout_in_minutes: 45
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/models/multimodal - tests/models/multimodal
...@@ -19,6 +20,7 @@ steps: ...@@ -19,6 +20,7 @@ steps:
- label: "Multi-Modal Models (Standard) 2: qwen3 + gemma" - label: "Multi-Modal Models (Standard) 2: qwen3 + gemma"
timeout_in_minutes: 45 timeout_in_minutes: 45
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/models/multimodal - tests/models/multimodal
...@@ -77,6 +79,7 @@ steps: ...@@ -77,6 +79,7 @@ steps:
- label: Multi-Modal Processor # 44min - label: Multi-Modal Processor # 44min
timeout_in_minutes: 60 timeout_in_minutes: 60
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/models/multimodal - tests/models/multimodal
...@@ -131,6 +134,7 @@ steps: ...@@ -131,6 +134,7 @@ steps:
- label: Multi-Modal Models (Extended Pooling) - label: Multi-Modal Models (Extended Pooling)
optional: true optional: true
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/models/multimodal/pooling - tests/models/multimodal/pooling
......
...@@ -49,6 +49,7 @@ steps: ...@@ -49,6 +49,7 @@ steps:
- label: PyTorch Fullgraph - label: PyTorch Fullgraph
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
- tests/compile - tests/compile
...@@ -60,6 +61,7 @@ steps: ...@@ -60,6 +61,7 @@ steps:
# if this test fails, it means the nightly torch version is not compatible with some # if this test fails, it means the nightly torch version is not compatible with some
# of the dependencies. Please check the error message and add the package to whitelist # of the dependencies. Please check the error message and add the package to whitelist
# in /vllm/tools/pre_commit/generate_nightly_torch_test.py # in /vllm/tools/pre_commit/generate_nightly_torch_test.py
device: h200_18gb
soft_fail: true soft_fail: true
source_file_dependencies: source_file_dependencies:
- requirements/nightly_torch_test.txt - requirements/nightly_torch_test.txt
......
...@@ -7,6 +7,7 @@ steps: ...@@ -7,6 +7,7 @@ steps:
# If this fails, it means the PR introduces a dependency that # If this fails, it means the PR introduces a dependency that
# conflicts with Ray's dependency constraints. # conflicts with Ray's dependency constraints.
# See https://github.com/vllm-project/vllm/issues/33599 # See https://github.com/vllm-project/vllm/issues/33599
device: h200_18gb
soft_fail: true soft_fail: true
timeout_in_minutes: 10 timeout_in_minutes: 10
source_file_dependencies: source_file_dependencies:
......
...@@ -4,6 +4,7 @@ depends_on: ...@@ -4,6 +4,7 @@ depends_on:
steps: steps:
- label: Spec Decode Eagle - label: Spec Decode Eagle
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/v1/spec_decode/ - vllm/v1/spec_decode/
- vllm/v1/worker/gpu/spec_decode/ - vllm/v1/worker/gpu/spec_decode/
...@@ -13,6 +14,7 @@ steps: ...@@ -13,6 +14,7 @@ steps:
- label: Spec Decode Speculators + MTP - label: Spec Decode Speculators + MTP
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/v1/spec_decode/ - vllm/v1/spec_decode/
- vllm/v1/worker/gpu/spec_decode/ - vllm/v1/worker/gpu/spec_decode/
...@@ -23,6 +25,7 @@ steps: ...@@ -23,6 +25,7 @@ steps:
- label: Spec Decode Ngram + Suffix - label: Spec Decode Ngram + Suffix
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/v1/spec_decode/ - vllm/v1/spec_decode/
- vllm/v1/worker/gpu/spec_decode/ - vllm/v1/worker/gpu/spec_decode/
...@@ -32,6 +35,7 @@ steps: ...@@ -32,6 +35,7 @@ steps:
- label: Spec Decode Draft Model - label: Spec Decode Draft Model
timeout_in_minutes: 30 timeout_in_minutes: 30
device: h200_18gb
source_file_dependencies: source_file_dependencies:
- vllm/v1/spec_decode/ - vllm/v1/spec_decode/
- vllm/v1/worker/gpu/spec_decode/ - vllm/v1/worker/gpu/spec_decode/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment