Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
db14f61f
Unverified
Commit
db14f61f
authored
Dec 08, 2025
by
Kevin H. Luu
Committed by
GitHub
Dec 08, 2025
Browse files
[ci] Refactor CI file structure (#29343)
parent
78c75033
Changes
31
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
463 additions
and
0 deletions
+463
-0
.buildkite/test_areas/model_executor.yaml
.buildkite/test_areas/model_executor.yaml
+17
-0
.buildkite/test_areas/models_basic.yaml
.buildkite/test_areas/models_basic.yaml
+62
-0
.buildkite/test_areas/models_distributed.yaml
.buildkite/test_areas/models_distributed.yaml
+22
-0
.buildkite/test_areas/models_language.yaml
.buildkite/test_areas/models_language.yaml
+91
-0
.buildkite/test_areas/models_multimodal.yaml
.buildkite/test_areas/models_multimodal.yaml
+79
-0
.buildkite/test_areas/plugins.yaml
.buildkite/test_areas/plugins.yaml
+34
-0
.buildkite/test_areas/pytorch.yaml
.buildkite/test_areas/pytorch.yaml
+50
-0
.buildkite/test_areas/quantization.yaml
.buildkite/test_areas/quantization.yaml
+46
-0
.buildkite/test_areas/samplers.yaml
.buildkite/test_areas/samplers.yaml
+14
-0
.buildkite/test_areas/tool_use.yaml
.buildkite/test_areas/tool_use.yaml
+23
-0
.buildkite/test_areas/weight_loading.yaml
.buildkite/test_areas/weight_loading.yaml
+25
-0
No files found.
.buildkite/test_areas/model_executor.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: model executor tests plus the tensorizer entrypoint test.
group: Model Executor
depends_on:
  - image-build
steps:
  - label: Model Executor
    timeout_in_minutes: 35
    source_file_dependencies:
      - vllm/engine/arg_utils.py
      - vllm/config/model.py
      - vllm/model_executor
      - tests/model_executor
      - tests/entrypoints/openai/test_tensorizer_entrypoint.py
    commands:
      # Install system packages (curl, libsodium23) before running the tests.
      - apt-get update && apt-get install -y curl libsodium23
      # Exported so the pytest invocations below inherit the spawn setting.
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -v -s model_executor
      - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
.buildkite/test_areas/models_basic.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: basic model tests (initialization, registry, utils)
# and an optional run against an upgraded transformers install.
group: Models - Basic
depends_on:
  - image-build
steps:
  - label: Basic Models Tests (Initialization)
    timeout_in_minutes: 45
    mirror_hardwares: [amdexperimental]
    torch_nightly: true
    source_file_dependencies:
      - vllm/
      - tests/models/test_initialization.py
    commands:
      # Run a subset of model initialization tests
      - pytest -v -s models/test_initialization.py::test_can_initialize_small_subset

  - label: Basic Models Tests (Extra Initialization) %N
    timeout_in_minutes: 45
    mirror_hardwares: [amdexperimental]
    torch_nightly: true
    source_file_dependencies:
      - vllm/model_executor/models/
      - tests/models/test_initialization.py
    commands:
      # Only when vLLM model source is modified - test initialization of a large
      # subset of supported models (the complement of the small subset in the
      # above test). Also run if the model initialization test file is modified.
      # The run is sharded across the parallel jobs declared below.
      - pytest -v -s models/test_initialization.py -k 'not test_can_initialize_small_subset' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
    parallelism: 2

  - label: Basic Models Tests (Other)
    timeout_in_minutes: 45
    source_file_dependencies:
      - vllm/
      - tests/models/test_transformers.py
      - tests/models/test_registry.py
    commands:
      - pytest -v -s models/test_transformers.py models/test_registry.py

  - label: Basic Models Test (Other CPU)  # 5min
    timeout_in_minutes: 10
    source_file_dependencies:
      - vllm/
      - tests/models/test_utils.py
      - tests/models/test_vision.py
    no_gpu: true
    commands:
      - pytest -v -s models/test_utils.py models/test_vision.py

  # Optional, soft-failing step: upgrades transformers from its GitHub
  # repository, then runs model tests and offline-inference examples.
  # Paths are relative to the repository root (see working_dir).
  - label: Transformers Nightly Models
    working_dir: '/vllm-workspace/'
    optional: true
    soft_fail: true
    commands:
      - pip install --upgrade git+https://github.com/huggingface/transformers
      - pytest -v -s tests/models/test_initialization.py
      - pytest -v -s tests/models/test_transformers.py
      - pytest -v -s tests/models/multimodal/processing/
      - pytest -v -s tests/models/multimodal/test_mapping.py
      - python3 examples/offline_inference/basic/chat.py
      - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
      # Whisper needs spawn method to avoid deadlock
      - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
.buildkite/test_areas/models_distributed.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: model tests marked distributed(num_gpus=2).
group: Models - Distributed
depends_on:
  - image-build
steps:
  - label: Distributed Model Tests (2 GPUs)
    timeout_in_minutes: 50
    working_dir: '/vllm-workspace/tests'
    num_gpus: 2
    source_file_dependencies:
      - vllm/model_executor/model_loader/sharded_state_loader.py
      - vllm/model_executor/models/
      - tests/basic_correctness/
      - tests/model_executor/model_loader/test_sharded_state_loader.py
      - tests/models/
    commands:
      - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
      - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
      # Avoid importing model tests that cause CUDA reinitialization error:
      # each model test directory gets its own pytest invocation.
      - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
      - pytest models/language -v -s -m 'distributed(num_gpus=2)'
      - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
      # Whisper is excluded above and run separately with the spawn method.
      - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
.buildkite/test_areas/models_language.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: language model tests (standard, hybrid, extended
# generation/pooling, PPL and MTEB suites).
group: Models - Language
depends_on:
  - image-build
steps:
  - label: Language Models Tests (Standard)
    timeout_in_minutes: 25
    mirror_hardwares: [amdexperimental]
    torch_nightly: true
    source_file_dependencies:
      - vllm/
      - tests/models/language
    commands:
      # Test standard language models, excluding a subset of slow tests
      - pip freeze | grep -E 'torch'
      - pytest -v -s models/language -m 'core_model and (not slow_test)'

  - label: Language Models Tests (Extra Standard) %N
    timeout_in_minutes: 45
    mirror_hardwares: [amdexperimental]
    torch_nightly: true
    source_file_dependencies:
      - vllm/model_executor/models/
      - tests/models/language/pooling/test_embedding.py
      - tests/models/language/generation/test_common.py
      - tests/models/language/pooling/test_classification.py
    commands:
      # Shard slow subset of standard language models tests. Only run when model
      # source is modified, or when specified test files are modified
      - pip freeze | grep -E 'torch'
      - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
    parallelism: 2

  - label: Language Models Tests (Hybrid) %N
    timeout_in_minutes: 75
    mirror_hardwares: [amdexperimental]
    torch_nightly: true
    source_file_dependencies:
      - vllm/
      - tests/models/language/generation
    commands:
      # Install fast path packages for testing against transformers
      # Note: also needed to run plamo2 model in vLLM
      - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
      - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
      # Shard hybrid language model tests
      - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
    parallelism: 2

  - label: Language Models Test (Extended Generation)  # 80min
    timeout_in_minutes: 110
    mirror_hardwares: [amdexperimental]
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/language/generation
    commands:
      # Install fast path packages for testing against transformers
      # Note: also needed to run plamo2 model in vLLM
      - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
      - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
      # Everything not already selected by the core_model / hybrid_model markers.
      - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'

  - label: Language Models Test (PPL)
    timeout_in_minutes: 110
    mirror_hardwares: [amdexperimental]
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/language/generation_ppl_test
    commands:
      - pytest -v -s models/language/generation_ppl_test

  - label: Language Models Test (Extended Pooling)  # 36min
    timeout_in_minutes: 50
    mirror_hardwares: [amdexperimental]
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/language/pooling
    commands:
      - pytest -v -s models/language/pooling -m 'not core_model'

  - label: Language Models Test (MTEB)
    timeout_in_minutes: 110
    mirror_hardwares: [amdexperimental]
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/language/pooling_mteb_test
    commands:
      - pytest -v -s models/language/pooling_mteb_test
.buildkite/test_areas/models_multimodal.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: multi-modal model tests, processor tests and a
# small-model accuracy evaluation.
group: Models - Multimodal
depends_on:
  - image-build
steps:
  - label: Multi-Modal Models (Standard)  # 60min
    timeout_in_minutes: 80
    source_file_dependencies:
      - vllm/
      - tests/models/multimodal
    commands:
      - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
      - pip freeze | grep -E 'torch'
      - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
      # Whisper runs from the parent directory;
      # otherwise, mp_method="spawn" doesn't work
      - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model

  - label: Multi-Modal Processor Test (CPU)
    timeout_in_minutes: 60
    source_file_dependencies:
      - vllm/
      - tests/models/multimodal
    no_gpu: true
    commands:
      - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
      # test_tensor_schema.py is covered by the "Multi-Modal Processor" step.
      - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py

  - label: Multi-Modal Processor  # 44min
    timeout_in_minutes: 60
    source_file_dependencies:
      - vllm/
      - tests/models/multimodal
    commands:
      - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
      - pytest -v -s models/multimodal/processing/test_tensor_schema.py

  - label: Multi-Modal Accuracy Eval (Small Models)  # 50min
    timeout_in_minutes: 70
    working_dir: '/vllm-workspace/.buildkite/lm-eval-harness'
    source_file_dependencies:
      - vllm/multimodal/
      - vllm/inputs/
      - vllm/v1/core/
    commands:
      - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1

  # The three "Extended" steps split the non-core_model tests: step 1 runs
  # everything except generation/test_common.py and processing; steps 2 and 3
  # run generation/test_common.py split groups 0 and 1.
  - label: Multi-Modal Models (Extended) 1
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/multimodal
    commands:
      - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
      - pytest -v -s models/multimodal -m 'not core_model' --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing

  - label: Multi-Modal Models (Extended) 2
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/multimodal
    commands:
      - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
      - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'

  - label: Multi-Modal Models (Extended) 3
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/models/multimodal
    commands:
      - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
      - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'

  # This test is used only in PR development phase to test individual models
  # and should never run on main
  - label: Custom Models
    optional: true
    commands:
      - echo 'Testing custom models...'
      # PR authors can temporarily add commands below to test individual models
      # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
      # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*
.buildkite/test_areas/plugins.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: plugin tests. Each plugin package is installed, tested
# and uninstalled in turn within a single step.
group: Plugins
depends_on:
  - image-build
steps:
  - label: Plugin Tests (2 GPUs)
    timeout_in_minutes: 60
    working_dir: '/vllm-workspace/tests'
    num_gpus: 2
    # NOTE(review): the commands below also run tests under plugins_tests/,
    # distributed/, entrypoints/ and models/, none of which are listed here —
    # confirm that is intended.
    source_file_dependencies:
      - vllm/plugins/
      - tests/plugins/
    commands:
      # begin platform plugin and general plugin tests, all the code in-between
      # runs on dummy platform
      - pip install -e ./plugins/vllm_add_dummy_platform
      - pytest -v -s plugins_tests/test_platform_plugins.py
      - pip uninstall vllm_add_dummy_platform -y
      # end platform plugin tests
      # begin io_processor plugins test, all the code in between uses the
      # prithvi_io_processor plugin
      - pip install -e ./plugins/prithvi_io_processor_plugin
      - pytest -v -s plugins_tests/test_io_processor_plugins.py
      - pip uninstall prithvi_io_processor_plugin -y
      # end io_processor plugins test
      # begin stat_logger plugins test
      - pip install -e ./plugins/vllm_add_dummy_stat_logger
      - pytest -v -s plugins_tests/test_stats_logger_plugins.py
      - pip uninstall dummy_stat_logger -y
      # end stat_logger plugins test
      # other tests continue here:
      - pytest -v -s plugins_tests/test_scheduler_plugins.py
      - pip install -e ./plugins/vllm_add_dummy_model
      - pytest -v -s distributed/test_distributed_oot.py
      # it needs a clean process
      - pytest -v -s entrypoints/openai/test_oot_registration.py
      # it needs a clean process
      - pytest -v -s models/test_oot_registration.py
      # unit tests for in-tree lora resolver plugins
      - pytest -v -s plugins/lora_resolvers
.buildkite/test_areas/pytorch.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: torch.compile unit/smoke/fullgraph tests and the
# nightly-torch dependency override check.
group: PyTorch
depends_on:
  - image-build
steps:
  - label: PyTorch Compilation Unit Tests
    timeout_in_minutes: 30
    source_file_dependencies:
      - vllm/
      - tests/compile
    commands:
      # Run unit tests defined directly under compile/,
      # not including subdirectories, which are usually heavier
      # tests covered elsewhere.
      # Use `find` to launch multiple instances of pytest so that
      # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
      # The double-quoted `\\;` parses to `\;`, the terminator for find's -exec.
      - "find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \\;"

  - label: PyTorch Fullgraph Smoke Test
    timeout_in_minutes: 30
    source_file_dependencies:
      - vllm/
      - tests/compile
    commands:
      # Run smoke tests under fullgraph directory, except test_full_graph.py
      # as it is a heavy test that is covered in other steps.
      # Use `find` to launch multiple instances of pytest so that
      # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
      - "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\;"

  - label: PyTorch Fullgraph
    timeout_in_minutes: 40
    source_file_dependencies:
      - vllm/
      - tests/compile
    commands:
      # fp8 kv scales not supported on sm89, tested on Blackwell instead
      - pytest -v -s compile/fullgraph/test_full_graph.py -k 'not test_fp8_kv_scale_compile'
      # Limit to no custom ops to reduce running time
      # Wrap with quotes to escape yaml and avoid starting -k string with a -
      - "pytest -v -s compile/distributed/test_fusions_e2e.py -k 'TRITON and not +quant_fp8 and not Llama-4'"

  - label: Pytorch Nightly Dependency Override Check  # 2min
    # if this test fails, it means the nightly torch version is not compatible
    # with some of the dependencies. Please check the error message and add the
    # package to whitelist in /vllm/tools/pre_commit/generate_nightly_torch_test.py
    soft_fail: true
    source_file_dependencies:
      - requirements/nightly_torch_test.txt
    commands:
      - bash standalone_tests/pytorch_nightly_dependency.sh
.buildkite/test_areas/quantization.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: quantization kernels/layers, Blackwell MoE and
# quantized model tests.
group: Quantization
depends_on:
  - image-build
steps:
  - label: Quantization
    timeout_in_minutes: 90
    source_file_dependencies:
      - csrc/
      - vllm/model_executor/layers/quantization
      - tests/quantization
    commands:
      # temporary install here since we need nightly, will move to requirements/test.in
      # after torchao 0.12 release, and pin a working version of torchao nightly here
      # since torchao nightly is only compatible with torch nightly currently
      # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
      # we can only upgrade after this is resolved
      # TODO(jerryzh168): resolve the above comment
      # NOTE(review): the pin below is already 0.13.0 — confirm whether the
      # "0.12 release" wording above is still current.
      - uv pip install --system torchao==0.13.0 --index-url https://download.pytorch.org/whl/cu129
      - uv pip install --system conch-triton-kernels
      # test_blackwell_moe.py is run by the B200 step below instead.
      - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py

  - label: Quantized MoE Test (B200)
    timeout_in_minutes: 60
    working_dir: '/vllm-workspace/'
    gpu: b200
    source_file_dependencies:
      - tests/quantization/test_blackwell_moe.py
      - vllm/model_executor/models/deepseek_v2.py
      - vllm/model_executor/models/gpt_oss.py
      - vllm/model_executor/models/llama4.py
      - vllm/model_executor/layers/fused_moe
      - vllm/model_executor/layers/quantization/compressed_tensors
      - vllm/model_executor/layers/quantization/modelopt.py
      - vllm/model_executor/layers/quantization/mxfp4.py
      - vllm/v1/attention/backends/flashinfer.py
    commands:
      # Path is relative to the repository root (see working_dir above).
      - pytest -s -v tests/quantization/test_blackwell_moe.py

  - label: Quantized Models Test
    timeout_in_minutes: 60
    source_file_dependencies:
      - vllm/model_executor/layers/quantization
      - tests/models/quantization
    commands:
      - pytest -v -s models/quantization
.buildkite/test_areas/samplers.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: sampler tests, run once with default settings and once
# with VLLM_USE_FLASHINFER_SAMPLER=1.
group: Samplers
depends_on:
  - image-build
steps:
  - label: Samplers Test
    timeout_in_minutes: 75
    source_file_dependencies:
      - vllm/model_executor/layers
      - vllm/sampling_metadata.py
      - tests/samplers
      - tests/conftest.py
    commands:
      - pytest -v -s samplers
      # Second pass over the same suite with the FlashInfer sampler flag set.
      - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
.buildkite/test_areas/tool_use.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: OpenAI-compatible tool-use tests, split by the
# cpu_test pytest marker into a GPU step and a CPU-only step.
group: Tool use
depends_on:
  - image-build
steps:
  - label: OpenAI-Compatible Tool Use
    timeout_in_minutes: 35
    mirror_hardwares: [amdexperimental]
    fast_check: false
    source_file_dependencies:
      - vllm/
      - tests/tool_use
    commands:
      - pytest -v -s -m 'not cpu_test' tool_use

  - label: OpenAI-Compatible Tool Use (CPU)
    # Explicit null (written `~` previously; same parsed value). Presumably
    # clears the group-level image-build dependency for this CPU-only step —
    # confirm against the pipeline generator.
    depends_on: null
    timeout_in_minutes: 10
    source_file_dependencies:
      - vllm/
      - tests/tool_use
    no_gpu: true
    commands:
      - pytest -v -s -m 'cpu_test' tool_use
.buildkite/test_areas/weight_loading.yaml
0 → 100644
View file @
db14f61f
# Buildkite test area: multi-GPU weight loading tests driven by
# run_model_weight_loading_test.sh with a model list file.
group: Weight Loading
depends_on:
  - image-build
steps:
  - label: Weight Loading Multiple GPU  # 33min
    timeout_in_minutes: 45
    working_dir: '/vllm-workspace/tests'
    num_gpus: 2
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/weight_loading
    commands:
      - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt

  - label: Weight Loading Multiple GPU - Large Models  # optional
    working_dir: '/vllm-workspace/tests'
    num_gpus: 2
    gpu: a100
    optional: true
    source_file_dependencies:
      - vllm/
      - tests/weight_loading
    commands:
      # Same script as above, pointed at the large-model list.
      - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment