Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c8c42597
Unverified
Commit
c8c42597
authored
Sep 12, 2025
by
afeldman-nm
Committed by
GitHub
Sep 12, 2025
Browse files
[CI] Speed up model unit tests in CI (#24253)
Signed-off-by:
Andrew Feldman
<
afeldman@redhat.com
>
parent
9d2a4460
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
123 additions
and
21 deletions
+123
-21
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+66
-12
pyproject.toml
pyproject.toml
+1
-0
tests/models/language/generation/test_common.py
tests/models/language/generation/test_common.py
+10
-4
tests/models/language/pooling/test_classification.py
tests/models/language/pooling/test_classification.py
+4
-1
tests/models/language/pooling/test_embedding.py
tests/models/language/pooling/test_embedding.py
+5
-2
tests/models/test_initialization.py
tests/models/test_initialization.py
+37
-2
No files found.
.buildkite/test-pipeline.yaml
View file @
c8c42597
...
...
@@ -571,36 +571,85 @@ steps:
##### models test #####
-
label
:
Basic Models Test
# 57min
timeout_in_minutes
:
7
5
-
label
:
Basic Models Test
s (Initialization)
timeout_in_minutes
:
4
5
mirror_hardwares
:
[
amdexperimental
]
torch_nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models
-
tests/models
/test_initialization.py
commands
:
-
pytest -v -s models/test_transformers.py
-
pytest -v -s models/test_registry.py
-
pytest -v -s models/test_utils.py
-
pytest -v -s models/test_vision.py
-
pytest -v -s models/test_initialization.py
# Run a subset of model initialization tests
-
pytest -v -s models/test_initialization.py::test_can_initialize_small_subset
-
label
:
Language
Models Test (
Standard)
# 35min
-
label
:
Basic
Models Test
s
(
Extra Initialization) %N
timeout_in_minutes
:
45
mirror_hardwares
:
[
amdexperimental
]
torch_nightly
:
true
source_file_dependencies
:
-
vllm/model_executor/models/
-
tests/models/test_initialization.py
commands
:
# Only when vLLM model source is modified - test initialization of a large
# subset of supported models (the complement of the small subset in the above
# test.) Also run if model initialization test file is modified
-
pytest -v -s models/test_initialization.py \
-k 'not test_can_initialize_small_subset' \
--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT \
--shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism
:
2
-
label
:
Basic Models Tests (Other)
timeout_in_minutes
:
45
mirror_hardwares
:
[
amdexperimental
]
torch_nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/test_transformers.py
-
tests/models/test_registry.py
-
tests/models/test_utils.py
-
tests/models/test_vision.py
commands
:
-
pytest -v -s models/test_transformers.py \
models/test_registry.py \
models/test_utils.py \
models/test_vision.py
-
label
:
Language Models Tests (Standard)
timeout_in_minutes
:
25
mirror_hardwares
:
[
amdexperimental
]
torch_nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/language
commands
:
# Test standard language models, excluding a subset of slow tests
-
pip freeze | grep -E 'torch'
-
pytest -v -s models/language -m core_model
-
pytest -v -s models/language -m
'
core_model
and (not slow_test)'
-
label
:
Language Models Test (
Hybrid)
# 35 min
-
label
:
Language Models Test
s
(
Extra Standard) %N
timeout_in_minutes
:
45
mirror_hardwares
:
[
amdexperimental
]
torch_nightly
:
true
source_file_dependencies
:
-
vllm/model_executor/models/
-
tests/models/language/pooling/test_embedding.py
-
tests/models/language/generation/test_common.py
-
tests/models/language/pooling/test_classification.py
commands
:
# Shard slow subset of standard language models tests. Only run when model
# source is modified, or when specified test files are modified
-
pip freeze | grep -E 'torch'
-
pytest -v -s models/language -m 'core_model and slow_test' \
--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT \
--shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism
:
2
-
label
:
Language Models Tests (Hybrid) %N
timeout_in_minutes
:
75
mirror_hardwares
:
[
amdexperimental
]
torch_nightly
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/language/generation
commands
:
...
...
@@ -608,7 +657,12 @@ steps:
# Note: also needed to run plamo2 model in vLLM
-
uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
-
uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
-
pytest -v -s models/language/generation -m hybrid_model
# Shard hybrid language model tests
-
pytest -v -s models/language/generation \
-m hybrid_model \
--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT \
--shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism
:
2
-
label
:
Language Models Test (Extended Generation)
# 80min
timeout_in_minutes
:
110
...
...
pyproject.toml
View file @
c8c42597
...
...
@@ -145,6 +145,7 @@ skip_gitignore = true
[tool.pytest.ini_options]
markers
=
[
"slow_test"
,
"skip_global_cleanup"
,
"core_model: enable this model test in each PR instead of only nightly"
,
"hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)"
,
...
...
tests/models/language/generation/test_common.py
View file @
c8c42597
...
...
@@ -38,7 +38,7 @@ AITER_MODEL_LIST = [
[
pytest
.
param
(
"bigscience/bloom-560m"
,
# bloom - testing alibi slopes
marks
=
[
pytest
.
mark
.
core_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
slow_test
],
),
pytest
.
param
(
"openai-community/gpt2"
,
# gpt2
...
...
@@ -49,7 +49,10 @@ AITER_MODEL_LIST = [
pytest
.
param
(
"EleutherAI/pythia-70m"
),
# gpt_neox
pytest
.
param
(
"google/gemma-1.1-2b-it"
,
# gemma
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
,
pytest
.
mark
.
slow_test
],
),
pytest
.
param
(
"zai-org/chatglm3-6b"
,
# chatglm (text-only)
...
...
@@ -70,14 +73,17 @@ AITER_MODEL_LIST = [
),
pytest
.
param
(
"microsoft/phi-2"
,
# phi
marks
=
[
pytest
.
mark
.
core_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
slow_test
],
),
pytest
.
param
(
"Qwen/Qwen-7B-Chat"
,
# qwen (text-only)
),
pytest
.
param
(
"Qwen/Qwen2.5-0.5B-Instruct"
,
# qwen2
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
,
pytest
.
mark
.
slow_test
],
),
pytest
.
param
(
"Qwen/Qwen3-8B"
,
# qwen (text-only)
...
...
tests/models/language/pooling/test_classification.py
View file @
c8c42597
...
...
@@ -11,7 +11,10 @@ from vllm.platforms import current_platform
"model"
,
[
pytest
.
param
(
"jason9693/Qwen2.5-1.5B-apeach"
,
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
,
pytest
.
mark
.
slow_test
]),
],
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
...
...
tests/models/language/pooling/test_embedding.py
View file @
c8c42597
...
...
@@ -19,7 +19,7 @@ from ...utils import check_embeddings_close
# model code with bidirectional attention.
# [Decoder-only]
pytest
.
param
(
"BAAI/bge-multilingual-gemma2"
,
marks
=
[
pytest
.
mark
.
core_model
]),
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
slow_test
]),
pytest
.
param
(
"intfloat/e5-mistral-7b-instruct"
,
# CPU v1 doesn't support sliding window
...
...
@@ -29,7 +29,10 @@ from ...utils import check_embeddings_close
# [Encoder-only]
pytest
.
param
(
"BAAI/bge-base-en-v1.5"
,
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
,
pytest
.
mark
.
slow_test
],
),
pytest
.
param
(
"sentence-transformers/all-MiniLM-L12-v2"
),
pytest
.
param
(
"intfloat/multilingual-e5-small"
),
...
...
tests/models/test_initialization.py
View file @
c8c42597
...
...
@@ -18,6 +18,26 @@ from .registry import (_TRANSFORMERS_BACKEND_MODELS, AUTO_EXAMPLE_MODELS,
HF_EXAMPLE_MODELS
,
HfExampleModels
)
from
.utils
import
dummy_hf_overrides
# This minimal list of model architectures is smaller than the total list of
# supported models. The intention is that in the "typical" regression testing
# scenario, we only test initializing these models. This subset was chosen
# to include representative examples of model varieties/workloads (conditional
# generation, sequence classification, causal LM, ranking, chat, reward model,
# multimodal, geospatial, voice, embedding, MTP)
MINIMAL_MODEL_ARCH_LIST
=
[
"LlavaForConditionalGeneration"
,
"Llama4ForConditionalGeneration"
,
"BertForSequenceClassification"
,
"Gemma3nForCausalLM"
,
"JinaVLForRanking"
,
"InternVLChatModel"
,
"InternLM2ForRewardModel"
,
"TransformersForMultimodalLM"
,
"PrithviGeoSpatialMAE"
,
"UltravoxModel"
,
"DeepSeekMTPModel"
,
"XLMRobertaModel"
]
# This list is the complement of the minimal list above. The intention is that
# this list of models is only tested in a "special case" i.e. most PRs should
# not test these models
OTHER_MODEL_ARCH_LIST
=
(
set
(
HF_EXAMPLE_MODELS
.
get_supported_archs
())
-
set
(
MINIMAL_MODEL_ARCH_LIST
))
@
create_new_process_for_each_test
()
def
can_initialize
(
model_arch
:
str
,
monkeypatch
:
pytest
.
MonkeyPatch
,
...
...
@@ -101,8 +121,23 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
max_num_seqs
=
model_info
.
max_num_seqs
)
@
pytest
.
mark
.
parametrize
(
"model_arch"
,
HF_EXAMPLE_MODELS
.
get_supported_archs
())
def
test_can_initialize
(
model_arch
:
str
,
monkeypatch
:
pytest
.
MonkeyPatch
):
@
pytest
.
mark
.
parametrize
(
"model_arch"
,
MINIMAL_MODEL_ARCH_LIST
)
def
test_can_initialize_small_subset
(
model_arch
:
str
,
monkeypatch
:
pytest
.
MonkeyPatch
):
"""Test initializing small subset of supported models"""
if
model_arch
==
"Lfm2ForCausalLM"
:
pytest
.
skip
(
"Skipping until test supports V1-only models"
)
can_initialize
(
model_arch
,
monkeypatch
,
HF_EXAMPLE_MODELS
)
@
pytest
.
mark
.
parametrize
(
"model_arch"
,
OTHER_MODEL_ARCH_LIST
)
def
test_can_initialize_large_subset
(
model_arch
:
str
,
monkeypatch
:
pytest
.
MonkeyPatch
):
"""Test initializing large subset of supported models
This test covers the complement of the tests covered in the "small subset"
test.
"""
if
model_arch
==
"Lfm2ForCausalLM"
:
pytest
.
skip
(
"Skipping until test supports V1-only models"
)
can_initialize
(
model_arch
,
monkeypatch
,
HF_EXAMPLE_MODELS
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment