Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
48e925fa
Unverified
Commit
48e925fa
authored
May 01, 2025
by
Cyrus Leung
Committed by
GitHub
May 01, 2025
Browse files
[Misc] Clean up test docstrings and names (#17521)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
1903c0b8
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
51 additions
and
115 deletions
+51
-115
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+8
-2
tests/models/language/generation/test_common.py
tests/models/language/generation/test_common.py
+0
-5
tests/models/language/generation/test_granite.py
tests/models/language/generation/test_granite.py
+0
-4
tests/models/language/generation/test_mistral.py
tests/models/language/generation/test_mistral.py
+0
-4
tests/models/language/generation/test_phimoe.py
tests/models/language/generation/test_phimoe.py
+0
-4
tests/models/language/pooling/test_classification.py
tests/models/language/pooling/test_classification.py
+1
-5
tests/models/language/pooling/test_embedding.py
tests/models/language/pooling/test_embedding.py
+0
-4
tests/models/language/pooling/test_jina.py
tests/models/language/pooling/test_jina.py
+3
-8
tests/models/language/pooling/test_scoring.py
tests/models/language/pooling/test_scoring.py
+22
-39
tests/models/language/pooling/test_snowflake_arctic_embed.py
tests/models/language/pooling/test_snowflake_arctic_embed.py
+0
-4
tests/models/language/pooling/test_truncation_control.py
tests/models/language/pooling/test_truncation_control.py
+12
-12
tests/models/multimodal/generation/test_pixtral.py
tests/models/multimodal/generation/test_pixtral.py
+0
-4
tests/models/multimodal/generation/test_whisper.py
tests/models/multimodal/generation/test_whisper.py
+2
-2
tests/models/quantization/test_aqlm.py
tests/models/quantization/test_aqlm.py
+0
-5
tests/models/quantization/test_bitblas.py
tests/models/quantization/test_bitblas.py
+0
-2
tests/models/quantization/test_gptq_bitblas.py
tests/models/quantization/test_gptq_bitblas.py
+0
-2
tests/models/quantization/test_gptq_marlin.py
tests/models/quantization/test_gptq_marlin.py
+2
-3
tests/models/quantization/test_gptq_marlin_24.py
tests/models/quantization/test_gptq_marlin_24.py
+0
-2
tests/models/test_transformers.py
tests/models/test_transformers.py
+1
-4
No files found.
.buildkite/test-pipeline.yaml
View file @
48e925fa
...
...
@@ -395,10 +395,8 @@ steps:
-
csrc/
-
vllm/model_executor/layers/quantization
-
tests/quantization
-
tests/models/quantization
commands
:
-
VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
-
pytest -v -s models/quantization
-
label
:
LM Eval Small Models
# 53min
working_dir
:
"
/vllm-workspace/.buildkite/lm-eval-harness"
...
...
@@ -509,6 +507,14 @@ steps:
-
pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-
pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
-
label
:
Quantized Models Test
#mirror_hardwares: [amd]
source_file_dependencies
:
-
vllm/model_executor/layers/quantization
-
tests/models/quantization
commands
:
-
pytest -v -s models/quantization
# This test is used only in PR development phase to test individual models and should never run on main
-
label
:
Custom Models Test
mirror_hardwares
:
[
amd
]
...
...
tests/models/language/generation/test_
models
.py
→
tests/models/language/generation/test_
common
.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM when using greedy sampling.
Run `pytest tests/models/test_models.py`.
"""
import
pytest
import
torch
...
...
tests/models/language/generation/test_granite.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Granite models using greedy sampling.
Run `pytest tests/models/test_granite.py`.
"""
import
pytest
from
...utils
import
check_logprobs_close
...
...
tests/models/language/generation/test_mistral.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
"""
import
copy
import
json
...
...
tests/models/language/generation/test_phimoe.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for moe models using greedy sampling.
Run `pytest tests/models/test_phimoe.py`.
"""
import
pytest
import
torch
...
...
tests/models/language/pooling/test_cl
s_models
.py
→
tests/models/language/pooling/test_cl
assification
.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the classification outputs of HF and vLLM models.
Run `pytest tests/models/test_cls_models.py`.
"""
import
pytest
import
torch
from
transformers
import
AutoModelForSequenceClassification
...
...
@@ -19,7 +15,7 @@ from vllm.platforms import current_platform
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
]
if
current_platform
.
is_rocm
()
else
[
"float"
])
def
test_
classification_
models
(
def
test_models
(
hf_runner
,
vllm_runner
,
example_prompts
,
...
...
tests/models/language/pooling/test_embedding.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the embedding outputs of HF and vLLM models.
Run `pytest tests/models/embedding/language/test_embedding.py`.
"""
import
pytest
from
vllm.config
import
PoolerConfig
...
...
tests/models/language/pooling/test_jina.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
# ruff: noqa: E501
"""Compare the scoring outputs of HF and vLLM models.
Run `pytest tests/models/embedding/language/test_jina.py`.
"""
import
math
import
pytest
...
...
@@ -22,9 +17,9 @@ TEXTS_2 = [
"Organic skincare for sensitive skin with aloe vera and chamomile."
,
"New makeup trends focus on bold colors and innovative techniques"
,
"Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille"
,
"Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken"
,
"Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla"
,
"Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras"
,
"Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken"
,
# noqa: E501
"Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla"
,
# noqa: E501
"Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras"
,
# noqa: E501
"针对敏感肌专门设计的天然有机护肤产品"
,
"新的化妆趋势注重鲜艳的颜色和创新的技巧"
,
"敏感肌のために特別に設計された天然有機スキンケア製品"
,
...
...
tests/models/language/pooling/test_scoring.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the scoring outputs of HF and vLLM models.
Run `pytest tests/models/embedding/language/test_scoring.py`.
"""
import
math
import
pytest
import
torch
import
torch.nn.functional
as
F
MODELS
=
[
CROSS_ENCODER_
MODELS
=
[
"cross-encoder/ms-marco-MiniLM-L-6-v2"
,
# Bert
"BAAI/bge-reranker-v2-m3"
,
# Roberta
]
...
...
@@ -28,21 +24,21 @@ TEXTS_2 = [
"The capital of Germany is Berlin."
,
]
DTYPE
=
"half"
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
MODELS
)
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
CROSS_ENCODER_
MODELS
)
def
model_name
(
request
):
yield
request
.
param
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_llm_1_to_1
(
vllm_runner
,
hf_runner
,
model_name
,
dtype
:
str
):
def
test_cross_encoder_1_to_1
(
vllm_runner
,
hf_runner
,
model_name
):
text_pair
=
[
TEXTS_1
[
0
],
TEXTS_2
[
0
]]
with
hf_runner
(
model_name
,
dtype
=
dtype
,
is_cross_encoder
=
True
)
as
hf_model
:
with
hf_runner
(
model_name
,
dtype
=
DTYPE
,
is_cross_encoder
=
True
)
as
hf_model
:
hf_outputs
=
hf_model
.
predict
([
text_pair
]).
tolist
()
with
vllm_runner
(
model_name
,
task
=
"score"
,
dtype
=
dtype
,
with
vllm_runner
(
model_name
,
task
=
"score"
,
dtype
=
DTYPE
,
max_model_len
=
None
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
score
(
text_pair
[
0
],
text_pair
[
1
])
...
...
@@ -52,18 +48,16 @@ def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
assert
math
.
isclose
(
hf_outputs
[
0
],
vllm_outputs
[
0
],
rel_tol
=
0.01
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_llm_1_to_N
(
vllm_runner
,
hf_runner
,
model_name
,
dtype
:
str
):
def
test_cross_encoder_1_to_N
(
vllm_runner
,
hf_runner
,
model_name
):
text_pairs
=
[
[
TEXTS_1
[
0
],
TEXTS_2
[
0
]],
[
TEXTS_1
[
0
],
TEXTS_2
[
1
]],
]
with
hf_runner
(
model_name
,
dtype
=
dtype
,
is_cross_encoder
=
True
)
as
hf_model
:
with
hf_runner
(
model_name
,
dtype
=
DTYPE
,
is_cross_encoder
=
True
)
as
hf_model
:
hf_outputs
=
hf_model
.
predict
(
text_pairs
).
tolist
()
with
vllm_runner
(
model_name
,
task
=
"score"
,
dtype
=
dtype
,
with
vllm_runner
(
model_name
,
task
=
"score"
,
dtype
=
DTYPE
,
max_model_len
=
None
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
score
(
TEXTS_1
[
0
],
TEXTS_2
)
...
...
@@ -74,18 +68,16 @@ def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
assert
math
.
isclose
(
hf_outputs
[
1
],
vllm_outputs
[
1
],
rel_tol
=
0.01
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_llm_N_to_N
(
vllm_runner
,
hf_runner
,
model_name
,
dtype
:
str
):
def
test_cross_encoder_N_to_N
(
vllm_runner
,
hf_runner
,
model_name
):
text_pairs
=
[
[
TEXTS_1
[
0
],
TEXTS_2
[
0
]],
[
TEXTS_1
[
1
],
TEXTS_2
[
1
]],
]
with
hf_runner
(
model_name
,
dtype
=
dtype
,
is_cross_encoder
=
True
)
as
hf_model
:
with
hf_runner
(
model_name
,
dtype
=
DTYPE
,
is_cross_encoder
=
True
)
as
hf_model
:
hf_outputs
=
hf_model
.
predict
(
text_pairs
).
tolist
()
with
vllm_runner
(
model_name
,
task
=
"score"
,
dtype
=
dtype
,
with
vllm_runner
(
model_name
,
task
=
"score"
,
dtype
=
DTYPE
,
max_model_len
=
None
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
score
(
TEXTS_1
,
TEXTS_2
)
...
...
@@ -101,13 +93,10 @@ def emb_model_name(request):
yield
request
.
param
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_llm_1_to_1_embedding
(
vllm_runner
,
hf_runner
,
emb_model_name
,
dtype
:
str
):
def
test_embedding_1_to_1
(
vllm_runner
,
hf_runner
,
emb_model_name
):
text_pair
=
[
TEXTS_1
[
0
],
TEXTS_2
[
0
]]
with
hf_runner
(
emb_model_name
,
dtype
=
dtype
,
with
hf_runner
(
emb_model_name
,
dtype
=
DTYPE
,
is_sentence_transformer
=
True
)
as
hf_model
:
hf_embeddings
=
hf_model
.
encode
(
text_pair
)
hf_outputs
=
[
...
...
@@ -116,7 +105,7 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
with
vllm_runner
(
emb_model_name
,
task
=
"embed"
,
dtype
=
dtype
,
dtype
=
DTYPE
,
max_model_len
=
None
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
score
(
text_pair
[
0
],
text_pair
[
1
])
...
...
@@ -126,16 +115,13 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
assert
math
.
isclose
(
hf_outputs
[
0
],
vllm_outputs
[
0
],
rel_tol
=
0.01
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_llm_1_to_N_embedding
(
vllm_runner
,
hf_runner
,
emb_model_name
,
dtype
:
str
):
def
test_embedding_1_to_N
(
vllm_runner
,
hf_runner
,
emb_model_name
):
text_pairs
=
[
[
TEXTS_1
[
0
],
TEXTS_2
[
0
]],
[
TEXTS_1
[
0
],
TEXTS_2
[
1
]],
]
with
hf_runner
(
emb_model_name
,
dtype
=
dtype
,
with
hf_runner
(
emb_model_name
,
dtype
=
DTYPE
,
is_sentence_transformer
=
True
)
as
hf_model
:
hf_embeddings
=
[
hf_model
.
encode
(
text_pair
)
for
text_pair
in
text_pairs
...
...
@@ -147,7 +133,7 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
with
vllm_runner
(
emb_model_name
,
task
=
"embed"
,
dtype
=
dtype
,
dtype
=
DTYPE
,
max_model_len
=
None
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
score
(
TEXTS_1
[
0
],
TEXTS_2
)
...
...
@@ -158,16 +144,13 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
assert
math
.
isclose
(
hf_outputs
[
1
],
vllm_outputs
[
1
],
rel_tol
=
0.01
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_llm_N_to_N_embedding
(
vllm_runner
,
hf_runner
,
emb_model_name
,
dtype
:
str
):
def
test_embedding_N_to_N
(
vllm_runner
,
hf_runner
,
emb_model_name
):
text_pairs
=
[
[
TEXTS_1
[
0
],
TEXTS_2
[
0
]],
[
TEXTS_1
[
1
],
TEXTS_2
[
1
]],
]
with
hf_runner
(
emb_model_name
,
dtype
=
dtype
,
with
hf_runner
(
emb_model_name
,
dtype
=
DTYPE
,
is_sentence_transformer
=
True
)
as
hf_model
:
hf_embeddings
=
[
hf_model
.
encode
(
text_pair
)
for
text_pair
in
text_pairs
...
...
@@ -179,7 +162,7 @@ def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
with
vllm_runner
(
emb_model_name
,
task
=
"embed"
,
dtype
=
dtype
,
dtype
=
DTYPE
,
max_model_len
=
None
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
score
(
TEXTS_1
,
TEXTS_2
)
...
...
tests/models/language/pooling/test_snowflake_arctic_embed.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the embedding outputs of HF and vLLM models.
Run `pytest tests/models/embedding/language/test_snowflake_arctic_embed.py`.
"""
import
pytest
from
...utils
import
EmbedModelInfo
,
check_embeddings_close
...
...
tests/models/language/pooling/test_truncation_control.py
View file @
48e925fa
...
...
@@ -5,18 +5,18 @@ MODEL_NAME = "sentence-transformers/all-MiniLM-L12-v2"
max_model_len
=
128
input_str
=
"""Immerse yourself in the enchanting chronicle of calculus, a
mathematical domain that has radically transformed our comprehension of
change and motion. Despite its roots in ancient civilizations, the
formal birth of calculus predominantly occurred in the 17th century,
primarily under the influential guidance of Sir Isaac Newton and Gottfried
Wilhelm Leibniz. The earliest traces of calculus concepts are found in
ancient Greek mathematics,most notably in the works of Eudoxus and
Archimedes, around 300 BCE. They utilized the 'method of exhaustion'—a
technique for computing areas and volumes through the use of finite sums.
This methodology laid crucial foundational work for integral calculus.
In the 17th century, both Newton and Leibniz independently pioneered
calculus, each contributing unique perspectives that would shape this new
field."""
mathematical domain that has radically transformed our comprehension of
change and motion. Despite its roots in ancient civilizations, the
formal birth of calculus predominantly occurred in the 17th century,
primarily under the influential guidance of Sir Isaac Newton and Gottfried
Wilhelm Leibniz. The earliest traces of calculus concepts are found in
ancient Greek mathematics,most notably in the works of Eudoxus and
Archimedes, around 300 BCE. They utilized the 'method of exhaustion'—a
technique for computing areas and volumes through the use of finite sums.
This methodology laid crucial foundational work for integral calculus.
In the 17th century, both Newton and Leibniz independently pioneered
calculus, each contributing unique perspectives that would shape this new
field."""
def
test_smaller_truncation_size
(
vllm_runner
,
...
...
tests/models/multimodal/generation/test_pixtral.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
"""
import
json
from
dataclasses
import
asdict
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
...
...
tests/models/multimodal/generation/test_whisper.py
View file @
48e925fa
...
...
@@ -119,10 +119,10 @@ def run_test(
assert
output
.
outputs
[
0
].
text
==
expected
@
create_new_process_for_each_test
(
"spawn"
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-small"
,
"openai/whisper-large-v3-turbo"
])
@
create_new_process_for_each_test
()
def
test_models
(
vllm_runner
,
model
)
->
None
:
run_test
(
vllm_runner
,
...
...
@@ -131,11 +131,11 @@ def test_models(vllm_runner, model) -> None:
)
@
create_new_process_for_each_test
(
"spawn"
)
@
multi_gpu_test
(
num_gpus
=
2
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-large-v3-turbo"
])
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
create_new_process_for_each_test
()
def
test_models_distributed
(
vllm_runner
,
model
,
...
...
tests/models/quantization/test_aqlm.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of a AQLM model between vLLM and HF Transformers
Run `pytest tests/models/test_aqlm.py`.
"""
import
pytest
from
tests.quantization.utils
import
is_quant_method_supported
...
...
tests/models/quantization/test_bitblas.py
View file @
48e925fa
...
...
@@ -8,8 +8,6 @@ bitblas/GPTQ models are in the top 3 selections of each other.
Note: bitblas internally uses locks to synchronize the threads. This can
result in very slight nondeterminism for bitblas. As a result, we re-run the
test up to 3 times to see if we pass.
Run `pytest tests/models/test_bitblas.py`.
"""
from
dataclasses
import
dataclass
...
...
tests/models/quantization/test_gptq_bitblas.py
View file @
48e925fa
...
...
@@ -8,8 +8,6 @@ bitblas/GPTQ models are in the top 3 selections of each other.
Note: bitblas internally uses locks to synchronize the threads. This can
result in very slight nondeterminism for bitblas. As a result, we re-run the
test up to 3 times to see if we pass.
Run `pytest tests/models/test_bitblas.py`.
"""
from
dataclasses
import
dataclass
...
...
tests/models/quantization/test_gptq_marlin.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Compares the outputs of gptq vs gptq_marlin
"""Compares the outputs of gptq vs gptq_marlin.
Note: GPTQ and Marlin do not have bitwise correctness.
As a result, in this test, we just confirm that the top selected tokens of the
Marlin/GPTQ models are in the top 5 selections of each other.
Note: Marlin internally uses locks to synchronize the threads. This can
result in very slight nondeterminism for Marlin. As a result, we re-run the test
up to 3 times to see if we pass.
Run `pytest tests/models/test_gptq_marlin.py`.
"""
import
os
...
...
tests/models/quantization/test_gptq_marlin_24.py
View file @
48e925fa
...
...
@@ -4,8 +4,6 @@
Note: GPTQ and Marlin_24 do not have bitwise correctness.
As a result, in this test, we just confirm that the top selected tokens of the
Marlin/GPTQ models are in the top 3 selections of each other.
Run `pytest tests/models/test_marlin_24.py`.
"""
from
dataclasses
import
dataclass
...
...
tests/models/test_transformers.py
View file @
48e925fa
# SPDX-License-Identifier: Apache-2.0
"""Test the functionality of the Transformers backend.
Run `pytest tests/models/test_transformers.py`.
"""
"""Test the functionality of the Transformers backend."""
import
pytest
from
..conftest
import
HfRunner
,
VllmRunner
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment