Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fc67613a
Commit
fc67613a
authored
Apr 18, 2026
by
zhuwenwen
Browse files
Merge tag 'v0.19.1' into v0.19.0
parents
31aec25b
b1388b1f
Changes
82
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1070 additions
and
44 deletions
+1070
-44
requirements/test.txt
requirements/test.txt
+10
-10
requirements/test/xpu.txt
requirements/test/xpu.txt
+736
-0
requirements/xpu-test.in
requirements/xpu-test.in
+2
-0
tests/conftest.py
tests/conftest.py
+9
-0
tests/kernels/core/test_minimax_reduce_rms.py
tests/kernels/core/test_minimax_reduce_rms.py
+152
-0
tests/lora/conftest.py
tests/lora/conftest.py
+13
-0
tests/lora/test_lora_checkpoints.py
tests/lora/test_lora_checkpoints.py
+23
-0
tests/lora/test_minicpmv_tp.py
tests/lora/test_minicpmv_tp.py
+11
-0
tests/model_executor/test_weight_utils.py
tests/model_executor/test_weight_utils.py
+0
-18
tests/models/language/generation/test_common.py
tests/models/language/generation/test_common.py
+5
-0
tests/models/language/pooling/test_colbert.py
tests/models/language/pooling/test_colbert.py
+8
-0
tests/models/language/pooling/test_nomic_max_model_len.py
tests/models/language/pooling/test_nomic_max_model_len.py
+39
-7
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
+2
-0
tests/models/language/pooling_mteb_test/mteb_score_utils.py
tests/models/language/pooling_mteb_test/mteb_score_utils.py
+4
-1
tests/models/language/pooling_mteb_test/test_baai.py
tests/models/language/pooling_mteb_test/test_baai.py
+4
-1
tests/models/language/pooling_mteb_test/test_gte.py
tests/models/language/pooling_mteb_test/test_gte.py
+2
-1
tests/models/language/pooling_mteb_test/test_jina.py
tests/models/language/pooling_mteb_test/test_jina.py
+4
-0
tests/models/language/pooling_mteb_test/test_nomic.py
tests/models/language/pooling_mteb_test/test_nomic.py
+4
-0
tests/models/multimodal/generation/test_common.py
tests/models/multimodal/generation/test_common.py
+38
-6
tests/models/multimodal/generation/test_nemotron_parse.py
tests/models/multimodal/generation/test_nemotron_parse.py
+4
-0
No files found.
requirements/test.txt
View file @
fc67613a
...
@@ -4,7 +4,7 @@ absl-py==2.1.0
...
@@ -4,7 +4,7 @@ absl-py==2.1.0
# via
# via
# rouge-score
# rouge-score
# tensorboard
# tensorboard
accelerate==1.
0.1
accelerate==1.
13.0
# via peft
# via peft
aenum==3.1.16
aenum==3.1.16
# via lightly
# via lightly
...
@@ -240,7 +240,6 @@ filelock==3.16.1
...
@@ -240,7 +240,6 @@ filelock==3.16.1
# huggingface-hub
# huggingface-hub
# ray
# ray
# torch
# torch
# transformers
# virtualenv
# virtualenv
fiona==1.10.1
fiona==1.10.1
# via torchgeo
# via torchgeo
...
@@ -323,7 +322,7 @@ h5py==3.13.0
...
@@ -323,7 +322,7 @@ h5py==3.13.0
# via terratorch
# via terratorch
harfile==0.3.0
harfile==0.3.0
# via schemathesis
# via schemathesis
hf-xet==1.
1.7
hf-xet==1.
4.3
# via huggingface-hub
# via huggingface-hub
hiredis==3.0.0
hiredis==3.0.0
# via tensorizer
# via tensorizer
...
@@ -337,9 +336,10 @@ httpx==0.27.2
...
@@ -337,9 +336,10 @@ httpx==0.27.2
# via
# via
# -r requirements/test.in
# -r requirements/test.in
# diffusers
# diffusers
# huggingface-hub
# perceptron
# perceptron
# schemathesis
# schemathesis
huggingface-hub==
0.36
.2
huggingface-hub==
1.10
.2
# via
# via
# accelerate
# accelerate
# datasets
# datasets
...
@@ -740,7 +740,7 @@ pathvalidate==3.2.1
...
@@ -740,7 +740,7 @@ pathvalidate==3.2.1
# via pytablewriter
# via pytablewriter
patsy==1.0.1
patsy==1.0.1
# via statsmodels
# via statsmodels
peft==0.1
6.0
peft==0.1
8.1
# via -r requirements/test.in
# via -r requirements/test.in
perceptron==0.1.4
perceptron==0.1.4
# via -r requirements/test.in
# via -r requirements/test.in
...
@@ -963,7 +963,7 @@ referencing==0.35.1
...
@@ -963,7 +963,7 @@ referencing==0.35.1
# via
# via
# jsonschema
# jsonschema
# jsonschema-specifications
# jsonschema-specifications
regex==202
4.9.11
regex==202
6.2.28
# via
# via
# diffusers
# diffusers
# nltk
# nltk
...
@@ -982,7 +982,6 @@ requests==2.32.3
...
@@ -982,7 +982,6 @@ requests==2.32.3
# google-api-core
# google-api-core
# google-cloud-storage
# google-cloud-storage
# gpt-oss
# gpt-oss
# huggingface-hub
# lightly
# lightly
# lm-eval
# lm-eval
# mistral-common
# mistral-common
...
@@ -995,7 +994,6 @@ requests==2.32.3
...
@@ -995,7 +994,6 @@ requests==2.32.3
# starlette-testclient
# starlette-testclient
# tacoreader
# tacoreader
# tiktoken
# tiktoken
# transformers
# wandb
# wandb
resampy==0.4.3
resampy==0.4.3
# via -r requirements/test.in
# via -r requirements/test.in
...
@@ -1193,7 +1191,7 @@ timm==1.0.17
...
@@ -1193,7 +1191,7 @@ timm==1.0.17
# segmentation-models-pytorch
# segmentation-models-pytorch
# terratorch
# terratorch
# torchgeo
# torchgeo
tokenizers==0.22.
0
tokenizers==0.22.
2
# via
# via
# -r requirements/test.in
# -r requirements/test.in
# transformers
# transformers
...
@@ -1269,7 +1267,7 @@ tqdm==4.67.3
...
@@ -1269,7 +1267,7 @@ tqdm==4.67.3
# tacoreader
# tacoreader
# terratorch
# terratorch
# transformers
# transformers
transformers==
4
.5
7.5
transformers==
5
.5
.3
# via
# via
# -r requirements/test.in
# -r requirements/test.in
# genai-perf
# genai-perf
...
@@ -1290,7 +1288,9 @@ typepy==1.3.2
...
@@ -1290,7 +1288,9 @@ typepy==1.3.2
typer==0.15.2
typer==0.15.2
# via
# via
# fastsafetensors
# fastsafetensors
# huggingface-hub
# perceptron
# perceptron
# transformers
types-python-dateutil==2.9.0.20241206
types-python-dateutil==2.9.0.20241206
# via arrow
# via arrow
typeshed-client==2.8.2
typeshed-client==2.8.2
...
...
requirements/test/xpu.txt
0 → 100644
View file @
fc67613a
# This file was autogenerated by uv via the following command:
# uv pip compile requirements/test/xpu.in -c requirements/xpu.txt -o requirements/test/xpu.txt --index-strategy unsafe-best-match --torch-backend xpu --python-platform x86_64-manylinux_2_39 --python-version 3.12
absl-py==2.4.0
# via
# -r requirements/test/xpu.in
# rouge-score
accelerate==1.13.0
# via -r requirements/test/xpu.in
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.4
# via
# -c requirements/common.txt
# fsspec
# gpt-oss
# lm-eval
aiosignal==1.4.0
# via aiohttp
albumentations==1.4.6
# via -r requirements/test/xpu.in
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
anyio==4.13.0
# via
# httpx
# starlette
arctic-inference==0.1.1
# via -r requirements/test/xpu.in
attrs==26.1.0
# via
# aiohttp
# jsonlines
# jsonschema
# referencing
audioread==3.0.1
# via
# -r requirements/test/xpu.in
# librosa
blobfile==3.0.0
# via -r requirements/test/xpu.in
bm25s==0.2.13
# via
# -r requirements/test/xpu.in
# mteb
bounded-pool-executor==0.0.3
# via pqdm
certifi==2026.2.25
# via
# httpcore
# httpx
# requests
cffi==2.0.0
# via soundfile
chardet==5.2.0
# via mbstrdecoder
charset-normalizer==3.4.6
# via requests
chz==0.4.0
# via gpt-oss
click==8.3.1
# via
# jiwer
# nltk
# schemathesis
# typer
# uvicorn
colorama==0.4.6
# via sacrebleu
coverage==7.13.5
# via pytest-cov
dataproperty==1.1.0
# via
# pytablewriter
# tabledata
datasets==4.8.4
# via
# evaluate
# lm-eval
# mteb
decorator==5.2.1
# via librosa
dill==0.4.1
# via
# datasets
# evaluate
# lm-eval
# multiprocess
docker==7.1.0
# via gpt-oss
docopt==0.6.2
# via num2words
dpcpp-cpp-rt==2025.3.1
# via
# onemkl-sycl-blas
# onemkl-sycl-dft
# onemkl-sycl-lapack
# onemkl-sycl-rng
# onemkl-sycl-sparse
# torch
evaluate==0.4.6
# via lm-eval
fastapi==0.135.2
# via
# -c requirements/common.txt
# gpt-oss
filelock==3.25.2
# via
# -c requirements/common.txt
# blobfile
# datasets
# huggingface-hub
# modelscope
# torch
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec==2026.2.0
# via
# datasets
# evaluate
# huggingface-hub
# torch
gpt-oss==0.0.8
# via -r requirements/test/xpu.in
graphql-core==3.2.8
# via hypothesis-graphql
h11==0.16.0
# via
# httpcore
# uvicorn
harfile==0.4.0
# via schemathesis
hf-xet==1.4.3
# via huggingface-hub
html2text==2025.4.15
# via gpt-oss
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# datasets
# huggingface-hub
# schemathesis
huggingface-hub==1.10.2
# via
# accelerate
# datasets
# evaluate
# sentence-transformers
# timm
# tokenizers
# transformers
hypothesis==6.151.10
# via
# hypothesis-graphql
# hypothesis-jsonschema
# schemathesis
hypothesis-graphql==0.12.0
# via schemathesis
hypothesis-jsonschema==0.23.1
# via schemathesis
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
imageio==2.37.3
# via scikit-image
impi-rt==2021.17.0
# via
# oneccl
# torch
iniconfig==2.3.0
# via pytest
intel-cmplr-lib-rt==2025.3.1
# via
# intel-sycl-rt
# torch
intel-cmplr-lib-ur==2025.3.1
# via
# intel-openmp
# intel-sycl-rt
# torch
intel-cmplr-lic-rt==2025.3.1
# via
# intel-opencl-rt
# intel-sycl-rt
# torch
intel-opencl-rt==2025.3.1
# via
# dpcpp-cpp-rt
# onemkl-sycl-blas
# onemkl-sycl-dft
# onemkl-sycl-lapack
# onemkl-sycl-rng
# onemkl-sycl-sparse
# torch
intel-openmp==2025.3.1
# via
# dpcpp-cpp-rt
# mkl
# torch
intel-pti==0.15.0
# via torch
intel-sycl-rt==2025.3.1
# via
# dpcpp-cpp-rt
# oneccl
# torch
jinja2==3.1.6
# via
# -c requirements/xpu.txt
# lm-eval
# torch
jiwer==4.0.0
# via -r requirements/test/xpu.in
joblib==1.5.3
# via
# librosa
# nltk
# scikit-learn
jsonlines==4.0.0
# via lm-eval
jsonschema==4.26.0
# via
# hypothesis-jsonschema
# mistral-common
# schemathesis
jsonschema-rs==0.45.0
# via schemathesis
jsonschema-specifications==2025.9.1
# via jsonschema
junit-xml==1.9
# via schemathesis
lazy-loader==0.5
# via
# librosa
# scikit-image
librosa==0.10.2.post1
# via -r requirements/test/xpu.in
llvmlite==0.44.0
# via numba
lm-eval==0.4.11
# via -r requirements/test/xpu.in
lxml==6.0.2
# via
# blobfile
# gpt-oss
# sacrebleu
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via
# jinja2
# werkzeug
mbstrdecoder==1.1.4
# via
# dataproperty
# pytablewriter
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common==1.11.0
# via
# -c requirements/common.txt
# -r requirements/test/xpu.in
mkl==2025.3.0
# via
# onemkl-sycl-blas
# onemkl-sycl-dft
# onemkl-sycl-lapack
# onemkl-sycl-rng
# onemkl-sycl-sparse
# torch
modelscope==1.35.3
# via -r requirements/test/xpu.in
more-itertools==10.8.0
# via lm-eval
mpmath==1.3.0
# via sympy
msgpack==1.1.2
# via librosa
mteb==2.12.7
# via -r requirements/test/xpu.in
multidict==6.7.1
# via
# aiohttp
# yarl
multiprocess==0.70.19
# via
# datasets
# evaluate
networkx==3.6.1
# via
# scikit-image
# torch
nltk==3.9.4
# via rouge-score
num2words==0.5.14
# via -r requirements/test/xpu.in
numba==0.61.2
# via
# -c requirements/xpu.txt
# librosa
numpy==2.2.6
# via
# accelerate
# albumentations
# bm25s
# datasets
# evaluate
# imageio
# librosa
# lm-eval
# mistral-common
# mteb
# numba
# opencv-python-headless
# pandas
# pytrec-eval-terrier
# rouge-score
# sacrebleu
# scikit-image
# scikit-learn
# scipy
# sentence-transformers
# soundfile
# soxr
# tifffile
# torchvision
# transformers
oneccl==2021.17.1
# via
# oneccl-devel
# torch
oneccl-devel==2021.17.1
# via torch
onemkl-license==2025.3.0
# via
# mkl
# torch
onemkl-sycl-blas==2025.3.0
# via
# onemkl-sycl-lapack
# onemkl-sycl-sparse
# torch
onemkl-sycl-dft==2025.3.0
# via torch
onemkl-sycl-lapack==2025.3.0
# via torch
onemkl-sycl-rng==2025.3.0
# via torch
onemkl-sycl-sparse==2025.3.0
# via torch
openai-harmony==0.0.8
# via
# -c requirements/common.txt
# gpt-oss
opencv-python-headless==4.13.0.92
# via
# -c requirements/common.txt
# albumentations
# mistral-common
packaging==26.0
# via
# -c requirements/xpu.txt
# accelerate
# datasets
# evaluate
# huggingface-hub
# lazy-loader
# modelscope
# pooch
# pytest
# pytest-rerunfailures
# scikit-image
# transformers
# typepy
pandas==3.0.1
# via
# datasets
# evaluate
pathvalidate==3.3.1
# via pytablewriter
pillow==12.1.1
# via
# imageio
# mistral-common
# scikit-image
# torchvision
platformdirs==4.9.4
# via pooch
pluggy==1.6.0
# via
# pytest
# pytest-cov
polars==1.39.3
# via mteb
polars-runtime-32==1.39.3
# via polars
pooch==1.8.2
# via
# -r requirements/test/xpu.in
# librosa
portalocker==3.2.0
# via sacrebleu
pqdm==0.2.0
# via -r requirements/test/xpu.in
propcache==0.4.1
# via
# aiohttp
# yarl
psutil==7.2.2
# via accelerate
py==1.11.0
# via pytest-forked
pyarrow==23.0.1
# via datasets
pycountry==26.2.16
# via pydantic-extra-types
pycparser==3.0
# via cffi
pycryptodomex==3.23.0
# via blobfile
pydantic==2.12.5
# via
# -c requirements/common.txt
# albumentations
# fastapi
# gpt-oss
# mistral-common
# mteb
# openai-harmony
# pydantic-extra-types
pydantic-core==2.41.5
# via pydantic
pydantic-extra-types==2.11.1
# via mistral-common
pyelftools==0.32
# via triton-xpu
pygments==2.20.0
# via
# pytest
# rich
pyrate-limiter==4.1.0
# via schemathesis
pystemmer==3.0.0
# via
# -r requirements/test/xpu.in
# mteb
pytablewriter==1.2.1
# via lm-eval
pytest==9.0.2
# via
# -r requirements/test/xpu.in
# pytest-asyncio
# pytest-cov
# pytest-forked
# pytest-rerunfailures
# pytest-shard
# pytest-timeout
# schemathesis
pytest-asyncio==1.3.0
# via -r requirements/test/xpu.in
pytest-cov==6.3.0
# via -r requirements/test/xpu.in
pytest-forked==1.6.0
# via -r requirements/test/xpu.in
pytest-rerunfailures==14.0
# via -r requirements/test/xpu.in
pytest-shard==0.1.2
# via -r requirements/test/xpu.in
pytest-timeout==2.3.1
# via -r requirements/test/xpu.in
python-dateutil==2.9.0.post0
# via
# pandas
# typepy
pytrec-eval-terrier==0.5.10
# via mteb
pytz==2026.1.post1
# via typepy
pyyaml==6.0.3
# via
# accelerate
# albumentations
# datasets
# huggingface-hub
# schemathesis
# timm
# transformers
rapidfuzz==3.12.1
# via
# -r requirements/test/xpu.in
# jiwer
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.3.32
# via
# nltk
# sacrebleu
# tiktoken
# transformers
requests==2.33.1
# via
# -c requirements/common.txt
# datasets
# docker
# evaluate
# gpt-oss
# lm-eval
# mistral-common
# modelscope
# mteb
# pooch
# schemathesis
# starlette-testclient
# tiktoken
rich==14.3.3
# via
# mteb
# schemathesis
# typer
rouge-score==0.1.2
# via lm-eval
rpds-py==0.30.0
# via
# jsonschema
# referencing
sacrebleu==2.6.0
# via lm-eval
safetensors==0.7.0
# via
# accelerate
# timm
# transformers
schemathesis==4.14.2
# via -r requirements/test/xpu.in
scikit-image==0.26.0
# via albumentations
scikit-learn==1.8.0
# via
# albumentations
# librosa
# lm-eval
# mteb
# sentence-transformers
scipy==1.17.1
# via
# albumentations
# bm25s
# librosa
# mteb
# pytrec-eval-terrier
# scikit-image
# scikit-learn
# sentence-transformers
sentence-transformers==5.3.0
# via mteb
setuptools==80.10.2
# via
# -c requirements/common.txt
# -c requirements/xpu.txt
# modelscope
# pytablewriter
# torch
shellingham==1.5.4
# via typer
six==1.17.0
# via
# -c requirements/common.txt
# junit-xml
# python-dateutil
# rouge-score
sortedcontainers==2.4.0
# via hypothesis
soundfile==0.13.1
# via
# -r requirements/test/xpu.in
# librosa
# mistral-common
soxr==0.5.0.post1
# via
# -r requirements/test/xpu.in
# librosa
# mistral-common
sqlitedict==2.1.0
# via lm-eval
starlette==1.0.0
# via
# fastapi
# starlette-testclient
starlette-testclient==0.4.1
# via schemathesis
structlog==25.5.0
# via gpt-oss
sympy==1.14.0
# via torch
tabledata==1.3.4
# via pytablewriter
tabulate==0.10.0
# via sacrebleu
tbb==2022.3.0
# via
# intel-opencl-rt
# mkl
# torch
tblib==3.1.0
# via -r requirements/test/xpu.in
tcmlib==1.4.1
# via
# tbb
# torch
# umf
tcolorpy==0.1.7
# via pytablewriter
tenacity==9.1.4
# via
# gpt-oss
# lm-eval
# schemathesis
termcolor==3.3.0
# via gpt-oss
threadpoolctl==3.6.0
# via scikit-learn
tifffile==2026.3.3
# via scikit-image
tiktoken==0.12.0
# via
# -c requirements/common.txt
# gpt-oss
# lm-eval
# mistral-common
timm==1.0.17
# via -r requirements/test/xpu.in
tokenizers==0.22.2
# via
# -c requirements/common.txt
# transformers
torch==2.10.0+xpu
# via
# -c requirements/xpu.txt
# accelerate
# mteb
# sentence-transformers
# timm
# torchvision
torchvision==0.25.0+xpu
# via timm
tqdm==4.67.3
# via
# datasets
# evaluate
# huggingface-hub
# lm-eval
# modelscope
# mteb
# nltk
# pqdm
# sentence-transformers
# transformers
transformers==5.5.3
# via
# -c requirements/common.txt
# sentence-transformers
triton-xpu==3.6.0
# via torch
typepy==1.3.4
# via
# dataproperty
# pytablewriter
# tabledata
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# -c requirements/common.txt
# aiosignal
# albumentations
# anyio
# chz
# fastapi
# huggingface-hub
# librosa
# lm-eval
# mistral-common
# mteb
# pqdm
# pydantic
# pydantic-core
# pydantic-extra-types
# pytest-asyncio
# referencing
# schemathesis
# sentence-transformers
# starlette
# torch
# typing-inspection
typing-inspection==0.4.2
# via
# fastapi
# pydantic
umf==1.0.2
# via
# intel-cmplr-lib-ur
# torch
urllib3==2.6.3
# via
# blobfile
# docker
# modelscope
# requests
uvicorn==0.42.0
# via gpt-oss
werkzeug==3.1.7
# via schemathesis
word2number==1.1
# via lm-eval
xxhash==3.6.0
# via
# datasets
# evaluate
yarl==1.23.0
# via aiohttp
zstandard==0.25.0
# via lm-eval
requirements/xpu-test.in
View file @
fc67613a
...
@@ -9,6 +9,8 @@ pytest-shard
...
@@ -9,6 +9,8 @@ pytest-shard
# --- Core Tools & Bindings ---
# --- Core Tools & Bindings ---
absl-py
absl-py
arctic-inference
arctic-inference
lm_eval[api]
modelscope
# --- Audio Processing ---
# --- Audio Processing ---
librosa
librosa
...
...
tests/conftest.py
View file @
fc67613a
...
@@ -409,6 +409,15 @@ class HfRunner:
...
@@ -409,6 +409,15 @@ class HfRunner:
model_name
,
model_name
,
trust_remote_code
=
trust_remote_code
,
trust_remote_code
=
trust_remote_code
,
)
)
# HF runner should use the HF config so that it's consistent with the HF model
if
self
.
config
.
__module__
.
startswith
(
"vllm.transformers_utils.configs"
):
from
transformers.models.auto.configuration_auto
import
CONFIG_MAPPING
del
CONFIG_MAPPING
.
_extra_content
[
self
.
config
.
model_type
]
self
.
config
=
AutoConfig
.
from_pretrained
(
model_name
,
trust_remote_code
=
trust_remote_code
,
)
self
.
device
=
self
.
get_default_device
()
self
.
device
=
self
.
get_default_device
()
self
.
dtype
=
dtype
=
_get_and_verify_dtype
(
self
.
dtype
=
dtype
=
_get_and_verify_dtype
(
self
.
model_name
,
self
.
model_name
,
...
...
tests/kernels/core/test_minimax_reduce_rms.py
0 → 100644
View file @
fc67613a
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for MiniMax QK RMS-norm: NCCL reference vs Lamport fused kernel."""
import
pytest
import
torch
import
torch.nn
as
nn
from
torch.multiprocessing
import
spawn
from
tests.kernels.utils
import
opcheck
from
tests.utils
import
ensure_current_vllm_config
,
init_test_distributed_environment
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.model_executor.layers.mamba.linear_attn
import
MiniMaxText01RMSNormTP
from
vllm.platforms
import
current_platform
from
vllm.utils.network_utils
import
get_open_port
from
vllm.utils.torch_utils
import
set_random_seed
@ensure_current_vllm_config()
def _worker_forward_qk(
    local_rank,
    world_size,
    port,
    num_tokens,
    hidden_q_full,
    hidden_k_full,
    dtype,
    seed,
    eps,
):
    """Per-rank worker: compare NCCL allreduce path vs Lamport fused kernel.

    Intended to be launched once per rank (see the ``spawn`` call in
    ``test_minimax_reduce_rms_qk``). The reference result comes from
    ``MiniMaxText01RMSNormTP.forward_qk``; the candidate result comes from
    ``torch.ops._C.minimax_allreduce_rms_qk``. Both are compared with
    ``torch.testing.assert_close``.

    Args:
        local_rank: Index of this worker; also used as the CUDA device index.
        world_size: Number of workers taking part in the test.
        port: Port forwarded to ``init_test_distributed_environment``.
        num_tokens: Number of rows in the random ``qkv`` input.
        hidden_q_full: Un-sharded Q width; each rank uses
            ``hidden_q_full // world_size`` columns.
        hidden_k_full: Un-sharded K width; each rank uses
            ``hidden_k_full // world_size`` columns (V uses the same width).
        dtype: Torch dtype for the weights and activations.
        seed: Base random seed.
        eps: Epsilon passed to both the reference layers and the fused op.

    Raises:
        AssertionError: If the fused output deviates from the reference by
            more than ``atol=3e-2`` / ``rtol=3e-2``.
    """
    # Builds without the fused op have nothing to compare; the worker exits
    # quietly and the parent test is reported as passed.
    if not hasattr(torch.ops._C, "minimax_allreduce_rms_qk"):
        cleanup_dist_env_and_memory()
        return

    device = torch.device(f"cuda:{local_rank}")
    torch.accelerator.set_device_index(device)
    init_test_distributed_environment(
        world_size, 1, local_rank, port, local_rank=local_rank
    )

    # Per-rank slice widths.
    hq = hidden_q_full // world_size
    hk = hidden_k_full // world_size

    q_norm = MiniMaxText01RMSNormTP(hidden_q_full, eps=eps).cuda()
    k_norm = MiniMaxText01RMSNormTP(hidden_k_full, eps=eps).cuda()

    # Same seed on every rank, so the full-width weights are identical before
    # each rank keeps only its own contiguous slice.
    set_random_seed(seed)
    qw = torch.randn(hidden_q_full, dtype=dtype, device="cuda")
    kw = torch.randn(hidden_k_full, dtype=dtype, device="cuda")
    q_norm.weight = nn.Parameter(qw[local_rank * hq : (local_rank + 1) * hq])
    k_norm.weight = nn.Parameter(kw[local_rank * hk : (local_rank + 1) * hk])

    # Rank-dependent seed, so each rank draws different activations.
    torch.manual_seed(seed + 1000 + local_rank)
    qkv = torch.randn(num_tokens, hq + hk + hk, dtype=dtype, device="cuda")

    # Reference path. The V slice is not normalised and is not needed here.
    q_ref, k_ref, _ = qkv.clone().split([hq, hk, hk], dim=-1)
    ref_q, ref_k = MiniMaxText01RMSNormTP.forward_qk(q_norm, k_norm, q_ref, k_ref)

    # Set up Lamport workspace.
    from vllm.distributed.parallel_state import get_tp_group
    from vllm.model_executor.layers.mamba.lamport_workspace import (
        get_allreduce_workspace,
    )

    workspace = get_allreduce_workspace(
        rank=local_rank,
        world_size=world_size,
        max_tokens=num_tokens,
        process_group=get_tp_group().cpu_group,
    )

    fused_op = torch.ops._C.minimax_allreduce_rms_qk

    def _fresh_kernel_args():
        # Every invocation receives its own copy of ``qkv`` (presumably because
        # the kernel may write into its input -- TODO confirm).
        return (
            qkv.clone(),
            q_norm.weight,
            k_norm.weight,
            workspace,
            hq,
            hk,
            local_rank,
            world_size,
            eps,
        )

    opcheck(fused_op, _fresh_kernel_args())
    fused_q, fused_k = fused_op(*_fresh_kernel_args())

    torch.accelerator.synchronize()
    torch.testing.assert_close(fused_q, ref_q, atol=3e-2, rtol=3e-2)
    torch.testing.assert_close(fused_k, ref_k, atol=3e-2, rtol=3e-2)
    cleanup_dist_env_and_memory()
@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA required")
@pytest.mark.parametrize("world_size", [2, 4, 8])
@pytest.mark.parametrize("num_tokens", [1, 128, 333])
@pytest.mark.parametrize("hidden_dims", [(6144, 1024)])
@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
@pytest.mark.parametrize("eps", [1e-6])
@pytest.mark.parametrize("seed", [42])
def test_minimax_reduce_rms_qk(
    world_size,
    num_tokens,
    hidden_dims,
    dtype,
    eps,
    seed,
):
    """Launch one ``_worker_forward_qk`` process per rank and wait for them.

    The test is skipped when the platform reports fewer devices than
    ``world_size``. ``hidden_dims`` is a ``(q_width, k_width)`` pair of
    un-sharded hidden sizes.
    """
    available = current_platform.device_count()
    if available < world_size:
        pytest.skip(f"Need >= {world_size} GPUs, have {available}")

    q_width, k_width = hidden_dims
    # The port is handed to the workers as a string.
    rendezvous_port = str(get_open_port())

    # ``spawn`` prepends the process index, which becomes ``local_rank``.
    worker_args = (
        world_size,
        rendezvous_port,
        num_tokens,
        q_width,
        k_width,
        dtype,
        seed,
        eps,
    )
    spawn(_worker_forward_qk, args=worker_args, nprocs=world_size, join=True)
tests/lora/conftest.py
View file @
fc67613a
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
import
tempfile
import
tempfile
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
importlib
import
reload
from
unittest.mock
import
MagicMock
from
unittest.mock
import
MagicMock
import
pytest
import
pytest
...
@@ -43,6 +44,18 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
...
@@ -43,6 +44,18 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
cleanup_dist_env_and_memory
(
shutdown_ray
=
True
)
cleanup_dist_env_and_memory
(
shutdown_ray
=
True
)
@pytest.fixture
def maybe_enable_lora_dual_stream(monkeypatch: pytest.MonkeyPatch):
    """Turn on ``VLLM_LORA_ENABLE_DUAL_STREAM`` for the test when on CUDA.

    On non-CUDA platforms the fixture changes nothing. The environment
    variable is set through ``monkeypatch``, so pytest restores it after the
    test.
    """
    if current_platform.is_cuda():
        monkeypatch.setenv("VLLM_LORA_ENABLE_DUAL_STREAM", "1")
        import vllm.lora.layers.base_linear as base_linear

        already_async = hasattr(base_linear, "lora_linear_async")
        if not already_async:
            # Reload the module to ensure the environment variable takes effect.
            reload(base_linear)
    yield
@
pytest
.
fixture
@
pytest
.
fixture
def
dist_init
():
def
dist_init
():
from
tests.utils
import
ensure_current_vllm_config
from
tests.utils
import
ensure_current_vllm_config
...
...
tests/lora/test_lora_checkpoints.py
View file @
fc67613a
...
@@ -5,7 +5,9 @@ import pytest
...
@@ -5,7 +5,9 @@ import pytest
from
vllm.lora.lora_model
import
LoRAModel
from
vllm.lora.lora_model
import
LoRAModel
from
vllm.lora.peft_helper
import
PEFTHelper
from
vllm.lora.peft_helper
import
PEFTHelper
from
vllm.lora.utils
import
parse_fine_tuned_lora_name
from
vllm.model_executor.models.baichuan
import
BaiChuanBaseForCausalLM
from
vllm.model_executor.models.baichuan
import
BaiChuanBaseForCausalLM
from
vllm.model_executor.models.gemma4
import
Gemma4ForCausalLM
from
vllm.model_executor.models.utils
import
WeightsMapper
from
vllm.model_executor.models.utils
import
WeightsMapper
lora_lst
=
[
"baichuan7B"
,
"baichuan7B-zero"
,
"baichuan7B-zero-regex"
,
"chatglm3-6b"
]
lora_lst
=
[
"baichuan7B"
,
"baichuan7B-zero"
,
"baichuan7B-zero-regex"
,
"chatglm3-6b"
]
...
@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
...
@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
for
name
in
lora_model
.
loras
:
for
name
in
lora_model
.
loras
:
assert
name
.
startswith
(
hf_to_vllm_mapper
.
orig_to_new_prefix
[
"model."
])
assert
name
.
startswith
(
hf_to_vllm_mapper
.
orig_to_new_prefix
[
"model."
])
assert
".baichuan_layers."
in
name
assert
".baichuan_layers."
in
name
def test_gemma4_lora_weights_mapping():
    """Check how a Gemma4 dense-MLP LoRA weight name is parsed.

    The second element of the expected tuple presumably flags a ``lora_A``
    weight -- confirm against ``parse_fine_tuned_lora_name``.
    """
    lora_key = (
        "base_model.model.model.language_model.layers.9.mlp.down_proj"
        ".lora_A.weight"
    )
    parsed = parse_fine_tuned_lora_name(
        lora_key, Gemma4ForCausalLM.hf_to_vllm_mapper
    )
    expected = ("model.layers.9.mlp.down_proj", True)
    assert parsed == expected
def test_gemma4_moe_lora_weights_mapping():
    """Check how a Gemma4 MoE expert LoRA weight name is parsed.

    The expected module path has no ``experts.`` segment, and the boolean is
    ``False`` for this ``lora_B`` weight.
    """
    lora_key = (
        "base_model.model.model.language_model.layers.9.moe.experts."
        "gate_up_proj.lora_B.weight"
    )
    parsed = parse_fine_tuned_lora_name(
        lora_key, Gemma4ForCausalLM.hf_to_vllm_mapper
    )
    expected = ("model.layers.9.moe.gate_up_proj", False)
    assert parsed == expected
tests/lora/test_minicpmv_tp.py
View file @
fc67613a
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
importlib.metadata
import
version
import
pytest
import
pytest
from
packaging.version
import
Version
import
vllm
import
vllm
from
vllm.assets.image
import
ImageAsset
from
vllm.assets.image
import
ImageAsset
...
@@ -10,6 +13,14 @@ from vllm.platforms import current_platform
...
@@ -10,6 +13,14 @@ from vllm.platforms import current_platform
from
..utils
import
multi_gpu_test
from
..utils
import
multi_gpu_test
pytestmark
=
pytest
.
mark
.
skipif
(
Version
(
"5.0"
)
<=
Version
(
version
(
"transformers"
)),
reason
=
(
"MiniCPMV custom processor uses tokenizer.im_start_id which is not "
"available on TokenizersBackend in transformers v5.0+"
),
)
MODEL_PATH
=
"openbmb/MiniCPM-Llama3-V-2_5"
MODEL_PATH
=
"openbmb/MiniCPM-Llama3-V-2_5"
PROMPT_TEMPLATE
=
(
PROMPT_TEMPLATE
=
(
...
...
tests/model_executor/test_weight_utils.py
View file @
fc67613a
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
tempfile
import
tempfile
import
huggingface_hub.constants
import
huggingface_hub.constants
...
@@ -10,26 +9,10 @@ from huggingface_hub.utils import LocalEntryNotFoundError
...
@@ -10,26 +9,10 @@ from huggingface_hub.utils import LocalEntryNotFoundError
from
vllm.model_executor.model_loader.weight_utils
import
(
from
vllm.model_executor.model_loader.weight_utils
import
(
download_weights_from_hf
,
download_weights_from_hf
,
enable_hf_transfer
,
maybe_remap_kv_scale_name
,
maybe_remap_kv_scale_name
,
)
)
def test_hf_transfer_auto_activation():
    """Check that ``enable_hf_transfer`` matches ``hf_transfer`` availability.

    After the call, ``huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER``
    must be true exactly when ``hf_transfer`` can be imported. The test is
    skipped when the environment variable is already set.
    """
    if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
        # in case it is already set, we can't test the auto activation
        pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")

    enable_hf_transfer()

    # Probe for the optional package in the same way: by importing it.
    try:
        import hf_transfer  # type: ignore # noqa
    except ImportError:
        transfer_available = False
    else:
        transfer_available = True

    assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == transfer_available
def
test_download_weights_from_hf
():
def
test_download_weights_from_hf
():
with
tempfile
.
TemporaryDirectory
()
as
tmpdir
:
with
tempfile
.
TemporaryDirectory
()
as
tmpdir
:
# assert LocalEntryNotFoundError error is thrown
# assert LocalEntryNotFoundError error is thrown
...
@@ -178,5 +161,4 @@ class TestMaybeRemapKvScaleName:
...
@@ -178,5 +161,4 @@ class TestMaybeRemapKvScaleName:
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
test_hf_transfer_auto_activation
()
test_download_weights_from_hf
()
test_download_weights_from_hf
()
tests/models/language/generation/test_common.py
View file @
fc67613a
...
@@ -143,6 +143,11 @@ def test_models(
...
@@ -143,6 +143,11 @@ def test_models(
# in parts of the operators
# in parts of the operators
pytest
.
skip
(
f
"Skipping '
{
model
}
' model test with AITER kernel."
)
pytest
.
skip
(
f
"Skipping '
{
model
}
' model test with AITER kernel."
)
if
current_platform
.
is_cpu
()
and
model
==
"TitanML/tiny-mixtral"
:
# This untrained model is sensitive to the rounding error
# Fuse ops to reduce bfloat16 rounding
monkeypatch
.
setenv
(
"VLLM_CPU_CI_ENV"
,
"0"
)
with
hf_runner
(
model
)
as
hf_model
:
with
hf_runner
(
model
)
as
hf_model
:
hf_outputs
=
hf_model
.
generate_greedy_logprobs_limit
(
hf_outputs
=
hf_model
.
generate_greedy_logprobs_limit
(
example_prompts
,
max_tokens
,
num_logprobs
example_prompts
,
max_tokens
,
num_logprobs
...
...
tests/models/language/pooling/test_colbert.py
View file @
fc67613a
...
@@ -109,6 +109,14 @@ def _load_hf_model(model_name: str, hf_spec: dict, device: torch.device):
...
@@ -109,6 +109,14 @@ def _load_hf_model(model_name: str, hf_spec: dict, device: torch.device):
**
extra
,
**
extra
,
).
to
(
device
)
).
to
(
device
)
model
.
eval
()
model
.
eval
()
# Transformers 5.0 weight materialization can clear non-persistent
# buffers (e.g. rotary inv_freq) that were registered with
# persistent=False. Re-compute them so the model produces valid output.
for
mod
in
model
.
modules
():
if
hasattr
(
mod
,
"_compute_inv_freq"
)
and
hasattr
(
mod
,
"inv_freq"
):
mod
.
inv_freq
=
mod
.
_compute_inv_freq
(
device
=
device
)
return
model
return
model
...
...
tests/models/language/pooling/test_nomic_max_model_len.py
View file @
fc67613a
...
@@ -8,7 +8,13 @@ import pytest
...
@@ -8,7 +8,13 @@ import pytest
from
...utils
import
EmbedModelInfo
from
...utils
import
EmbedModelInfo
MODELS
=
[
MODELS
=
[
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1"
),
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1"
,
# Fixme:
# Update nomic-embed code to support the latest
# HF version and remove revision set.
revision
=
"720244025c1a7e15661a174c63cce63c8218e52b"
,
),
# EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
# EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
# EmbedModelInfo("nomic-ai/CodeRankEmbed"),
# EmbedModelInfo("nomic-ai/CodeRankEmbed"),
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v2-moe"
),
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v2-moe"
),
...
@@ -24,7 +30,10 @@ max_model_len = int(original_max_position_embeddings * factor)
...
@@ -24,7 +30,10 @@ max_model_len = int(original_max_position_embeddings * factor)
@
pytest
.
mark
.
parametrize
(
"model_info"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model_info"
,
MODELS
)
def
test_default
(
model_info
,
vllm_runner
):
def
test_default
(
model_info
,
vllm_runner
):
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
None
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
max_model_len
=
None
,
)
as
vllm_model
:
)
as
vllm_model
:
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
if
model_info
.
name
==
"nomic-ai/nomic-embed-text-v2-moe"
:
if
model_info
.
name
==
"nomic-ai/nomic-embed-text-v2-moe"
:
...
@@ -39,7 +48,10 @@ def test_default(model_info, vllm_runner):
...
@@ -39,7 +48,10 @@ def test_default(model_info, vllm_runner):
def
test_set_max_model_len_legal
(
model_info
,
vllm_runner
):
def
test_set_max_model_len_legal
(
model_info
,
vllm_runner
):
# set max_model_len <= 512
# set max_model_len <= 512
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
256
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
max_model_len
=
256
,
)
as
vllm_model
:
)
as
vllm_model
:
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
assert
model_config
.
max_model_len
==
256
assert
model_config
.
max_model_len
==
256
...
@@ -49,11 +61,19 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
...
@@ -49,11 +61,19 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
# For nomic-embed-text-v2-moe the length is set to 512
# For nomic-embed-text-v2-moe the length is set to 512
# by sentence_bert_config.json.
# by sentence_bert_config.json.
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
1024
):
with
vllm_runner
(
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
max_model_len
=
1024
,
):
pass
pass
else
:
else
:
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
1024
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
max_model_len
=
1024
,
)
as
vllm_model
:
)
as
vllm_model
:
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
model_config
=
vllm_model
.
llm
.
llm_engine
.
model_config
assert
model_config
.
max_model_len
==
1024
assert
model_config
.
max_model_len
==
1024
...
@@ -63,7 +83,12 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
...
@@ -63,7 +83,12 @@ def test_set_max_model_len_legal(model_info, vllm_runner):
def
test_set_max_model_len_illegal
(
model_info
,
vllm_runner
):
def
test_set_max_model_len_illegal
(
model_info
,
vllm_runner
):
# set max_model_len > 2048
# set max_model_len > 2048
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
4096
):
with
vllm_runner
(
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
max_model_len
=
4096
,
):
pass
pass
# set max_model_len > 2048 by hf_overrides
# set max_model_len > 2048 by hf_overrides
...
@@ -71,6 +96,7 @@ def test_set_max_model_len_illegal(model_info, vllm_runner):
...
@@ -71,6 +96,7 @@ def test_set_max_model_len_illegal(model_info, vllm_runner):
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
runner
=
"pooling"
,
max_model_len
=
None
,
max_model_len
=
None
,
hf_overrides
=
hf_overrides
,
hf_overrides
=
hf_overrides
,
...
@@ -91,7 +117,11 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):
...
@@ -91,7 +117,11 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):
}
}
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
runner
=
"pooling"
,
max_model_len
=
None
,
hf_overrides
=
hf_overrides
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
max_model_len
=
None
,
hf_overrides
=
hf_overrides
,
):
):
pass
pass
...
@@ -110,6 +140,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
...
@@ -110,6 +140,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
runner
=
"pooling"
,
max_model_len
=
max_model_len
+
1
,
max_model_len
=
max_model_len
+
1
,
hf_overrides
=
hf_overrides
,
hf_overrides
=
hf_overrides
,
...
@@ -129,6 +160,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
...
@@ -129,6 +160,7 @@ def test_use_rope_scaling_illegal(model_info, vllm_runner):
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
runner
=
"pooling"
,
max_model_len
=
None
,
max_model_len
=
None
,
hf_overrides
=
hf_overrides
,
hf_overrides
=
hf_overrides
,
...
...
tests/models/language/pooling_mteb_test/mteb_embed_utils.py
View file @
fc67613a
...
@@ -151,6 +151,7 @@ def mteb_test_embed_models(
...
@@ -151,6 +151,7 @@ def mteb_test_embed_models(
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
runner
=
"pooling"
,
max_model_len
=
model_info
.
max_model_len
,
max_model_len
=
model_info
.
max_model_len
,
**
vllm_extra_kwargs
,
**
vllm_extra_kwargs
,
...
@@ -201,6 +202,7 @@ def mteb_test_embed_models(
...
@@ -201,6 +202,7 @@ def mteb_test_embed_models(
if
model_info
.
mteb_score
is
None
:
if
model_info
.
mteb_score
is
None
:
with
hf_runner
(
with
hf_runner
(
model_info
.
name
,
model_info
.
name
,
revision
=
model_info
.
revision
,
is_sentence_transformer
=
True
,
is_sentence_transformer
=
True
,
dtype
=
ci_envs
.
VLLM_CI_HF_DTYPE
or
model_info
.
hf_dtype
,
dtype
=
ci_envs
.
VLLM_CI_HF_DTYPE
or
model_info
.
hf_dtype
,
)
as
hf_model
:
)
as
hf_model
:
...
...
tests/models/language/pooling_mteb_test/mteb_score_utils.py
View file @
fc67613a
...
@@ -241,6 +241,7 @@ def mteb_test_rerank_models(
...
@@ -241,6 +241,7 @@ def mteb_test_rerank_models(
with
vllm_runner
(
with
vllm_runner
(
model_info
.
name
,
model_info
.
name
,
revision
=
model_info
.
revision
,
runner
=
"pooling"
,
runner
=
"pooling"
,
max_model_len
=
None
,
max_model_len
=
None
,
max_num_seqs
=
8
,
max_num_seqs
=
8
,
...
@@ -286,7 +287,9 @@ def mteb_test_rerank_models(
...
@@ -286,7 +287,9 @@ def mteb_test_rerank_models(
# Accelerate mteb test by setting
# Accelerate mteb test by setting
# SentenceTransformers mteb score to a constant
# SentenceTransformers mteb score to a constant
if
model_info
.
mteb_score
is
None
:
if
model_info
.
mteb_score
is
None
:
with
hf_runner
(
model_info
.
name
,
dtype
=
model_info
.
hf_dtype
)
as
hf_model
:
with
hf_runner
(
model_info
.
name
,
revision
=
model_info
.
revision
,
dtype
=
model_info
.
hf_dtype
)
as
hf_model
:
hf_model
.
chat_template
=
chat_template
hf_model
.
chat_template
=
chat_template
st_main_score
=
run_mteb_rerank
(
st_main_score
=
run_mteb_rerank
(
hf_model
,
hf_model
,
...
...
tests/models/language/pooling_mteb_test/test_baai.py
View file @
fc67613a
...
@@ -69,7 +69,10 @@ MODELS = [
...
@@ -69,7 +69,10 @@ MODELS = [
attn_type
=
"decoder"
,
attn_type
=
"decoder"
,
is_prefix_caching_supported
=
True
,
is_prefix_caching_supported
=
True
,
is_chunked_prefill_supported
=
True
,
is_chunked_prefill_supported
=
True
,
enable_test
=
True
,
# Skip: model's custom tokenizer on HF hub is incompatible with
# transformers v5 (sets attrs before super().__init__, triggering
# AttributeError on 'verbose' in __getattr__).
enable_test
=
False
,
),
),
]
]
...
...
tests/models/language/pooling_mteb_test/test_gte.py
View file @
fc67613a
...
@@ -72,7 +72,8 @@ MODELS = [
...
@@ -72,7 +72,8 @@ MODELS = [
attn_type
=
"encoder_only"
,
attn_type
=
"encoder_only"
,
is_prefix_caching_supported
=
False
,
is_prefix_caching_supported
=
False
,
is_chunked_prefill_supported
=
False
,
is_chunked_prefill_supported
=
False
,
enable_test
=
True
,
# Skip: numerical regression with transformers v5.
enable_test
=
False
,
),
),
########## ModernBertModel
########## ModernBertModel
EmbedModelInfo
(
EmbedModelInfo
(
...
...
tests/models/language/pooling_mteb_test/test_jina.py
View file @
fc67613a
...
@@ -75,6 +75,10 @@ def test_rerank_models_mteb(vllm_runner, model_info: RerankModelInfo) -> None:
...
@@ -75,6 +75,10 @@ def test_rerank_models_mteb(vllm_runner, model_info: RerankModelInfo) -> None:
mteb_test_rerank_models
(
vllm_runner
,
model_info
)
mteb_test_rerank_models
(
vllm_runner
,
model_info
)
@
pytest
.
mark
.
skip
(
reason
=
"jinaai/jina-embeddings-v3 custom XLMRobertaLoRA model on HF hub "
"is incompatible with transformers v5 (missing all_tied_weights_keys)"
)
@
pytest
.
mark
.
parametrize
(
"model_info"
,
EMBEDDING_MODELS
)
@
pytest
.
mark
.
parametrize
(
"model_info"
,
EMBEDDING_MODELS
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"dimensions"
,
[
16
,
32
])
@
pytest
.
mark
.
parametrize
(
"dimensions"
,
[
16
,
32
])
...
...
tests/models/language/pooling_mteb_test/test_nomic.py
View file @
fc67613a
...
@@ -12,6 +12,10 @@ MODELS = [
...
@@ -12,6 +12,10 @@ MODELS = [
EmbedModelInfo
(
EmbedModelInfo
(
"nomic-ai/nomic-embed-text-v1"
,
"nomic-ai/nomic-embed-text-v1"
,
architecture
=
"NomicBertModel"
,
architecture
=
"NomicBertModel"
,
# Fixme:
# Update nomic-embed code to support the latest
# HF version and remove revision set.
revision
=
"720244025c1a7e15661a174c63cce63c8218e52b"
,
mteb_score
=
0.737568559
,
mteb_score
=
0.737568559
,
enable_test
=
True
,
enable_test
=
True
,
seq_pooling_type
=
"MEAN"
,
seq_pooling_type
=
"MEAN"
,
...
...
tests/models/multimodal/generation/test_common.py
View file @
fc67613a
...
@@ -186,7 +186,14 @@ VLM_TEST_SETTINGS = {
...
@@ -186,7 +186,14 @@ VLM_TEST_SETTINGS = {
max_num_seqs
=
2
,
max_num_seqs
=
2
,
auto_cls
=
AutoModel
,
auto_cls
=
AutoModel
,
hf_output_post_proc
=
model_utils
.
ultravox_trunc_hf_output
,
hf_output_post_proc
=
model_utils
.
ultravox_trunc_hf_output
,
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
,
# TODO: Remove skip once model has been upstreamed to Transformers
pytest
.
mark
.
skip
(
reason
=
"Custom model code is not compatible with Transformers v5"
),
],
),
),
#### Transformers fallback to test
#### Transformers fallback to test
## To reduce test burden, we only test batching arbitrary image size
## To reduce test burden, we only test batching arbitrary image size
...
@@ -397,14 +404,14 @@ VLM_TEST_SETTINGS = {
...
@@ -397,14 +404,14 @@ VLM_TEST_SETTINGS = {
"gemma4"
:
VLMTestInfo
(
"gemma4"
:
VLMTestInfo
(
models
=
[
"google/gemma-4-E2B-it"
],
models
=
[
"google/gemma-4-E2B-it"
],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
prompt_formatter
=
lambda
img_prompt
:
f
"<bos><
start_of_
turn>user
\n
{
img_prompt
}
<
end_of_
turn>
\n
<
start_of_
turn>model
\n
"
,
# noqa: E501
prompt_formatter
=
lambda
img_prompt
:
f
"<bos><
|
turn>user
\n
{
img_prompt
}
<turn
|
>
\n
<
|
turn>model
\n
"
,
# noqa: E501
single_image_prompts
=
IMAGE_ASSETS
.
prompts
(
single_image_prompts
=
IMAGE_ASSETS
.
prompts
(
{
{
"stop_sign"
:
"What's the content in the center of the image?"
,
"stop_sign"
:
"
<|image|>
What's the content in the center of the image?"
,
# noqa: E501
"cherry_blossom"
:
"What is the season?"
,
"cherry_blossom"
:
"
<|image|>
What is the season?"
,
}
}
),
),
multi_image_prompt
=
"Describe the two images in detail."
,
multi_image_prompt
=
"
<|image|><|image|>
Describe the two images in detail."
,
# noqa: E501
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForImageTextToText
,
auto_cls
=
AutoModelForImageTextToText
,
...
@@ -533,6 +540,12 @@ VLM_TEST_SETTINGS = {
...
@@ -533,6 +540,12 @@ VLM_TEST_SETTINGS = {
max_model_len
=
4096
,
max_model_len
=
4096
,
use_tokenizer_eos
=
True
,
use_tokenizer_eos
=
True
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
# TODO: Remove skip once model has been upstreamed to Transformers
marks
=
[
pytest
.
mark
.
skip
(
reason
=
"Custom model code tries to access data from meta-tensor"
)
],
),
),
"intern_vl-video"
:
VLMTestInfo
(
"intern_vl-video"
:
VLMTestInfo
(
models
=
[
models
=
[
...
@@ -545,6 +558,12 @@ VLM_TEST_SETTINGS = {
...
@@ -545,6 +558,12 @@ VLM_TEST_SETTINGS = {
use_tokenizer_eos
=
True
,
use_tokenizer_eos
=
True
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
num_logprobs
=
10
if
current_platform
.
is_rocm
()
else
5
,
num_logprobs
=
10
if
current_platform
.
is_rocm
()
else
5
,
# TODO: Remove skip once model has been upstreamed to Transformers
marks
=
[
pytest
.
mark
.
skip
(
reason
=
"Custom model code tries to access data from meta-tensor"
)
],
),
),
"intern_vl-hf"
:
VLMTestInfo
(
"intern_vl-hf"
:
VLMTestInfo
(
models
=
[
"OpenGVLab/InternVL3-1B-hf"
],
models
=
[
"OpenGVLab/InternVL3-1B-hf"
],
...
@@ -591,6 +610,8 @@ VLM_TEST_SETTINGS = {
...
@@ -591,6 +610,8 @@ VLM_TEST_SETTINGS = {
hf_model_kwargs
=
{
"device_map"
:
"auto"
},
hf_model_kwargs
=
{
"device_map"
:
"auto"
},
patch_hf_runner
=
model_utils
.
isaac_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
isaac_patch_hf_runner
,
image_size_factors
=
[(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
image_size_factors
=
[(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
# TODO: Remove skip once model has been upstreamed to Transformers
marks
=
[
pytest
.
mark
.
skip
(
reason
=
"Custom model imports deleted object"
)],
# noqa: E501
),
),
"kimi_vl"
:
VLMTestInfo
(
"kimi_vl"
:
VLMTestInfo
(
models
=
[
"moonshotai/Kimi-VL-A3B-Instruct"
],
models
=
[
"moonshotai/Kimi-VL-A3B-Instruct"
],
...
@@ -806,7 +827,12 @@ VLM_TEST_SETTINGS = {
...
@@ -806,7 +827,12 @@ VLM_TEST_SETTINGS = {
pytest
.
mark
.
skipif
(
pytest
.
mark
.
skipif
(
Version
(
TRANSFORMERS_VERSION
)
==
Version
(
"4.57.3"
),
Version
(
TRANSFORMERS_VERSION
)
==
Version
(
"4.57.3"
),
reason
=
"This model is broken in Transformers v4.57.3"
,
reason
=
"This model is broken in Transformers v4.57.3"
,
)
),
pytest
.
mark
.
skipif
(
Version
(
TRANSFORMERS_VERSION
)
>=
Version
(
"5.0.0"
),
reason
=
"Model's custom code uses ROPE_INIT_FUNCTIONS"
"['default'] which was removed in transformers v5"
,
),
],
],
),
),
"phi3v"
:
VLMTestInfo
(
"phi3v"
:
VLMTestInfo
(
...
@@ -960,6 +986,12 @@ VLM_TEST_SETTINGS = {
...
@@ -960,6 +986,12 @@ VLM_TEST_SETTINGS = {
)
)
for
inp
in
custom_inputs
.
different_patch_input_cases_internvl
()
for
inp
in
custom_inputs
.
different_patch_input_cases_internvl
()
],
],
# TODO: Remove skip once model has been upstreamed to Transformers
marks
=
[
pytest
.
mark
.
skip
(
reason
=
"Custom model code tries to access data from meta-tensor"
)
],
),
),
"llava_onevision-multiple-images"
:
VLMTestInfo
(
"llava_onevision-multiple-images"
:
VLMTestInfo
(
models
=
[
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
],
models
=
[
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
],
...
...
tests/models/multimodal/generation/test_nemotron_parse.py
View file @
fc67613a
...
@@ -103,6 +103,10 @@ def run_test(
...
@@ -103,6 +103,10 @@ def run_test(
)
)
@
pytest
.
mark
.
skip
(
reason
=
"Model's custom MBart decoder has head count mismatch with "
"transformers v5's GQA-aware cross-attention (8 vs 16 heads)"
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"nvidia/NVIDIA-Nemotron-Parse-v1.1"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"nvidia/NVIDIA-Nemotron-Parse-v1.1"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment