Commit 459d9b38 authored by Harry Mellor, committed by khluu
Browse files

Update to transformers v5 (#30566)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: khluu <khluu000@gmail.com>
Signed-off-by: Kevin H. Luu <khluu000@gmail.com>
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: khluu <khluu000@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: jiang1.li <jiang1.li@intel.com>
(cherry picked from commit 03f8d3a5)
parent b1568cf4
......@@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image"
docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu .
# Run the image, setting --shm-size=4g for tensor parallel.
docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \
docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 -e VLLM_CPU_ATTN_SPLIT_KV=0 --shm-size=4g "$IMAGE_NAME" \
timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}"
......@@ -69,3 +69,18 @@ steps:
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
# Whisper needs spawn method to avoid deadlock
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
# Backward-compatibility step: the first command pins transformers to 4.57.5,
# then the remaining commands run model tests and offline-inference examples
# against that pinned version.
- label: Transformers Backward Compatibility Models Test
working_dir: "/vllm-workspace/"
optional: true
soft_fail: true
commands:
- pip install transformers==4.57.5
- pytest -v -s tests/models/test_initialization.py
- pytest -v -s tests/models/test_transformers.py
- pytest -v -s tests/models/multimodal/processing/
- pytest -v -s tests/models/multimodal/test_mapping.py
- python3 examples/offline_inference/basic/chat.py
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
# Whisper needs spawn method to avoid deadlock
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
......@@ -649,7 +649,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
else \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
fi; \
uv pip install --system accelerate hf_transfer modelscope \
uv pip install --system accelerate modelscope \
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
# ============================================================
......@@ -772,9 +772,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -e tests/vllm_test_utils
# enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV HF_XET_HIGH_PERFORMANCE=1
# increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT=60
# Copy in the v1 package for testing (it isn't distributed yet)
COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
......
......@@ -195,6 +195,12 @@ ADD ./.buildkite/ ./.buildkite/
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils
# enable fast downloads from hf (for testing)
ENV HF_XET_HIGH_PERFORMANCE=1
# increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT=60
######################### RELEASE IMAGE #########################
FROM base AS vllm-openai
......
......@@ -269,9 +269,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -e tests/vllm_test_utils
# enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV HF_XET_HIGH_PERFORMANCE=1
# increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT=60
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r requirements/nightly_torch_test.txt
......
......@@ -364,9 +364,10 @@ RUN cd /vllm-workspace \
&& python3 -m pip install pytest-shard
# enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV HF_XET_HIGH_PERFORMANCE=1
# increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT=60
# install audio decode package `torchcodec` from source (required due to
# ROCm and torch version mismatch) for tests with datasets package
......
......@@ -147,7 +147,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700
# Install dependencies
pip install --upgrade numba \
scipy \
huggingface-hub[cli,hf_transfer] \
huggingface-hub[cli] \
setuptools_scm
pip install -r requirements/rocm.txt
......
......@@ -7,7 +7,7 @@ requests >= 2.26.0
tqdm
blake3
py-cpuinfo
transformers >= 4.56.0, < 5
transformers >= 4.56.0, != 5.0.*, != 5.1.*, != 5.2.*, != 5.3.*, != 5.4.*, != 5.5.0
tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
......@@ -37,7 +37,7 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL.
compressed-tensors == 0.14.0.1 # required for compressed-tensors
compressed-tensors == 0.15.0.1 # required for compressed-tensors
depyf==0.20.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files
......
......@@ -29,8 +29,8 @@ opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5
tokenizers==0.22.0
transformers==5.5.3
tokenizers==0.22.2
schemathesis>=3.39.15 # Required for openai schema test.
# quantization
bitsandbytes>=0.49.2
......
......@@ -36,8 +36,8 @@ opencv-python-headless>=4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5
tokenizers==0.22.0
transformers==5.5.3
tokenizers==0.22.2
schemathesis>=3.39.15 # Required for openai schema test
# quantization
bitsandbytes==0.49.2
......@@ -80,4 +80,3 @@ plotly # required for perf comparison html report
rapidfuzz
torchgeo==0.7.0
multiprocess==0.70.16
huggingface-hub==0.36.2
......@@ -232,7 +232,6 @@ filelock==3.25.2
# python-discovery
# ray
# torch
# transformers
# virtualenv
fiona==1.10.1
# via torchgeo
......@@ -318,7 +317,7 @@ h5py==3.16.0
# via terratorch
harfile==0.4.0
# via schemathesis
hf-xet==1.4.2
hf-xet==1.4.3
# via huggingface-hub
hiredis==3.3.1
# via tensorizer
......@@ -332,11 +331,11 @@ httpx==0.27.2
# via
# -r requirements/rocm-test.in
# diffusers
# huggingface-hub
# perceptron
# schemathesis
huggingface-hub==0.36.2
huggingface-hub==1.10.2
# via
# -r requirements/rocm-test.in
# accelerate
# datasets
# diffusers
......@@ -970,7 +969,6 @@ requests==2.32.5
# google-api-core
# google-cloud-storage
# gpt-oss
# huggingface-hub
# lightly
# lm-eval
# mistral-common
......@@ -983,7 +981,6 @@ requests==2.32.5
# starlette-testclient
# tacoreader
# tiktoken
# transformers
# wandb
resampy==0.4.3
# via -r requirements/rocm-test.in
......@@ -1191,7 +1188,7 @@ timm==1.0.17
# segmentation-models-pytorch
# terratorch
# torchgeo
tokenizers==0.22.0
tokenizers==0.22.2
# via
# -c requirements/common.txt
# -r requirements/rocm-test.in
......@@ -1230,7 +1227,7 @@ tqdm==4.67.3
# tacoreader
# terratorch
# transformers
transformers==4.57.5
transformers==5.5.3
# via
# -c requirements/common.txt
# -r requirements/rocm-test.in
......@@ -1252,7 +1249,9 @@ typepy==1.3.4
typer==0.24.1
# via
# fastsafetensors
# huggingface-hub
# perceptron
# transformers
typeshed-client==2.9.0
# via jsonargparse
typing-extensions==4.15.0
......
......@@ -18,7 +18,7 @@ httpx
librosa # required for audio tests
vector_quantize_pytorch # required for minicpmo_26 test
vocos # required for minicpmo_26 test
peft>=0.15.0 # required for phi-4-mm test
peft>=0.18.1 # required for phi-4-mm test
pqdm
ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
resampy # required for audio tests
......@@ -39,8 +39,8 @@ opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5
tokenizers==0.22.0
transformers==5.5.3
tokenizers==0.22.2
schemathesis>=3.39.15 # Required for openai schema test.
# quantization
bitsandbytes==0.49.2
......
......@@ -4,7 +4,7 @@ absl-py==2.1.0
# via
# rouge-score
# tensorboard
accelerate==1.0.1
accelerate==1.13.0
# via peft
aenum==3.1.16
# via lightly
......@@ -240,7 +240,6 @@ filelock==3.16.1
# huggingface-hub
# ray
# torch
# transformers
# virtualenv
fiona==1.10.1
# via torchgeo
......@@ -323,7 +322,7 @@ h5py==3.13.0
# via terratorch
harfile==0.3.0
# via schemathesis
hf-xet==1.1.7
hf-xet==1.4.3
# via huggingface-hub
hiredis==3.0.0
# via tensorizer
......@@ -337,9 +336,10 @@ httpx==0.27.2
# via
# -r requirements/test.in
# diffusers
# huggingface-hub
# perceptron
# schemathesis
huggingface-hub==0.36.2
huggingface-hub==1.10.2
# via
# accelerate
# datasets
......@@ -740,7 +740,7 @@ pathvalidate==3.2.1
# via pytablewriter
patsy==1.0.1
# via statsmodels
peft==0.16.0
peft==0.18.1
# via -r requirements/test.in
perceptron==0.1.4
# via -r requirements/test.in
......@@ -963,7 +963,7 @@ referencing==0.35.1
# via
# jsonschema
# jsonschema-specifications
regex==2024.9.11
regex==2026.2.28
# via
# diffusers
# nltk
......@@ -982,7 +982,6 @@ requests==2.32.3
# google-api-core
# google-cloud-storage
# gpt-oss
# huggingface-hub
# lightly
# lm-eval
# mistral-common
......@@ -995,7 +994,6 @@ requests==2.32.3
# starlette-testclient
# tacoreader
# tiktoken
# transformers
# wandb
resampy==0.4.3
# via -r requirements/test.in
......@@ -1193,7 +1191,7 @@ timm==1.0.17
# segmentation-models-pytorch
# terratorch
# torchgeo
tokenizers==0.22.0
tokenizers==0.22.2
# via
# -r requirements/test.in
# transformers
......@@ -1269,7 +1267,7 @@ tqdm==4.67.3
# tacoreader
# terratorch
# transformers
transformers==4.57.5
transformers==5.5.3
# via
# -r requirements/test.in
# genai-perf
......@@ -1290,7 +1288,9 @@ typepy==1.3.2
typer==0.15.2
# via
# fastsafetensors
# huggingface-hub
# perceptron
# transformers
types-python-dateutil==2.9.0.20241206
# via arrow
typeshed-client==2.8.2
......
# This file was autogenerated by uv via the following command:
# uv pip compile requirements/test/xpu.in -c requirements/xpu.txt -o requirements/test/xpu.txt --index-strategy unsafe-best-match --torch-backend xpu --python-platform x86_64-manylinux_2_39 --python-version 3.12
absl-py==2.4.0
# via
# -r requirements/test/xpu.in
# rouge-score
accelerate==1.13.0
# via -r requirements/test/xpu.in
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.4
# via
# -c requirements/common.txt
# fsspec
# gpt-oss
# lm-eval
aiosignal==1.4.0
# via aiohttp
albumentations==1.4.6
# via -r requirements/test/xpu.in
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
anyio==4.13.0
# via
# httpx
# starlette
arctic-inference==0.1.1
# via -r requirements/test/xpu.in
attrs==26.1.0
# via
# aiohttp
# jsonlines
# jsonschema
# referencing
audioread==3.0.1
# via
# -r requirements/test/xpu.in
# librosa
blobfile==3.0.0
# via -r requirements/test/xpu.in
bm25s==0.2.13
# via
# -r requirements/test/xpu.in
# mteb
bounded-pool-executor==0.0.3
# via pqdm
certifi==2026.2.25
# via
# httpcore
# httpx
# requests
cffi==2.0.0
# via soundfile
chardet==5.2.0
# via mbstrdecoder
charset-normalizer==3.4.6
# via requests
chz==0.4.0
# via gpt-oss
click==8.3.1
# via
# jiwer
# nltk
# schemathesis
# typer
# uvicorn
colorama==0.4.6
# via sacrebleu
coverage==7.13.5
# via pytest-cov
dataproperty==1.1.0
# via
# pytablewriter
# tabledata
datasets==4.8.4
# via
# evaluate
# lm-eval
# mteb
decorator==5.2.1
# via librosa
dill==0.4.1
# via
# datasets
# evaluate
# lm-eval
# multiprocess
docker==7.1.0
# via gpt-oss
docopt==0.6.2
# via num2words
dpcpp-cpp-rt==2025.3.1
# via
# onemkl-sycl-blas
# onemkl-sycl-dft
# onemkl-sycl-lapack
# onemkl-sycl-rng
# onemkl-sycl-sparse
# torch
evaluate==0.4.6
# via lm-eval
fastapi==0.135.2
# via
# -c requirements/common.txt
# gpt-oss
filelock==3.25.2
# via
# -c requirements/common.txt
# blobfile
# datasets
# huggingface-hub
# modelscope
# torch
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec==2026.2.0
# via
# datasets
# evaluate
# huggingface-hub
# torch
gpt-oss==0.0.8
# via -r requirements/test/xpu.in
graphql-core==3.2.8
# via hypothesis-graphql
h11==0.16.0
# via
# httpcore
# uvicorn
harfile==0.4.0
# via schemathesis
hf-xet==1.4.3
# via huggingface-hub
html2text==2025.4.15
# via gpt-oss
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# datasets
# huggingface-hub
# schemathesis
huggingface-hub==1.10.2
# via
# accelerate
# datasets
# evaluate
# sentence-transformers
# timm
# tokenizers
# transformers
hypothesis==6.151.10
# via
# hypothesis-graphql
# hypothesis-jsonschema
# schemathesis
hypothesis-graphql==0.12.0
# via schemathesis
hypothesis-jsonschema==0.23.1
# via schemathesis
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
imageio==2.37.3
# via scikit-image
impi-rt==2021.17.0
# via
# oneccl
# torch
iniconfig==2.3.0
# via pytest
intel-cmplr-lib-rt==2025.3.1
# via
# intel-sycl-rt
# torch
intel-cmplr-lib-ur==2025.3.1
# via
# intel-openmp
# intel-sycl-rt
# torch
intel-cmplr-lic-rt==2025.3.1
# via
# intel-opencl-rt
# intel-sycl-rt
# torch
intel-opencl-rt==2025.3.1
# via
# dpcpp-cpp-rt
# onemkl-sycl-blas
# onemkl-sycl-dft
# onemkl-sycl-lapack
# onemkl-sycl-rng
# onemkl-sycl-sparse
# torch
intel-openmp==2025.3.1
# via
# dpcpp-cpp-rt
# mkl
# torch
intel-pti==0.15.0
# via torch
intel-sycl-rt==2025.3.1
# via
# dpcpp-cpp-rt
# oneccl
# torch
jinja2==3.1.6
# via
# -c requirements/xpu.txt
# lm-eval
# torch
jiwer==4.0.0
# via -r requirements/test/xpu.in
joblib==1.5.3
# via
# librosa
# nltk
# scikit-learn
jsonlines==4.0.0
# via lm-eval
jsonschema==4.26.0
# via
# hypothesis-jsonschema
# mistral-common
# schemathesis
jsonschema-rs==0.45.0
# via schemathesis
jsonschema-specifications==2025.9.1
# via jsonschema
junit-xml==1.9
# via schemathesis
lazy-loader==0.5
# via
# librosa
# scikit-image
librosa==0.10.2.post1
# via -r requirements/test/xpu.in
llvmlite==0.44.0
# via numba
lm-eval==0.4.11
# via -r requirements/test/xpu.in
lxml==6.0.2
# via
# blobfile
# gpt-oss
# sacrebleu
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via
# jinja2
# werkzeug
mbstrdecoder==1.1.4
# via
# dataproperty
# pytablewriter
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common==1.11.0
# via
# -c requirements/common.txt
# -r requirements/test/xpu.in
mkl==2025.3.0
# via
# onemkl-sycl-blas
# onemkl-sycl-dft
# onemkl-sycl-lapack
# onemkl-sycl-rng
# onemkl-sycl-sparse
# torch
modelscope==1.35.3
# via -r requirements/test/xpu.in
more-itertools==10.8.0
# via lm-eval
mpmath==1.3.0
# via sympy
msgpack==1.1.2
# via librosa
mteb==2.12.7
# via -r requirements/test/xpu.in
multidict==6.7.1
# via
# aiohttp
# yarl
multiprocess==0.70.19
# via
# datasets
# evaluate
networkx==3.6.1
# via
# scikit-image
# torch
nltk==3.9.4
# via rouge-score
num2words==0.5.14
# via -r requirements/test/xpu.in
numba==0.61.2
# via
# -c requirements/xpu.txt
# librosa
numpy==2.2.6
# via
# accelerate
# albumentations
# bm25s
# datasets
# evaluate
# imageio
# librosa
# lm-eval
# mistral-common
# mteb
# numba
# opencv-python-headless
# pandas
# pytrec-eval-terrier
# rouge-score
# sacrebleu
# scikit-image
# scikit-learn
# scipy
# sentence-transformers
# soundfile
# soxr
# tifffile
# torchvision
# transformers
oneccl==2021.17.1
# via
# oneccl-devel
# torch
oneccl-devel==2021.17.1
# via torch
onemkl-license==2025.3.0
# via
# mkl
# torch
onemkl-sycl-blas==2025.3.0
# via
# onemkl-sycl-lapack
# onemkl-sycl-sparse
# torch
onemkl-sycl-dft==2025.3.0
# via torch
onemkl-sycl-lapack==2025.3.0
# via torch
onemkl-sycl-rng==2025.3.0
# via torch
onemkl-sycl-sparse==2025.3.0
# via torch
openai-harmony==0.0.8
# via
# -c requirements/common.txt
# gpt-oss
opencv-python-headless==4.13.0.92
# via
# -c requirements/common.txt
# albumentations
# mistral-common
packaging==26.0
# via
# -c requirements/xpu.txt
# accelerate
# datasets
# evaluate
# huggingface-hub
# lazy-loader
# modelscope
# pooch
# pytest
# pytest-rerunfailures
# scikit-image
# transformers
# typepy
pandas==3.0.1
# via
# datasets
# evaluate
pathvalidate==3.3.1
# via pytablewriter
pillow==12.1.1
# via
# imageio
# mistral-common
# scikit-image
# torchvision
platformdirs==4.9.4
# via pooch
pluggy==1.6.0
# via
# pytest
# pytest-cov
polars==1.39.3
# via mteb
polars-runtime-32==1.39.3
# via polars
pooch==1.8.2
# via
# -r requirements/test/xpu.in
# librosa
portalocker==3.2.0
# via sacrebleu
pqdm==0.2.0
# via -r requirements/test/xpu.in
propcache==0.4.1
# via
# aiohttp
# yarl
psutil==7.2.2
# via accelerate
py==1.11.0
# via pytest-forked
pyarrow==23.0.1
# via datasets
pycountry==26.2.16
# via pydantic-extra-types
pycparser==3.0
# via cffi
pycryptodomex==3.23.0
# via blobfile
pydantic==2.12.5
# via
# -c requirements/common.txt
# albumentations
# fastapi
# gpt-oss
# mistral-common
# mteb
# openai-harmony
# pydantic-extra-types
pydantic-core==2.41.5
# via pydantic
pydantic-extra-types==2.11.1
# via mistral-common
pyelftools==0.32
# via triton-xpu
pygments==2.20.0
# via
# pytest
# rich
pyrate-limiter==4.1.0
# via schemathesis
pystemmer==3.0.0
# via
# -r requirements/test/xpu.in
# mteb
pytablewriter==1.2.1
# via lm-eval
pytest==9.0.2
# via
# -r requirements/test/xpu.in
# pytest-asyncio
# pytest-cov
# pytest-forked
# pytest-rerunfailures
# pytest-shard
# pytest-timeout
# schemathesis
pytest-asyncio==1.3.0
# via -r requirements/test/xpu.in
pytest-cov==6.3.0
# via -r requirements/test/xpu.in
pytest-forked==1.6.0
# via -r requirements/test/xpu.in
pytest-rerunfailures==14.0
# via -r requirements/test/xpu.in
pytest-shard==0.1.2
# via -r requirements/test/xpu.in
pytest-timeout==2.3.1
# via -r requirements/test/xpu.in
python-dateutil==2.9.0.post0
# via
# pandas
# typepy
pytrec-eval-terrier==0.5.10
# via mteb
pytz==2026.1.post1
# via typepy
pyyaml==6.0.3
# via
# accelerate
# albumentations
# datasets
# huggingface-hub
# schemathesis
# timm
# transformers
rapidfuzz==3.12.1
# via
# -r requirements/test/xpu.in
# jiwer
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.3.32
# via
# nltk
# sacrebleu
# tiktoken
# transformers
requests==2.33.1
# via
# -c requirements/common.txt
# datasets
# docker
# evaluate
# gpt-oss
# lm-eval
# mistral-common
# modelscope
# mteb
# pooch
# schemathesis
# starlette-testclient
# tiktoken
rich==14.3.3
# via
# mteb
# schemathesis
# typer
rouge-score==0.1.2
# via lm-eval
rpds-py==0.30.0
# via
# jsonschema
# referencing
sacrebleu==2.6.0
# via lm-eval
safetensors==0.7.0
# via
# accelerate
# timm
# transformers
schemathesis==4.14.2
# via -r requirements/test/xpu.in
scikit-image==0.26.0
# via albumentations
scikit-learn==1.8.0
# via
# albumentations
# librosa
# lm-eval
# mteb
# sentence-transformers
scipy==1.17.1
# via
# albumentations
# bm25s
# librosa
# mteb
# pytrec-eval-terrier
# scikit-image
# scikit-learn
# sentence-transformers
sentence-transformers==5.3.0
# via mteb
setuptools==80.10.2
# via
# -c requirements/common.txt
# -c requirements/xpu.txt
# modelscope
# pytablewriter
# torch
shellingham==1.5.4
# via typer
six==1.17.0
# via
# -c requirements/common.txt
# junit-xml
# python-dateutil
# rouge-score
sortedcontainers==2.4.0
# via hypothesis
soundfile==0.13.1
# via
# -r requirements/test/xpu.in
# librosa
# mistral-common
soxr==0.5.0.post1
# via
# -r requirements/test/xpu.in
# librosa
# mistral-common
sqlitedict==2.1.0
# via lm-eval
starlette==1.0.0
# via
# fastapi
# starlette-testclient
starlette-testclient==0.4.1
# via schemathesis
structlog==25.5.0
# via gpt-oss
sympy==1.14.0
# via torch
tabledata==1.3.4
# via pytablewriter
tabulate==0.10.0
# via sacrebleu
tbb==2022.3.0
# via
# intel-opencl-rt
# mkl
# torch
tblib==3.1.0
# via -r requirements/test/xpu.in
tcmlib==1.4.1
# via
# tbb
# torch
# umf
tcolorpy==0.1.7
# via pytablewriter
tenacity==9.1.4
# via
# gpt-oss
# lm-eval
# schemathesis
termcolor==3.3.0
# via gpt-oss
threadpoolctl==3.6.0
# via scikit-learn
tifffile==2026.3.3
# via scikit-image
tiktoken==0.12.0
# via
# -c requirements/common.txt
# gpt-oss
# lm-eval
# mistral-common
timm==1.0.17
# via -r requirements/test/xpu.in
tokenizers==0.22.2
# via
# -c requirements/common.txt
# transformers
torch==2.10.0+xpu
# via
# -c requirements/xpu.txt
# accelerate
# mteb
# sentence-transformers
# timm
# torchvision
torchvision==0.25.0+xpu
# via timm
tqdm==4.67.3
# via
# datasets
# evaluate
# huggingface-hub
# lm-eval
# modelscope
# mteb
# nltk
# pqdm
# sentence-transformers
# transformers
transformers==5.5.3
# via
# -c requirements/common.txt
# sentence-transformers
triton-xpu==3.6.0
# via torch
typepy==1.3.4
# via
# dataproperty
# pytablewriter
# tabledata
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# -c requirements/common.txt
# aiosignal
# albumentations
# anyio
# chz
# fastapi
# huggingface-hub
# librosa
# lm-eval
# mistral-common
# mteb
# pqdm
# pydantic
# pydantic-core
# pydantic-extra-types
# pytest-asyncio
# referencing
# schemathesis
# sentence-transformers
# starlette
# torch
# typing-inspection
typing-inspection==0.4.2
# via
# fastapi
# pydantic
umf==1.0.2
# via
# intel-cmplr-lib-ur
# torch
urllib3==2.6.3
# via
# blobfile
# docker
# modelscope
# requests
uvicorn==0.42.0
# via gpt-oss
werkzeug==3.1.7
# via schemathesis
word2number==1.1
# via lm-eval
xxhash==3.6.0
# via
# datasets
# evaluate
yarl==1.23.0
# via aiohttp
zstandard==0.25.0
# via lm-eval
......@@ -9,6 +9,8 @@ pytest-shard
# --- Core Tools & Bindings ---
absl-py
arctic-inference
lm_eval[api]
modelscope
# --- Audio Processing ---
librosa
......
......@@ -409,6 +409,15 @@ class HfRunner:
model_name,
trust_remote_code=trust_remote_code,
)
# HF runner should use the HF config so that it's consistent with the HF model
if self.config.__module__.startswith("vllm.transformers_utils.configs"):
from transformers.models.auto.configuration_auto import CONFIG_MAPPING
del CONFIG_MAPPING._extra_content[self.config.model_type]
self.config = AutoConfig.from_pretrained(
model_name,
trust_remote_code=trust_remote_code,
)
self.device = self.get_default_device()
self.dtype = dtype = _get_and_verify_dtype(
self.model_name,
......
......@@ -3,6 +3,7 @@
import tempfile
from collections import OrderedDict
from importlib import reload
from unittest.mock import MagicMock
import pytest
......@@ -43,6 +44,18 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
cleanup_dist_env_and_memory(shutdown_ray=True)
# Fixture: when current_platform.is_cuda() is true, sets the environment
# variable VLLM_LORA_ENABLE_DUAL_STREAM to "1" through monkeypatch. It also
# imports vllm.lora.layers.base_linear and reloads that module if it does not
# already expose a `lora_linear_async` attribute.
# NOTE(review): indentation is not preserved in this view -- confirm whether
# the import/reload are nested under the CUDA check or run unconditionally.
@pytest.fixture
def maybe_enable_lora_dual_stream(monkeypatch: pytest.MonkeyPatch):
if current_platform.is_cuda():
monkeypatch.setenv("VLLM_LORA_ENABLE_DUAL_STREAM", "1")
import vllm.lora.layers.base_linear
if not hasattr(vllm.lora.layers.base_linear, "lora_linear_async"):
# Reload the module to ensure the environment variable takes effect.
reload(vllm.lora.layers.base_linear)
# Hand control to the test; the fixture has no statements after the yield.
yield
@pytest.fixture
def dist_init():
from tests.utils import ensure_current_vllm_config
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from importlib.metadata import version
import pytest
from packaging.version import Version
import vllm
from vllm.assets.image import ImageAsset
......@@ -10,6 +13,14 @@ from vllm.platforms import current_platform
from ..utils import multi_gpu_test
# Module-level marker: skip these tests when the installed transformers
# release is 5.0 or newer (the reason string explains the incompatibility).
pytestmark = pytest.mark.skipif(
    Version(version("transformers")) >= Version("5.0"),
    reason=(
        "MiniCPMV custom processor uses tokenizer.im_start_id which is not "
        "available on TokenizersBackend in transformers v5.0+"
    ),
)
MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
PROMPT_TEMPLATE = (
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import tempfile
import huggingface_hub.constants
......@@ -10,26 +9,10 @@ from huggingface_hub.utils import LocalEntryNotFoundError
from vllm.model_executor.model_loader.weight_utils import (
download_weights_from_hf,
enable_hf_transfer,
maybe_remap_kv_scale_name,
)
def test_hf_transfer_auto_activation():
if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
# in case it is already set, we can't test the auto activation
pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")
enable_hf_transfer()
try:
# enable hf hub transfer if available
import hf_transfer # type: ignore # noqa
HF_TRANSFER_ACTIVE = True
except ImportError:
HF_TRANSFER_ACTIVE = False
assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE
def test_download_weights_from_hf():
with tempfile.TemporaryDirectory() as tmpdir:
# assert LocalEntryNotFoundError error is thrown
......@@ -178,5 +161,4 @@ class TestMaybeRemapKvScaleName:
if __name__ == "__main__":
test_hf_transfer_auto_activation()
test_download_weights_from_hf()
......@@ -143,6 +143,11 @@ def test_models(
# in parts of the operators
pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
if current_platform.is_cpu() and model == "TitanML/tiny-mixtral":
# This untrained model is sensitive to the rounding error
# Fuse ops to reduce bfloat16 rounding
monkeypatch.setenv("VLLM_CPU_CI_ENV", "0")
with hf_runner(model) as hf_model:
hf_outputs = hf_model.generate_greedy_logprobs_limit(
example_prompts, max_tokens, num_logprobs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment