Update to transformers v5 (#30566)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: khluu <khluu000@gmail.com> Signed-off-by: Kevin H. Luu <khluu000@gmail.com> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: khluu <khluu000@gmail.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: jiang1.li <jiang1.li@intel.com> (cherry picked from commit 03f8d3a5)

Update to transformers v5 (#30566)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: khluu <khluu000@gmail.com> Signed-off-by: Kevin H. Luu <khluu000@gmail.com> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: khluu <khluu000@gmail.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: jiang1.li <jiang1.li@intel.com> (cherry picked from commit 03f8d3a5)
459d9b38 · Harry Mellor · khluu · b1568cf4 · 459d9b38 · 459d9b38
Commit 459d9b38 authored Apr 16, 2026 by Harry Mellor Committed by khluu Apr 16, 2026
20 changed files
--- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh
@@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image"
 docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu .
 # Run the image, setting --shm-size=4g for tensor parallel.
-docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \
+docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 -e VLLM_CPU_ATTN_SPLIT_KV=0 --shm-size=4g "$IMAGE_NAME" \
        timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}"
--- a/.buildkite/test_areas/models_basic.yaml
+++ b/.buildkite/test_areas/models_basic.yaml
@@ -69,3 +69,18 @@ steps:
    - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
    # Whisper needs spawn method to avoid deadlock
    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
+# Re-runs a subset of the model tests and examples after downgrading to
+# transformers 4.57.5, the version the test requirements pinned before the
+# move to transformers v5.
+- label: Transformers Backward Compatibility Models Test
+  working_dir: "/vllm-workspace/"
+  optional: true
+  # A failure of this step is reported but does not fail the build.
+  soft_fail: true
+  commands:
+    - pip install transformers==4.57.5
+    - pytest -v -s tests/models/test_initialization.py
+    - pytest -v -s tests/models/test_transformers.py
+    - pytest -v -s tests/models/multimodal/processing/
+    - pytest -v -s tests/models/multimodal/test_mapping.py
+    - python3 examples/offline_inference/basic/chat.py
+    - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+    # Whisper needs spawn method to avoid deadlock
+    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -649,7 +649,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    else \
        BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
    fi; \
-    uv pip install --system accelerate hf_transfer modelscope \
+    uv pip install --system accelerate modelscope \
        "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
 # ============================================================
@@ -772,9 +772,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -e tests/vllm_test_utils
 # enable fast downloads from hf (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
+ENV HF_XET_HIGH_PERFORMANCE=1
-    uv pip install --system hf_transfer
-ENV HF_HUB_ENABLE_HF_TRANSFER 1
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT=60
 # Copy in the v1 package for testing (it isn't distributed yet)
 COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1

--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -195,6 +195,12 @@ ADD ./.buildkite/ ./.buildkite/
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils
+# enable fast downloads from hf (for testing)
+ENV HF_XET_HIGH_PERFORMANCE=1
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT=60
 ######################### RELEASE IMAGE #########################
 FROM base AS vllm-openai

--- a/docker/Dockerfile.nightly_torch
+++ b/docker/Dockerfile.nightly_torch
@@ -269,9 +269,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -e tests/vllm_test_utils
 # enable fast downloads from hf (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
+ENV HF_XET_HIGH_PERFORMANCE=1
-    uv pip install --system hf_transfer
-ENV HF_HUB_ENABLE_HF_TRANSFER 1
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT=60
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -r requirements/nightly_torch_test.txt

--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -364,9 +364,10 @@ RUN cd /vllm-workspace \
    && python3 -m pip install pytest-shard
 # enable fast downloads from hf (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
+ENV HF_XET_HIGH_PERFORMANCE=1
-    uv pip install --system hf_transfer
-ENV HF_HUB_ENABLE_HF_TRANSFER=1
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT=60
 # install audio decode package `torchcodec` from source (required due to 
 # ROCm and torch version mismatch) for tests with datasets package

--- a/docs/getting_started/installation/gpu.rocm.inc.md
+++ b/docs/getting_started/installation/gpu.rocm.inc.md
@@ -147,7 +147,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700
        # Install dependencies
        pip install --upgrade numba \
            scipy \
-            huggingface-hub[cli,hf_transfer] \
+            huggingface-hub[cli] \
            setuptools_scm
        pip install -r requirements/rocm.txt

--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -7,7 +7,7 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.56.0, < 5
+transformers >= 4.56.0, != 5.0.*, != 5.1.*, != 5.2.*, != 5.3.*, != 5.4.*, != 5.5.0
 tokenizers >= 0.21.1  # Required for fast incremental detokenization.
 protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
@@ -37,7 +37,7 @@ pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
 einops # Required for Qwen2-VL.
-compressed-tensors == 0.14.0.1 # required for compressed-tensors
+compressed-tensors == 0.15.0.1 # required for compressed-tensors
 depyf==0.20.0 # required for profiling and debugging with compilation config
 cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
 watchfiles # required for http server to monitor the updates of TLS files

--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
@@ -29,8 +29,8 @@ opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]>=0.4.11 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
-transformers==4.57.5
+transformers==5.5.3
-tokenizers==0.22.0
+tokenizers==0.22.2
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
 bitsandbytes>=0.49.2

--- a/requirements/rocm-test.in
+++ b/requirements/rocm-test.in
@@ -36,8 +36,8 @@ opencv-python-headless>=4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]>=0.4.11 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
-transformers==4.57.5
+transformers==5.5.3
-tokenizers==0.22.0
+tokenizers==0.22.2
 schemathesis>=3.39.15 # Required for openai schema test
 # quantization
 bitsandbytes==0.49.2
@@ -80,4 +80,3 @@ plotly # required for perf comparison html report
 rapidfuzz
 torchgeo==0.7.0
 multiprocess==0.70.16
-huggingface-hub==0.36.2
--- a/requirements/rocm-test.txt
+++ b/requirements/rocm-test.txt
@@ -232,7 +232,6 @@ filelock==3.25.2
    #   python-discovery
    #   ray
    #   torch
-    #   transformers
    #   virtualenv
 fiona==1.10.1
    # via torchgeo
@@ -318,7 +317,7 @@ h5py==3.16.0
    # via terratorch
 harfile==0.4.0
    # via schemathesis
-hf-xet==1.4.2
+hf-xet==1.4.3
    # via huggingface-hub
 hiredis==3.3.1
    # via tensorizer
@@ -332,11 +331,11 @@ httpx==0.27.2
    # via
    #   -r requirements/rocm-test.in
    #   diffusers
+    #   huggingface-hub
    #   perceptron
    #   schemathesis
-huggingface-hub==0.36.2
+huggingface-hub==1.10.2
    # via
-    #   -r requirements/rocm-test.in
    #   accelerate
    #   datasets
    #   diffusers
@@ -970,7 +969,6 @@ requests==2.32.5
    #   google-api-core
    #   google-cloud-storage
    #   gpt-oss
-    #   huggingface-hub
    #   lightly
    #   lm-eval
    #   mistral-common
@@ -983,7 +981,6 @@ requests==2.32.5
    #   starlette-testclient
    #   tacoreader
    #   tiktoken
-    #   transformers
    #   wandb
 resampy==0.4.3
    # via -r requirements/rocm-test.in
@@ -1191,7 +1188,7 @@ timm==1.0.17
    #   segmentation-models-pytorch
    #   terratorch
    #   torchgeo
-tokenizers==0.22.0
+tokenizers==0.22.2
    # via
    #   -c requirements/common.txt
    #   -r requirements/rocm-test.in
@@ -1230,7 +1227,7 @@ tqdm==4.67.3
    #   tacoreader
    #   terratorch
    #   transformers
-transformers==4.57.5
+transformers==5.5.3
    # via
    #   -c requirements/common.txt
    #   -r requirements/rocm-test.in
@@ -1252,7 +1249,9 @@ typepy==1.3.4
 typer==0.24.1
    # via
    #   fastsafetensors
+    #   huggingface-hub
    #   perceptron
+    #   transformers
 typeshed-client==2.9.0
    # via jsonargparse
 typing-extensions==4.15.0

--- a/requirements/test.in
+++ b/requirements/test.in
@@ -18,7 +18,7 @@ httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test
 vocos # required for minicpmo_26 test
-peft>=0.15.0 # required for phi-4-mm test
+peft>=0.18.1 # required for phi-4-mm test
 pqdm
 ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
 resampy # required for audio tests
@@ -39,8 +39,8 @@ opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]>=0.4.11 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
-transformers==4.57.5
+transformers==5.5.3
-tokenizers==0.22.0
+tokenizers==0.22.2
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
 bitsandbytes==0.49.2

--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -4,7 +4,7 @@ absl-py==2.1.0
    # via
    #   rouge-score
    #   tensorboard
-accelerate==1.0.1
+accelerate==1.13.0
    # via peft
 aenum==3.1.16
    # via lightly
@@ -240,7 +240,6 @@ filelock==3.16.1
    #   huggingface-hub
    #   ray
    #   torch
-    #   transformers
    #   virtualenv
 fiona==1.10.1
    # via torchgeo
@@ -323,7 +322,7 @@ h5py==3.13.0
    # via terratorch
 harfile==0.3.0
    # via schemathesis
-hf-xet==1.1.7
+hf-xet==1.4.3
    # via huggingface-hub
 hiredis==3.0.0
    # via tensorizer
@@ -337,9 +336,10 @@ httpx==0.27.2
    # via
    #   -r requirements/test.in
    #   diffusers
+    #   huggingface-hub
    #   perceptron
    #   schemathesis
-huggingface-hub==0.36.2
+huggingface-hub==1.10.2
    # via
    #   accelerate
    #   datasets
@@ -740,7 +740,7 @@ pathvalidate==3.2.1
    # via pytablewriter
 patsy==1.0.1
    # via statsmodels
-peft==0.16.0
+peft==0.18.1
    # via -r requirements/test.in
 perceptron==0.1.4
    # via -r requirements/test.in
@@ -963,7 +963,7 @@ referencing==0.35.1
    # via
    #   jsonschema
    #   jsonschema-specifications
-regex==2024.9.11
+regex==2026.2.28
    # via
    #   diffusers
    #   nltk
@@ -982,7 +982,6 @@ requests==2.32.3
    #   google-api-core
    #   google-cloud-storage
    #   gpt-oss
-    #   huggingface-hub
    #   lightly
    #   lm-eval
    #   mistral-common
@@ -995,7 +994,6 @@ requests==2.32.3
    #   starlette-testclient
    #   tacoreader
    #   tiktoken
-    #   transformers
    #   wandb
 resampy==0.4.3
    # via -r requirements/test.in
@@ -1193,7 +1191,7 @@ timm==1.0.17
    #   segmentation-models-pytorch
    #   terratorch
    #   torchgeo
-tokenizers==0.22.0
+tokenizers==0.22.2
    # via
    #   -r requirements/test.in
    #   transformers
@@ -1269,7 +1267,7 @@ tqdm==4.67.3
    #   tacoreader
    #   terratorch
    #   transformers
-transformers==4.57.5
+transformers==5.5.3
    # via
    #   -r requirements/test.in
    #   genai-perf
@@ -1290,7 +1288,9 @@ typepy==1.3.2
 typer==0.15.2
    # via
    #   fastsafetensors
+    #   huggingface-hub
    #   perceptron
+    #   transformers
 types-python-dateutil==2.9.0.20241206
    # via arrow
 typeshed-client==2.8.2

--- a/requirements/test/xpu.txt
+++ b/requirements/test/xpu.txt
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements/test/xpu.in -c requirements/xpu.txt -o requirements/test/xpu.txt --index-strategy unsafe-best-match --torch-backend xpu --python-platform x86_64-manylinux_2_39 --python-version 3.12
+absl-py==2.4.0
+    # via
+    #   -r requirements/test/xpu.in
+    #   rouge-score
+accelerate==1.13.0
+    # via -r requirements/test/xpu.in
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.13.4
+    # via
+    #   -c requirements/common.txt
+    #   fsspec
+    #   gpt-oss
+    #   lm-eval
+aiosignal==1.4.0
+    # via aiohttp
+albumentations==1.4.6
+    # via -r requirements/test/xpu.in
+annotated-doc==0.0.4
+    # via
+    #   fastapi
+    #   typer
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.13.0
+    # via
+    #   httpx
+    #   starlette
+arctic-inference==0.1.1
+    # via -r requirements/test/xpu.in
+attrs==26.1.0
+    # via
+    #   aiohttp
+    #   jsonlines
+    #   jsonschema
+    #   referencing
+audioread==3.0.1
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+blobfile==3.0.0
+    # via -r requirements/test/xpu.in
+bm25s==0.2.13
+    # via
+    #   -r requirements/test/xpu.in
+    #   mteb
+bounded-pool-executor==0.0.3
+    # via pqdm
+certifi==2026.2.25
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==2.0.0
+    # via soundfile
+chardet==5.2.0
+    # via mbstrdecoder
+charset-normalizer==3.4.6
+    # via requests
+chz==0.4.0
+    # via gpt-oss
+click==8.3.1
+    # via
+    #   jiwer
+    #   nltk
+    #   schemathesis
+    #   typer
+    #   uvicorn
+colorama==0.4.6
+    # via sacrebleu
+coverage==7.13.5
+    # via pytest-cov
+dataproperty==1.1.0
+    # via
+    #   pytablewriter
+    #   tabledata
+datasets==4.8.4
+    # via
+    #   evaluate
+    #   lm-eval
+    #   mteb
+decorator==5.2.1
+    # via librosa
+dill==0.4.1
+    # via
+    #   datasets
+    #   evaluate
+    #   lm-eval
+    #   multiprocess
+docker==7.1.0
+    # via gpt-oss
+docopt==0.6.2
+    # via num2words
+dpcpp-cpp-rt==2025.3.1
+    # via
+    #   onemkl-sycl-blas
+    #   onemkl-sycl-dft
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-rng
+    #   onemkl-sycl-sparse
+    #   torch
+evaluate==0.4.6
+    # via lm-eval
+fastapi==0.135.2
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
+filelock==3.25.2
+    # via
+    #   -c requirements/common.txt
+    #   blobfile
+    #   datasets
+    #   huggingface-hub
+    #   modelscope
+    #   torch
+frozenlist==1.8.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2026.2.0
+    # via
+    #   datasets
+    #   evaluate
+    #   huggingface-hub
+    #   torch
+gpt-oss==0.0.8
+    # via -r requirements/test/xpu.in
+graphql-core==3.2.8
+    # via hypothesis-graphql
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+harfile==0.4.0
+    # via schemathesis
+hf-xet==1.4.3
+    # via huggingface-hub
+html2text==2025.4.15
+    # via gpt-oss
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   schemathesis
+huggingface-hub==1.10.2
+    # via
+    #   accelerate
+    #   datasets
+    #   evaluate
+    #   sentence-transformers
+    #   timm
+    #   tokenizers
+    #   transformers
+hypothesis==6.151.10
+    # via
+    #   hypothesis-graphql
+    #   hypothesis-jsonschema
+    #   schemathesis
+hypothesis-graphql==0.12.0
+    # via schemathesis
+hypothesis-jsonschema==0.23.1
+    # via schemathesis
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+imageio==2.37.3
+    # via scikit-image
+impi-rt==2021.17.0
+    # via
+    #   oneccl
+    #   torch
+iniconfig==2.3.0
+    # via pytest
+intel-cmplr-lib-rt==2025.3.1
+    # via
+    #   intel-sycl-rt
+    #   torch
+intel-cmplr-lib-ur==2025.3.1
+    # via
+    #   intel-openmp
+    #   intel-sycl-rt
+    #   torch
+intel-cmplr-lic-rt==2025.3.1
+    # via
+    #   intel-opencl-rt
+    #   intel-sycl-rt
+    #   torch
+intel-opencl-rt==2025.3.1
+    # via
+    #   dpcpp-cpp-rt
+    #   onemkl-sycl-blas
+    #   onemkl-sycl-dft
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-rng
+    #   onemkl-sycl-sparse
+    #   torch
+intel-openmp==2025.3.1
+    # via
+    #   dpcpp-cpp-rt
+    #   mkl
+    #   torch
+intel-pti==0.15.0
+    # via torch
+intel-sycl-rt==2025.3.1
+    # via
+    #   dpcpp-cpp-rt
+    #   oneccl
+    #   torch
+jinja2==3.1.6
+    # via
+    #   -c requirements/xpu.txt
+    #   lm-eval
+    #   torch
+jiwer==4.0.0
+    # via -r requirements/test/xpu.in
+joblib==1.5.3
+    # via
+    #   librosa
+    #   nltk
+    #   scikit-learn
+jsonlines==4.0.0
+    # via lm-eval
+jsonschema==4.26.0
+    # via
+    #   hypothesis-jsonschema
+    #   mistral-common
+    #   schemathesis
+jsonschema-rs==0.45.0
+    # via schemathesis
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+junit-xml==1.9
+    # via schemathesis
+lazy-loader==0.5
+    # via
+    #   librosa
+    #   scikit-image
+librosa==0.10.2.post1
+    # via -r requirements/test/xpu.in
+llvmlite==0.44.0
+    # via numba
+lm-eval==0.4.11
+    # via -r requirements/test/xpu.in
+lxml==6.0.2
+    # via
+    #   blobfile
+    #   gpt-oss
+    #   sacrebleu
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via
+    #   jinja2
+    #   werkzeug
+mbstrdecoder==1.1.4
+    # via
+    #   dataproperty
+    #   pytablewriter
+    #   typepy
+mdurl==0.1.2
+    # via markdown-it-py
+mistral-common==1.11.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/xpu.in
+mkl==2025.3.0
+    # via
+    #   onemkl-sycl-blas
+    #   onemkl-sycl-dft
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-rng
+    #   onemkl-sycl-sparse
+    #   torch
+modelscope==1.35.3
+    # via -r requirements/test/xpu.in
+more-itertools==10.8.0
+    # via lm-eval
+mpmath==1.3.0
+    # via sympy
+msgpack==1.1.2
+    # via librosa
+mteb==2.12.7
+    # via -r requirements/test/xpu.in
+multidict==6.7.1
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.19
+    # via
+    #   datasets
+    #   evaluate
+networkx==3.6.1
+    # via
+    #   scikit-image
+    #   torch
+nltk==3.9.4
+    # via rouge-score
+num2words==0.5.14
+    # via -r requirements/test/xpu.in
+numba==0.61.2
+    # via
+    #   -c requirements/xpu.txt
+    #   librosa
+numpy==2.2.6
+    # via
+    #   accelerate
+    #   albumentations
+    #   bm25s
+    #   datasets
+    #   evaluate
+    #   imageio
+    #   librosa
+    #   lm-eval
+    #   mistral-common
+    #   mteb
+    #   numba
+    #   opencv-python-headless
+    #   pandas
+    #   pytrec-eval-terrier
+    #   rouge-score
+    #   sacrebleu
+    #   scikit-image
+    #   scikit-learn
+    #   scipy
+    #   sentence-transformers
+    #   soundfile
+    #   soxr
+    #   tifffile
+    #   torchvision
+    #   transformers
+oneccl==2021.17.1
+    # via
+    #   oneccl-devel
+    #   torch
+oneccl-devel==2021.17.1
+    # via torch
+onemkl-license==2025.3.0
+    # via
+    #   mkl
+    #   torch
+onemkl-sycl-blas==2025.3.0
+    # via
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-sparse
+    #   torch
+onemkl-sycl-dft==2025.3.0
+    # via torch
+onemkl-sycl-lapack==2025.3.0
+    # via torch
+onemkl-sycl-rng==2025.3.0
+    # via torch
+onemkl-sycl-sparse==2025.3.0
+    # via torch
+openai-harmony==0.0.8
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
+opencv-python-headless==4.13.0.92
+    # via
+    #   -c requirements/common.txt
+    #   albumentations
+    #   mistral-common
+packaging==26.0
+    # via
+    #   -c requirements/xpu.txt
+    #   accelerate
+    #   datasets
+    #   evaluate
+    #   huggingface-hub
+    #   lazy-loader
+    #   modelscope
+    #   pooch
+    #   pytest
+    #   pytest-rerunfailures
+    #   scikit-image
+    #   transformers
+    #   typepy
+pandas==3.0.1
+    # via
+    #   datasets
+    #   evaluate
+pathvalidate==3.3.1
+    # via pytablewriter
+pillow==12.1.1
+    # via
+    #   imageio
+    #   mistral-common
+    #   scikit-image
+    #   torchvision
+platformdirs==4.9.4
+    # via pooch
+pluggy==1.6.0
+    # via
+    #   pytest
+    #   pytest-cov
+polars==1.39.3
+    # via mteb
+polars-runtime-32==1.39.3
+    # via polars
+pooch==1.8.2
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+portalocker==3.2.0
+    # via sacrebleu
+pqdm==0.2.0
+    # via -r requirements/test/xpu.in
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
+psutil==7.2.2
+    # via accelerate
+py==1.11.0
+    # via pytest-forked
+pyarrow==23.0.1
+    # via datasets
+pycountry==26.2.16
+    # via pydantic-extra-types
+pycparser==3.0
+    # via cffi
+pycryptodomex==3.23.0
+    # via blobfile
+pydantic==2.12.5
+    # via
+    #   -c requirements/common.txt
+    #   albumentations
+    #   fastapi
+    #   gpt-oss
+    #   mistral-common
+    #   mteb
+    #   openai-harmony
+    #   pydantic-extra-types
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-extra-types==2.11.1
+    # via mistral-common
+pyelftools==0.32
+    # via triton-xpu
+pygments==2.20.0
+    # via
+    #   pytest
+    #   rich
+pyrate-limiter==4.1.0
+    # via schemathesis
+pystemmer==3.0.0
+    # via
+    #   -r requirements/test/xpu.in
+    #   mteb
+pytablewriter==1.2.1
+    # via lm-eval
+pytest==9.0.2
+    # via
+    #   -r requirements/test/xpu.in
+    #   pytest-asyncio
+    #   pytest-cov
+    #   pytest-forked
+    #   pytest-rerunfailures
+    #   pytest-shard
+    #   pytest-timeout
+    #   schemathesis
+pytest-asyncio==1.3.0
+    # via -r requirements/test/xpu.in
+pytest-cov==6.3.0
+    # via -r requirements/test/xpu.in
+pytest-forked==1.6.0
+    # via -r requirements/test/xpu.in
+pytest-rerunfailures==14.0
+    # via -r requirements/test/xpu.in
+pytest-shard==0.1.2
+    # via -r requirements/test/xpu.in
+pytest-timeout==2.3.1
+    # via -r requirements/test/xpu.in
+python-dateutil==2.9.0.post0
+    # via
+    #   pandas
+    #   typepy
+pytrec-eval-terrier==0.5.10
+    # via mteb
+pytz==2026.1.post1
+    # via typepy
+pyyaml==6.0.3
+    # via
+    #   accelerate
+    #   albumentations
+    #   datasets
+    #   huggingface-hub
+    #   schemathesis
+    #   timm
+    #   transformers
+rapidfuzz==3.12.1
+    # via
+    #   -r requirements/test/xpu.in
+    #   jiwer
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2026.3.32
+    # via
+    #   nltk
+    #   sacrebleu
+    #   tiktoken
+    #   transformers
+requests==2.33.1
+    # via
+    #   -c requirements/common.txt
+    #   datasets
+    #   docker
+    #   evaluate
+    #   gpt-oss
+    #   lm-eval
+    #   mistral-common
+    #   modelscope
+    #   mteb
+    #   pooch
+    #   schemathesis
+    #   starlette-testclient
+    #   tiktoken
+rich==14.3.3
+    # via
+    #   mteb
+    #   schemathesis
+    #   typer
+rouge-score==0.1.2
+    # via lm-eval
+rpds-py==0.30.0
+    # via
+    #   jsonschema
+    #   referencing
+sacrebleu==2.6.0
+    # via lm-eval
+safetensors==0.7.0
+    # via
+    #   accelerate
+    #   timm
+    #   transformers
+schemathesis==4.14.2
+    # via -r requirements/test/xpu.in
+scikit-image==0.26.0
+    # via albumentations
+scikit-learn==1.8.0
+    # via
+    #   albumentations
+    #   librosa
+    #   lm-eval
+    #   mteb
+    #   sentence-transformers
+scipy==1.17.1
+    # via
+    #   albumentations
+    #   bm25s
+    #   librosa
+    #   mteb
+    #   pytrec-eval-terrier
+    #   scikit-image
+    #   scikit-learn
+    #   sentence-transformers
+sentence-transformers==5.3.0
+    # via mteb
+setuptools==80.10.2
+    # via
+    #   -c requirements/common.txt
+    #   -c requirements/xpu.txt
+    #   modelscope
+    #   pytablewriter
+    #   torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via
+    #   -c requirements/common.txt
+    #   junit-xml
+    #   python-dateutil
+    #   rouge-score
+sortedcontainers==2.4.0
+    # via hypothesis
+soundfile==0.13.1
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+    #   mistral-common
+soxr==0.5.0.post1
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+    #   mistral-common
+sqlitedict==2.1.0
+    # via lm-eval
+starlette==1.0.0
+    # via
+    #   fastapi
+    #   starlette-testclient
+starlette-testclient==0.4.1
+    # via schemathesis
+structlog==25.5.0
+    # via gpt-oss
+sympy==1.14.0
+    # via torch
+tabledata==1.3.4
+    # via pytablewriter
+tabulate==0.10.0
+    # via sacrebleu
+tbb==2022.3.0
+    # via
+    #   intel-opencl-rt
+    #   mkl
+    #   torch
+tblib==3.1.0
+    # via -r requirements/test/xpu.in
+tcmlib==1.4.1
+    # via
+    #   tbb
+    #   torch
+    #   umf
+tcolorpy==0.1.7
+    # via pytablewriter
+tenacity==9.1.4
+    # via
+    #   gpt-oss
+    #   lm-eval
+    #   schemathesis
+termcolor==3.3.0
+    # via gpt-oss
+threadpoolctl==3.6.0
+    # via scikit-learn
+tifffile==2026.3.3
+    # via scikit-image
+tiktoken==0.12.0
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
+    #   lm-eval
+    #   mistral-common
+timm==1.0.17
+    # via -r requirements/test/xpu.in
+tokenizers==0.22.2
+    # via
+    #   -c requirements/common.txt
+    #   transformers
+torch==2.10.0+xpu
+    # via
+    #   -c requirements/xpu.txt
+    #   accelerate
+    #   mteb
+    #   sentence-transformers
+    #   timm
+    #   torchvision
+torchvision==0.25.0+xpu
+    # via timm
+tqdm==4.67.3
+    # via
+    #   datasets
+    #   evaluate
+    #   huggingface-hub
+    #   lm-eval
+    #   modelscope
+    #   mteb
+    #   nltk
+    #   pqdm
+    #   sentence-transformers
+    #   transformers
+transformers==5.5.3
+    # via
+    #   -c requirements/common.txt
+    #   sentence-transformers
+triton-xpu==3.6.0
+    # via torch
+typepy==1.3.4
+    # via
+    #   dataproperty
+    #   pytablewriter
+    #   tabledata
+typer==0.24.1
+    # via
+    #   huggingface-hub
+    #   transformers
+typing-extensions==4.15.0
+    # via
+    #   -c requirements/common.txt
+    #   aiosignal
+    #   albumentations
+    #   anyio
+    #   chz
+    #   fastapi
+    #   huggingface-hub
+    #   librosa
+    #   lm-eval
+    #   mistral-common
+    #   mteb
+    #   pqdm
+    #   pydantic
+    #   pydantic-core
+    #   pydantic-extra-types
+    #   pytest-asyncio
+    #   referencing
+    #   schemathesis
+    #   sentence-transformers
+    #   starlette
+    #   torch
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   fastapi
+    #   pydantic
+umf==1.0.2
+    # via
+    #   intel-cmplr-lib-ur
+    #   torch
+urllib3==2.6.3
+    # via
+    #   blobfile
+    #   docker
+    #   modelscope
+    #   requests
+uvicorn==0.42.0
+    # via gpt-oss
+werkzeug==3.1.7
+    # via schemathesis
+word2number==1.1
+    # via lm-eval
+xxhash==3.6.0
+    # via
+    #   datasets
+    #   evaluate
+yarl==1.23.0
+    # via aiohttp
+zstandard==0.25.0
+    # via lm-eval
--- a/requirements/xpu-test.in
+++ b/requirements/xpu-test.in
@@ -9,6 +9,8 @@ pytest-shard
 # --- Core Tools & Bindings ---
 absl-py
 arctic-inference
+lm_eval[api]
+modelscope
 # --- Audio Processing ---
 librosa

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -409,6 +409,15 @@ class HfRunner:
            model_name,
            trust_remote_code=trust_remote_code,
        )
+        # HF runner should use the HF config so that it's consistent with the HF model
+        if self.config.__module__.startswith("vllm.transformers_utils.configs"):
+            from transformers.models.auto.configuration_auto import CONFIG_MAPPING
+            del CONFIG_MAPPING._extra_content[self.config.model_type]
+            self.config = AutoConfig.from_pretrained(
+                model_name,
+                trust_remote_code=trust_remote_code,
+            )
        self.device = self.get_default_device()
        self.dtype = dtype = _get_and_verify_dtype(
            self.model_name,

--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -3,6 +3,7 @@
 import tempfile
 from collections import OrderedDict
+from importlib import reload
 from unittest.mock import MagicMock
 import pytest
@@ -43,6 +44,18 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
        cleanup_dist_env_and_memory(shutdown_ray=True)
+@pytest.fixture
+def maybe_enable_lora_dual_stream(monkeypatch: pytest.MonkeyPatch):
+    """Enable the LoRA dual-stream environment flag for a test on CUDA.
+
+    On CUDA platforms this sets ``VLLM_LORA_ENABLE_DUAL_STREAM=1`` through
+    ``monkeypatch`` and makes sure ``vllm.lora.layers.base_linear`` has been
+    imported, reloading it when it does not expose ``lora_linear_async``.
+    On every other platform the fixture does nothing before yielding.
+
+    NOTE(review): the module reload is not reverted on teardown, so the
+    reloaded module stays in place for later tests -- confirm that is intended.
+    """
+    if current_platform.is_cuda():
+        # The variable must be set before the import/reload below.
+        monkeypatch.setenv("VLLM_LORA_ENABLE_DUAL_STREAM", "1")
+        import vllm.lora.layers.base_linear
+        # Presumably `lora_linear_async` only exists when the module was
+        # imported with the flag set -- TODO confirm against base_linear.
+        if not hasattr(vllm.lora.layers.base_linear, "lora_linear_async"):
+            # Reload the module to ensure the environment variable takes effect.
+            reload(vllm.lora.layers.base_linear)
+    yield
 @pytest.fixture
 def dist_init():
    from tests.utils import ensure_current_vllm_config

--- a/tests/lora/test_minicpmv_tp.py
+++ b/tests/lora/test_minicpmv_tp.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from importlib.metadata import version
 import pytest
+from packaging.version import Version
 import vllm
 from vllm.assets.image import ImageAsset
@@ -10,6 +13,14 @@ from vllm.platforms import current_platform
 from ..utils import multi_gpu_test
+# Module-level mark: skips every test in this file when the installed
+# transformers version is 5.0 or newer.
+pytestmark = pytest.mark.skipif(
+    Version("5.0") <= Version(version("transformers")),
+    reason=(
+        "MiniCPMV custom processor uses tokenizer.im_start_id which is not "
+        "available on TokenizersBackend in transformers v5.0+"
+    ),
+)
 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
 PROMPT_TEMPLATE = (

--- a/tests/model_executor/test_weight_utils.py
+++ b/tests/model_executor/test_weight_utils.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
 import tempfile
 import huggingface_hub.constants
@@ -10,26 +9,10 @@ from huggingface_hub.utils import LocalEntryNotFoundError
 from vllm.model_executor.model_loader.weight_utils import (
    download_weights_from_hf,
-    enable_hf_transfer,
    maybe_remap_kv_scale_name,
 )
-def test_hf_transfer_auto_activation():
-    if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
-        # in case it is already set, we can't test the auto activation
-        pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")
-    enable_hf_transfer()
-    try:
-        # enable hf hub transfer if available
-        import hf_transfer  # type: ignore # noqa
-        HF_TRANSFER_ACTIVE = True
-    except ImportError:
-        HF_TRANSFER_ACTIVE = False
-    assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE
 def test_download_weights_from_hf():
    with tempfile.TemporaryDirectory() as tmpdir:
        # assert LocalEntryNotFoundError error is thrown
@@ -178,5 +161,4 @@ class TestMaybeRemapKvScaleName:
 if __name__ == "__main__":
-    test_hf_transfer_auto_activation()
    test_download_weights_from_hf()
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@@ -143,6 +143,11 @@ def test_models(
        # in parts of the operators
        pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
+    if current_platform.is_cpu() and model == "TitanML/tiny-mixtral":
+        # This untrained model is sensitive to the rounding error
+        # Fuse ops to reduce bfloat16 rounding
+        monkeypatch.setenv("VLLM_CPU_CI_ENV", "0")
    with hf_runner(model) as hf_model:
        hf_outputs = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs