Unverified commit 03f8d3a5, authored by Harry Mellor, committed by GitHub
Browse files

Update to transformers v5 (#30566)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: khluu <khluu000@gmail.com>
Signed-off-by: Kevin H. Luu <khluu000@gmail.com>
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: khluu <khluu000@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: jiang1.li <jiang1.li@intel.com>
parent 6dc94914
...@@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image" ...@@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image"
docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu . docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu .
# Run the image, setting --shm-size=4g for tensor parallel. # Run the image, setting --shm-size=4g for tensor parallel.
docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \ docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 -e VLLM_CPU_ATTN_SPLIT_KV=0 --shm-size=4g "$IMAGE_NAME" \
timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}" timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}"
...@@ -4,7 +4,6 @@ depends_on: ...@@ -4,7 +4,6 @@ depends_on:
steps: steps:
- label: Basic Models Tests (Initialization) - label: Basic Models Tests (Initialization)
timeout_in_minutes: 45 timeout_in_minutes: 45
device: h200_18gb
torch_nightly: true torch_nightly: true
source_file_dependencies: source_file_dependencies:
- vllm/ - vllm/
...@@ -73,3 +72,18 @@ steps: ...@@ -73,3 +72,18 @@ steps:
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
# Whisper needs spawn method to avoid deadlock # Whisper needs spawn method to avoid deadlock
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
- label: Transformers Backward Compatibility Models Test
working_dir: "/vllm-workspace/"
optional: true
soft_fail: true
commands:
- pip install transformers==4.57.5
- pytest -v -s tests/models/test_initialization.py
- pytest -v -s tests/models/test_transformers.py
- pytest -v -s tests/models/multimodal/processing/
- pytest -v -s tests/models/multimodal/test_mapping.py
- python3 examples/offline_inference/basic/chat.py
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
# Whisper needs spawn method to avoid deadlock
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
...@@ -642,7 +642,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -642,7 +642,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
else \ else \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
fi; \ fi; \
uv pip install --system accelerate hf_transfer modelscope \ uv pip install --system accelerate modelscope \
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}" "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
# ============================================================ # ============================================================
...@@ -756,9 +756,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -756,9 +756,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -e tests/vllm_test_utils uv pip install --system -e tests/vllm_test_utils
# enable fast downloads from hf (for testing) # enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \ ENV HF_XET_HIGH_PERFORMANCE 1
uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER 1 # increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT 60
# Copy in the v1 package for testing (it isn't distributed yet) # Copy in the v1 package for testing (it isn't distributed yet)
COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
......
...@@ -197,6 +197,12 @@ ADD ./.buildkite/ ./.buildkite/ ...@@ -197,6 +197,12 @@ ADD ./.buildkite/ ./.buildkite/
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils uv pip install -e tests/vllm_test_utils
# enable fast downloads from hf (for testing)
ENV HF_XET_HIGH_PERFORMANCE 1
# increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT 60
######################### RELEASE IMAGE ######################### ######################### RELEASE IMAGE #########################
FROM base AS vllm-openai FROM base AS vllm-openai
......
...@@ -272,9 +272,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -272,9 +272,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -e tests/vllm_test_utils uv pip install --system -e tests/vllm_test_utils
# enable fast downloads from hf (for testing) # enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \ ENV HF_XET_HIGH_PERFORMANCE 1
uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER 1 # increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT 60
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r requirements/test/nightly-torch.txt uv pip install --system -r requirements/test/nightly-torch.txt
......
...@@ -365,9 +365,10 @@ RUN cd /vllm-workspace \ ...@@ -365,9 +365,10 @@ RUN cd /vllm-workspace \
&& python3 -m pip install pytest-shard && python3 -m pip install pytest-shard
# enable fast downloads from hf (for testing) # enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \ ENV HF_XET_HIGH_PERFORMANCE=1
uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1 # increase timeout for hf downloads (for testing)
ENV HF_HUB_DOWNLOAD_TIMEOUT 60
# install audio decode package `torchcodec` from source (required due to # install audio decode package `torchcodec` from source (required due to
# ROCm and torch version mismatch) for tests with datasets package # ROCm and torch version mismatch) for tests with datasets package
......
...@@ -240,7 +240,7 @@ uv pip install vllm==${VLLM_VERSION} \ ...@@ -240,7 +240,7 @@ uv pip install vllm==${VLLM_VERSION} \
# Install dependencies # Install dependencies
pip install --upgrade numba \ pip install --upgrade numba \
scipy \ scipy \
huggingface-hub[cli,hf_transfer] \ huggingface-hub[cli] \
setuptools_scm setuptools_scm
pip install -r requirements/rocm.txt pip install -r requirements/rocm.txt
......
...@@ -7,7 +7,7 @@ requests >= 2.26.0 ...@@ -7,7 +7,7 @@ requests >= 2.26.0
tqdm tqdm
blake3 blake3
py-cpuinfo py-cpuinfo
transformers >= 4.56.0, < 5 transformers >= 4.56.0, != 5.0.*, != 5.1.*, != 5.2.*, != 5.3.*, != 5.4.*, != 5.5.0
tokenizers >= 0.21.1 # Required for fast incremental detokenization. tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994 protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
...@@ -37,7 +37,7 @@ pyyaml ...@@ -37,7 +37,7 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL. einops # Required for Qwen2-VL.
compressed-tensors == 0.14.0.1 # required for compressed-tensors compressed-tensors == 0.15.0.1 # required for compressed-tensors
depyf==0.20.0 # required for profiling and debugging with compilation config depyf==0.20.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files watchfiles # required for http server to monitor the updates of TLS files
......
...@@ -18,7 +18,7 @@ httpx ...@@ -18,7 +18,7 @@ httpx
librosa # required for audio tests librosa # required for audio tests
vector_quantize_pytorch # required for minicpmo_26 test vector_quantize_pytorch # required for minicpmo_26 test
vocos # required for minicpmo_26 test vocos # required for minicpmo_26 test
peft>=0.15.0 # required for phi-4-mm test peft>=0.18.1 # required for phi-4-mm test
pqdm pqdm
ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
resampy # required for audio tests resampy # required for audio tests
...@@ -39,8 +39,8 @@ opencv-python-headless >= 4.13.0 # required for video test ...@@ -39,8 +39,8 @@ opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.11 # required for model evaluation test lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5 transformers==5.5.3
tokenizers==0.22.0 tokenizers==0.22.2
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
# quantization # quantization
bitsandbytes==0.49.2 bitsandbytes==0.49.2
......
...@@ -4,7 +4,7 @@ absl-py==2.1.0 ...@@ -4,7 +4,7 @@ absl-py==2.1.0
# via # via
# rouge-score # rouge-score
# tensorboard # tensorboard
accelerate==1.0.1 accelerate==1.13.0
# via peft # via peft
aenum==3.1.16 aenum==3.1.16
# via lightly # via lightly
...@@ -248,7 +248,6 @@ filelock==3.16.1 ...@@ -248,7 +248,6 @@ filelock==3.16.1
# huggingface-hub # huggingface-hub
# ray # ray
# torch # torch
# transformers
# virtualenv # virtualenv
fiona==1.10.1 fiona==1.10.1
# via torchgeo # via torchgeo
...@@ -331,7 +330,7 @@ h5py==3.13.0 ...@@ -331,7 +330,7 @@ h5py==3.13.0
# via terratorch # via terratorch
harfile==0.3.0 harfile==0.3.0
# via schemathesis # via schemathesis
hf-xet==1.1.7 hf-xet==1.4.3
# via huggingface-hub # via huggingface-hub
hiredis==3.0.0 hiredis==3.0.0
# via tensorizer # via tensorizer
...@@ -345,9 +344,10 @@ httpx==0.27.2 ...@@ -345,9 +344,10 @@ httpx==0.27.2
# via # via
# -r requirements/test/cuda.in # -r requirements/test/cuda.in
# diffusers # diffusers
# huggingface-hub
# perceptron # perceptron
# schemathesis # schemathesis
huggingface-hub==0.36.2 huggingface-hub==1.10.2
# via # via
# accelerate # accelerate
# datasets # datasets
...@@ -756,7 +756,7 @@ pathvalidate==3.2.1 ...@@ -756,7 +756,7 @@ pathvalidate==3.2.1
# via pytablewriter # via pytablewriter
patsy==1.0.1 patsy==1.0.1
# via statsmodels # via statsmodels
peft==0.16.0 peft==0.18.1
# via -r requirements/test/cuda.in # via -r requirements/test/cuda.in
perceptron==0.1.4 perceptron==0.1.4
# via -r requirements/test/cuda.in # via -r requirements/test/cuda.in
...@@ -982,7 +982,7 @@ referencing==0.35.1 ...@@ -982,7 +982,7 @@ referencing==0.35.1
# via # via
# jsonschema # jsonschema
# jsonschema-specifications # jsonschema-specifications
regex==2024.9.11 regex==2026.2.28
# via # via
# diffusers # diffusers
# nltk # nltk
...@@ -1002,7 +1002,6 @@ requests==2.32.3 ...@@ -1002,7 +1002,6 @@ requests==2.32.3
# google-api-core # google-api-core
# google-cloud-storage # google-cloud-storage
# gpt-oss # gpt-oss
# huggingface-hub
# lightly # lightly
# lm-eval # lm-eval
# mistral-common # mistral-common
...@@ -1015,7 +1014,6 @@ requests==2.32.3 ...@@ -1015,7 +1014,6 @@ requests==2.32.3
# starlette-testclient # starlette-testclient
# tacoreader # tacoreader
# tiktoken # tiktoken
# transformers
# wandb # wandb
resampy==0.4.3 resampy==0.4.3
# via -r requirements/test/cuda.in # via -r requirements/test/cuda.in
...@@ -1216,7 +1214,7 @@ timm==1.0.17 ...@@ -1216,7 +1214,7 @@ timm==1.0.17
# segmentation-models-pytorch # segmentation-models-pytorch
# terratorch # terratorch
# torchgeo # torchgeo
tokenizers==0.22.0 tokenizers==0.22.2
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/cuda.in # -r requirements/test/cuda.in
...@@ -1295,7 +1293,7 @@ tqdm==4.67.3 ...@@ -1295,7 +1293,7 @@ tqdm==4.67.3
# tacoreader # tacoreader
# terratorch # terratorch
# transformers # transformers
transformers==4.57.5 transformers==5.5.3
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/cuda.in # -r requirements/test/cuda.in
...@@ -1317,7 +1315,9 @@ typepy==1.3.2 ...@@ -1317,7 +1315,9 @@ typepy==1.3.2
typer==0.15.2 typer==0.15.2
# via # via
# fastsafetensors # fastsafetensors
# huggingface-hub
# perceptron # perceptron
# transformers
types-python-dateutil==2.9.0.20241206 types-python-dateutil==2.9.0.20241206
# via arrow # via arrow
typeshed-client==2.8.2 typeshed-client==2.8.2
......
...@@ -29,8 +29,8 @@ opencv-python-headless >= 4.13.0 # required for video test ...@@ -29,8 +29,8 @@ opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.11 # required for model evaluation test lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5 transformers==5.5.3
tokenizers==0.22.0 tokenizers==0.22.2
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
# quantization # quantization
bitsandbytes>=0.49.2 bitsandbytes>=0.49.2
......
...@@ -38,8 +38,8 @@ opencv-python-headless>=4.13.0 # required for video test ...@@ -38,8 +38,8 @@ opencv-python-headless>=4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.11 # required for model evaluation test lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5 transformers==5.5.3
tokenizers==0.22.0 tokenizers==0.22.2
schemathesis>=3.39.15 # Required for openai schema test schemathesis>=3.39.15 # Required for openai schema test
# quantization # quantization
bitsandbytes==0.49.2 bitsandbytes==0.49.2
...@@ -82,4 +82,3 @@ plotly # required for perf comparison html report ...@@ -82,4 +82,3 @@ plotly # required for perf comparison html report
rapidfuzz rapidfuzz
torchgeo==0.7.0 torchgeo==0.7.0
multiprocess==0.70.16 multiprocess==0.70.16
huggingface-hub==0.36.2
...@@ -39,7 +39,7 @@ annotated-doc==0.0.4 ...@@ -39,7 +39,7 @@ annotated-doc==0.0.4
# typer # typer
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
anthropic==0.89.0 anthropic==0.93.0
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/../common.txt # -r requirements/test/../common.txt
...@@ -172,7 +172,7 @@ colorful==0.5.8 ...@@ -172,7 +172,7 @@ colorful==0.5.8
# via ray # via ray
colorlog==6.10.1 colorlog==6.10.1
# via optuna # via optuna
compressed-tensors==0.14.0.1 compressed-tensors==0.15.0.1
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/../common.txt # -r requirements/test/../common.txt
...@@ -269,9 +269,9 @@ fastapi==0.135.2 ...@@ -269,9 +269,9 @@ fastapi==0.135.2
# model-hosting-container-standards # model-hosting-container-standards
fastapi-cli==0.0.24 fastapi-cli==0.0.24
# via fastapi # via fastapi
fastapi-cloud-cli==0.15.1 fastapi-cloud-cli==0.16.1
# via fastapi-cli # via fastapi-cli
fastar==0.9.0 fastar==0.10.0
# via fastapi-cloud-cli # via fastapi-cloud-cli
fastparquet==2026.3.0 fastparquet==2026.3.0
# via genai-perf # via genai-perf
...@@ -290,7 +290,6 @@ filelock==3.25.2 ...@@ -290,7 +290,6 @@ filelock==3.25.2
# python-discovery # python-discovery
# ray # ray
# torch # torch
# transformers
# virtualenv # virtualenv
fiona==1.10.1 fiona==1.10.1
# via torchgeo # via torchgeo
...@@ -384,7 +383,7 @@ h5py==3.16.0 ...@@ -384,7 +383,7 @@ h5py==3.16.0
# via terratorch # via terratorch
harfile==0.4.0 harfile==0.4.0
# via schemathesis # via schemathesis
hf-xet==1.4.2 hf-xet==1.4.3
# via huggingface-hub # via huggingface-hub
hiredis==3.3.1 hiredis==3.3.1
# via tensorizer # via tensorizer
...@@ -403,6 +402,7 @@ httpx==0.27.2 ...@@ -403,6 +402,7 @@ httpx==0.27.2
# diffusers # diffusers
# fastapi # fastapi
# fastapi-cloud-cli # fastapi-cloud-cli
# huggingface-hub
# mcp # mcp
# model-hosting-container-standards # model-hosting-container-standards
# openai # openai
...@@ -410,9 +410,8 @@ httpx==0.27.2 ...@@ -410,9 +410,8 @@ httpx==0.27.2
# schemathesis # schemathesis
httpx-sse==0.4.3 httpx-sse==0.4.3
# via mcp # via mcp
huggingface-hub==0.36.2 huggingface-hub==1.10.2
# via # via
# -r requirements/test/rocm.in
# accelerate # accelerate
# datasets # datasets
# diffusers # diffusers
...@@ -484,7 +483,7 @@ jinja2==3.1.6 ...@@ -484,7 +483,7 @@ jinja2==3.1.6
# genai-perf # genai-perf
# lm-eval # lm-eval
# torch # torch
jiter==0.13.0 jiter==0.14.0
# via # via
# anthropic # anthropic
# openai # openai
...@@ -631,7 +630,7 @@ msgpack==1.1.2 ...@@ -631,7 +630,7 @@ msgpack==1.1.2
# via # via
# librosa # librosa
# ray # ray
msgspec==0.20.0 msgspec==0.21.0
# via -r requirements/test/../common.txt # via -r requirements/test/../common.txt
mteb==2.11.5 mteb==2.11.5
# via -r requirements/test/rocm.in # via -r requirements/test/rocm.in
...@@ -742,7 +741,7 @@ omegaconf==2.3.0 ...@@ -742,7 +741,7 @@ omegaconf==2.3.0
# lightning # lightning
open-clip-torch==2.32.0 open-clip-torch==2.32.0
# via -r requirements/test/rocm.in # via -r requirements/test/rocm.in
openai==2.30.0 openai==2.31.0
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/../common.txt # -r requirements/test/../common.txt
...@@ -1093,7 +1092,7 @@ python-dotenv==1.2.2 ...@@ -1093,7 +1092,7 @@ python-dotenv==1.2.2
# uvicorn # uvicorn
python-json-logger==4.1.0 python-json-logger==4.1.0
# via -r requirements/test/../common.txt # via -r requirements/test/../common.txt
python-multipart==0.0.22 python-multipart==0.0.26
# via # via
# fastapi # fastapi
# mcp # mcp
...@@ -1180,7 +1179,6 @@ requests==2.32.5 ...@@ -1180,7 +1179,6 @@ requests==2.32.5
# google-api-core # google-api-core
# google-cloud-storage # google-cloud-storage
# gpt-oss # gpt-oss
# huggingface-hub
# lightly # lightly
# lm-eval # lm-eval
# mistral-common # mistral-common
...@@ -1194,7 +1192,6 @@ requests==2.32.5 ...@@ -1194,7 +1192,6 @@ requests==2.32.5
# starlette-testclient # starlette-testclient
# tacoreader # tacoreader
# tiktoken # tiktoken
# transformers
# wandb # wandb
resampy==0.4.3 resampy==0.4.3
# via -r requirements/test/rocm.in # via -r requirements/test/rocm.in
...@@ -1428,7 +1425,7 @@ timm==1.0.17 ...@@ -1428,7 +1425,7 @@ timm==1.0.17
# segmentation-models-pytorch # segmentation-models-pytorch
# terratorch # terratorch
# torchgeo # torchgeo
tokenizers==0.22.0 tokenizers==0.22.2
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/../common.txt # -r requirements/test/../common.txt
...@@ -1471,7 +1468,7 @@ tqdm==4.67.3 ...@@ -1471,7 +1468,7 @@ tqdm==4.67.3
# tacoreader # tacoreader
# terratorch # terratorch
# transformers # transformers
transformers==4.57.5 transformers==5.5.3
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# -r requirements/test/../common.txt # -r requirements/test/../common.txt
...@@ -1498,7 +1495,9 @@ typer==0.24.1 ...@@ -1498,7 +1495,9 @@ typer==0.24.1
# fastapi-cli # fastapi-cli
# fastapi-cloud-cli # fastapi-cloud-cli
# fastsafetensors # fastsafetensors
# huggingface-hub
# perceptron # perceptron
# transformers
typeshed-client==2.9.0 typeshed-client==2.9.0
# via jsonargparse # via jsonargparse
typing-extensions==4.15.0 typing-extensions==4.15.0
......
...@@ -13,7 +13,6 @@ pytest-shard ...@@ -13,7 +13,6 @@ pytest-shard
absl-py absl-py
accelerate accelerate
arctic-inference arctic-inference
hf_transfer
lm_eval[api] lm_eval[api]
modelscope modelscope
......
...@@ -19,7 +19,9 @@ aiosignal==1.4.0 ...@@ -19,7 +19,9 @@ aiosignal==1.4.0
albumentations==1.4.6 albumentations==1.4.6
# via -r requirements/test/xpu.in # via -r requirements/test/xpu.in
annotated-doc==0.0.4 annotated-doc==0.0.4
# via fastapi # via
# fastapi
# typer
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
anyio==4.13.0 anyio==4.13.0
...@@ -64,6 +66,7 @@ click==8.3.1 ...@@ -64,6 +66,7 @@ click==8.3.1
# jiwer # jiwer
# nltk # nltk
# schemathesis # schemathesis
# typer
# uvicorn # uvicorn
colorama==0.4.6 colorama==0.4.6
# via sacrebleu # via sacrebleu
...@@ -112,7 +115,6 @@ filelock==3.25.2 ...@@ -112,7 +115,6 @@ filelock==3.25.2
# huggingface-hub # huggingface-hub
# modelscope # modelscope
# torch # torch
# transformers
frozenlist==1.8.0 frozenlist==1.8.0
# via # via
# aiohttp # aiohttp
...@@ -133,9 +135,7 @@ h11==0.16.0 ...@@ -133,9 +135,7 @@ h11==0.16.0
# uvicorn # uvicorn
harfile==0.4.0 harfile==0.4.0
# via schemathesis # via schemathesis
hf-transfer==0.1.9 hf-xet==1.4.3
# via -r requirements/test/xpu.in
hf-xet==1.4.2
# via huggingface-hub # via huggingface-hub
html2text==2025.4.15 html2text==2025.4.15
# via gpt-oss # via gpt-oss
...@@ -144,8 +144,9 @@ httpcore==1.0.9 ...@@ -144,8 +144,9 @@ httpcore==1.0.9
httpx==0.28.1 httpx==0.28.1
# via # via
# datasets # datasets
# huggingface-hub
# schemathesis # schemathesis
huggingface-hub==0.36.2 huggingface-hub==1.10.2
# via # via
# accelerate # accelerate
# datasets # datasets
...@@ -515,7 +516,6 @@ requests==2.33.1 ...@@ -515,7 +516,6 @@ requests==2.33.1
# docker # docker
# evaluate # evaluate
# gpt-oss # gpt-oss
# huggingface-hub
# lm-eval # lm-eval
# mistral-common # mistral-common
# modelscope # modelscope
...@@ -524,11 +524,11 @@ requests==2.33.1 ...@@ -524,11 +524,11 @@ requests==2.33.1
# schemathesis # schemathesis
# starlette-testclient # starlette-testclient
# tiktoken # tiktoken
# transformers
rich==14.3.3 rich==14.3.3
# via # via
# mteb # mteb
# schemathesis # schemathesis
# typer
rouge-score==0.1.2 rouge-score==0.1.2
# via lm-eval # via lm-eval
rpds-py==0.30.0 rpds-py==0.30.0
...@@ -572,6 +572,8 @@ setuptools==80.10.2 ...@@ -572,6 +572,8 @@ setuptools==80.10.2
# modelscope # modelscope
# pytablewriter # pytablewriter
# torch # torch
shellingham==1.5.4
# via typer
six==1.17.0 six==1.17.0
# via # via
# -c requirements/common.txt # -c requirements/common.txt
...@@ -665,7 +667,7 @@ tqdm==4.67.3 ...@@ -665,7 +667,7 @@ tqdm==4.67.3
# pqdm # pqdm
# sentence-transformers # sentence-transformers
# transformers # transformers
transformers==4.57.6 transformers==5.5.3
# via # via
# -c requirements/common.txt # -c requirements/common.txt
# sentence-transformers # sentence-transformers
...@@ -676,6 +678,10 @@ typepy==1.3.4 ...@@ -676,6 +678,10 @@ typepy==1.3.4
# dataproperty # dataproperty
# pytablewriter # pytablewriter
# tabledata # tabledata
typer==0.24.1
# via
# huggingface-hub
# transformers
typing-extensions==4.15.0 typing-extensions==4.15.0
# via # via
# -c requirements/common.txt # -c requirements/common.txt
......
...@@ -410,6 +410,15 @@ class HfRunner: ...@@ -410,6 +410,15 @@ class HfRunner:
model_name, model_name,
trust_remote_code=trust_remote_code, trust_remote_code=trust_remote_code,
) )
# HF runner should use the HF config so that it's consistent with the HF model
if self.config.__module__.startswith("vllm.transformers_utils.configs"):
from transformers.models.auto.configuration_auto import CONFIG_MAPPING
del CONFIG_MAPPING._extra_content[self.config.model_type]
self.config = AutoConfig.from_pretrained(
model_name,
trust_remote_code=trust_remote_code,
)
self.device = self.get_default_device() self.device = self.get_default_device()
self.dtype = dtype = _get_and_verify_dtype( self.dtype = dtype = _get_and_verify_dtype(
self.model_name, self.model_name,
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import tempfile import tempfile
from collections import OrderedDict from collections import OrderedDict
from importlib import reload
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest import pytest
...@@ -47,6 +48,11 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool): ...@@ -47,6 +48,11 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
def maybe_enable_lora_dual_stream(monkeypatch: pytest.MonkeyPatch): def maybe_enable_lora_dual_stream(monkeypatch: pytest.MonkeyPatch):
if current_platform.is_cuda(): if current_platform.is_cuda():
monkeypatch.setenv("VLLM_LORA_ENABLE_DUAL_STREAM", "1") monkeypatch.setenv("VLLM_LORA_ENABLE_DUAL_STREAM", "1")
import vllm.lora.layers.base_linear
if not hasattr(vllm.lora.layers.base_linear, "lora_linear_async"):
# Reload the module to ensure the environment variable takes effect.
reload(vllm.lora.layers.base_linear)
yield yield
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from importlib.metadata import version
import pytest import pytest
from packaging.version import Version
import vllm import vllm
from vllm.assets.image import ImageAsset from vllm.assets.image import ImageAsset
...@@ -10,6 +13,14 @@ from vllm.platforms import current_platform ...@@ -10,6 +13,14 @@ from vllm.platforms import current_platform
from ..utils import multi_gpu_test from ..utils import multi_gpu_test
pytestmark = pytest.mark.skipif(
Version("5.0") <= Version(version("transformers")),
reason=(
"MiniCPMV custom processor uses tokenizer.im_start_id which is not "
"available on TokenizersBackend in transformers v5.0+"
),
)
MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5" MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
PROMPT_TEMPLATE = ( PROMPT_TEMPLATE = (
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import tempfile import tempfile
import huggingface_hub.constants import huggingface_hub.constants
...@@ -10,26 +9,10 @@ from huggingface_hub.utils import LocalEntryNotFoundError ...@@ -10,26 +9,10 @@ from huggingface_hub.utils import LocalEntryNotFoundError
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
download_weights_from_hf, download_weights_from_hf,
enable_hf_transfer,
maybe_remap_kv_scale_name, maybe_remap_kv_scale_name,
) )
def test_hf_transfer_auto_activation():
if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
# in case it is already set, we can't test the auto activation
pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")
enable_hf_transfer()
try:
# enable hf hub transfer if available
import hf_transfer # type: ignore # noqa
HF_TRANSFER_ACTIVE = True
except ImportError:
HF_TRANSFER_ACTIVE = False
assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE
def test_download_weights_from_hf(): def test_download_weights_from_hf():
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
# assert LocalEntryNotFoundError error is thrown # assert LocalEntryNotFoundError error is thrown
...@@ -178,5 +161,4 @@ class TestMaybeRemapKvScaleName: ...@@ -178,5 +161,4 @@ class TestMaybeRemapKvScaleName:
if __name__ == "__main__": if __name__ == "__main__":
test_hf_transfer_auto_activation()
test_download_weights_from_hf() test_download_weights_from_hf()
...@@ -143,6 +143,11 @@ def test_models( ...@@ -143,6 +143,11 @@ def test_models(
# in parts of the operators # in parts of the operators
pytest.skip(f"Skipping '{model}' model test with AITER kernel.") pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
if current_platform.is_cpu() and model == "TitanML/tiny-mixtral":
# This untrained model is sensitive to the rounding error
# Fuse ops to reduce bfloat16 rounding
monkeypatch.setenv("VLLM_CPU_CI_ENV", "0")
with hf_runner(model) as hf_model: with hf_runner(model) as hf_model:
hf_outputs = hf_model.generate_greedy_logprobs_limit( hf_outputs = hf_model.generate_greedy_logprobs_limit(
example_prompts, max_tokens, num_logprobs example_prompts, max_tokens, num_logprobs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment