Unverified Commit e5ef4fa9 authored by Harry Mellor's avatar Harry Mellor Committed by GitHub
Browse files

Upgrade `transformers` to `v4.50.3` (#13905)


Signed-off-by: default avatarHarry Mellor <19981378+hmellor@users.noreply.github.com>
parent 037bcd94
...@@ -73,7 +73,7 @@ The Transformers fallback explicitly supports the following features: ...@@ -73,7 +73,7 @@ The Transformers fallback explicitly supports the following features:
- <project:#quantization-index> (except GGUF) - <project:#quantization-index> (except GGUF)
- <project:#lora-adapter> - <project:#lora-adapter>
- <project:#distributed-serving> (requires `transformers>=4.49.0`) - <project:#distributed-serving>
#### Remote code #### Remote code
......
...@@ -6,7 +6,7 @@ requests >= 2.26.0 ...@@ -6,7 +6,7 @@ requests >= 2.26.0
tqdm tqdm
blake3 blake3
py-cpuinfo py-cpuinfo
transformers >= 4.48.2 # Required for Bamba model and Transformers backend. transformers >= 4.50.3
tokenizers >= 0.19.1 # Required for Llama 3. tokenizers >= 0.19.1 # Required for Llama 3.
protobuf # Required by LlamaTokenizer. protobuf # Required by LlamaTokenizer.
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
......
...@@ -30,7 +30,7 @@ matplotlib # required for qwen-vl test ...@@ -30,7 +30,7 @@ matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.5.4 # required for pixtral test mistral_common[opencv] >= 1.5.4 # required for pixtral test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.4 # required for model evaluation test lm-eval[api]==0.4.4 # required for model evaluation test
transformers==4.48.2 transformers==4.50.3
# quantization # quantization
bitsandbytes>=0.45.3 bitsandbytes>=0.45.3
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
......
...@@ -643,7 +643,7 @@ tqdm==4.66.6 ...@@ -643,7 +643,7 @@ tqdm==4.66.6
# transformers # transformers
tqdm-multiprocess==0.0.11 tqdm-multiprocess==0.0.11
# via lm-eval # via lm-eval
transformers==4.48.2 transformers==4.50.3
# via # via
# -r requirements/test.in # -r requirements/test.in
# genai-perf # genai-perf
......
...@@ -245,7 +245,7 @@ TEST_MODELS = [ ...@@ -245,7 +245,7 @@ TEST_MODELS = [
# [LANGUAGE GENERATION] # [LANGUAGE GENERATION]
"microsoft/Phi-3.5-MoE-instruct", "microsoft/Phi-3.5-MoE-instruct",
"meta-llama/Llama-3.2-1B-Instruct", "meta-llama/Llama-3.2-1B-Instruct",
# "ArthurZ/Ilama-3.2-1B", NOTE: Uncomment after #13905 "ArthurZ/Ilama-3.2-1B",
"ibm/PowerLM-3b", "ibm/PowerLM-3b",
# [LANGUAGE EMBEDDING] # [LANGUAGE EMBEDDING]
"intfloat/e5-mistral-7b-instruct", "intfloat/e5-mistral-7b-instruct",
......
...@@ -8,9 +8,7 @@ from collections import defaultdict ...@@ -8,9 +8,7 @@ from collections import defaultdict
from pathlib import PosixPath from pathlib import PosixPath
import pytest import pytest
from packaging.version import Version
from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
from transformers import __version__ as TRANSFORMERS_VERSION
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import identity from vllm.utils import identity
...@@ -126,25 +124,6 @@ VLM_TEST_SETTINGS = { ...@@ -126,25 +124,6 @@ VLM_TEST_SETTINGS = {
dtype="bfloat16", dtype="bfloat16",
marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")], # noqa: E501 marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")], # noqa: E501
), ),
# TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL
# once we upgraded to transformers>=4.49.0.
"qwen2_vl": VLMTestInfo(
models=["Qwen/Qwen2-VL-2B-Instruct"],
test_type=(
VLMTestType.IMAGE,
VLMTestType.MULTI_IMAGE,
VLMTestType.VIDEO
),
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForVision2Seq,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
"qwen2_5_vl": VLMTestInfo( "qwen2_5_vl": VLMTestInfo(
models=["Qwen/Qwen2.5-VL-3B-Instruct"], models=["Qwen/Qwen2.5-VL-3B-Instruct"],
test_type=( test_type=(
...@@ -218,12 +197,6 @@ VLM_TEST_SETTINGS = { ...@@ -218,12 +197,6 @@ VLM_TEST_SETTINGS = {
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output, hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501 stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)], image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
marks=[
pytest.mark.skipif(
Version(TRANSFORMERS_VERSION) >= Version("4.48"),
reason="HF model is not compatible with transformers>=4.48",
)
],
), ),
"fuyu": VLMTestInfo( "fuyu": VLMTestInfo(
models=["adept/fuyu-8b"], models=["adept/fuyu-8b"],
...@@ -336,6 +309,7 @@ VLM_TEST_SETTINGS = { ...@@ -336,6 +309,7 @@ VLM_TEST_SETTINGS = {
prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
num_video_frames=16, num_video_frames=16,
max_model_len=16384, max_model_len=16384,
hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501
auto_cls=AutoModelForVision2Seq, auto_cls=AutoModelForVision2Seq,
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
custom_test_opts=[CustomTestOptions( custom_test_opts=[CustomTestOptions(
...@@ -365,12 +339,6 @@ VLM_TEST_SETTINGS = { ...@@ -365,12 +339,6 @@ VLM_TEST_SETTINGS = {
auto_cls=AutoModelForImageTextToText, auto_cls=AutoModelForImageTextToText,
vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output, vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
patch_hf_runner=model_utils.mantis_patch_hf_runner, patch_hf_runner=model_utils.mantis_patch_hf_runner,
marks=[
pytest.mark.skipif(
Version(TRANSFORMERS_VERSION) >= Version("4.48"),
reason="HF model is not compatible with transformers>=4.48",
)
],
), ),
"minicpmv_25": VLMTestInfo( "minicpmv_25": VLMTestInfo(
models=["openbmb/MiniCPM-Llama3-V-2_5"], models=["openbmb/MiniCPM-Llama3-V-2_5"],
...@@ -450,6 +418,23 @@ VLM_TEST_SETTINGS = { ...@@ -450,6 +418,23 @@ VLM_TEST_SETTINGS = {
vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output, vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
prompt_path_encoder=model_utils.qwen_prompt_path_encoder, prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
), ),
"qwen2_vl": VLMTestInfo(
models=["Qwen/Qwen2-VL-2B-Instruct"],
test_type=(
VLMTestType.IMAGE,
VLMTestType.MULTI_IMAGE,
VLMTestType.VIDEO
),
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForVision2Seq,
vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[pytest.mark.cpu_model],
),
"skywork_r1v": VLMTestInfo( "skywork_r1v": VLMTestInfo(
models=["Skywork/Skywork-R1V-38B"], models=["Skywork/Skywork-R1V-38B"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
...@@ -515,6 +500,7 @@ VLM_TEST_SETTINGS = { ...@@ -515,6 +500,7 @@ VLM_TEST_SETTINGS = {
max_model_len=16384, max_model_len=16384,
max_num_seqs=2, max_num_seqs=2,
auto_cls=AutoModelForVision2Seq, auto_cls=AutoModelForVision2Seq,
hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
custom_test_opts=[CustomTestOptions( custom_test_opts=[CustomTestOptions(
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs( inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
......
...@@ -104,6 +104,13 @@ def _llava_vllm_to_hf_output(vllm_output: RunnerOutput, model: str, ...@@ -104,6 +104,13 @@ def _llava_vllm_to_hf_output(vllm_output: RunnerOutput, model: str,
return hf_output_ids, hf_output_str, out_logprobs return hf_output_ids, hf_output_str, out_logprobs
def llava_onevision_hf_model_kwargs(model: str) -> dict:
"""Workaround to fix the sliding window issue in llava_onevision."""
config = AutoConfig.from_pretrained(model)
config.text_config.sliding_window = None
return config.to_dict()
def llava_onevision_vllm_to_hf_output(vllm_output: RunnerOutput, def llava_onevision_vllm_to_hf_output(vllm_output: RunnerOutput,
model: str) -> RunnerOutput: model: str) -> RunnerOutput:
"""Sanitize vllm output [llava-onevision] to compare with hf output.""" """Sanitize vllm output [llava-onevision] to compare with hf output."""
......
...@@ -34,6 +34,16 @@ class _HfExamplesInfo: ...@@ -34,6 +34,16 @@ class _HfExamplesInfo:
The minimum version of HF Transformers that is required to run this model. The minimum version of HF Transformers that is required to run this model.
""" """
max_transformers_version: Optional[str] = None
"""
The maximum version of HF Transformers that this model runs on.
"""
transformers_version_reason: Optional[str] = None
"""
The reason for the minimum/maximum version requirement.
"""
is_available_online: bool = True is_available_online: bool = True
""" """
Set this to ``False`` if the name of this architecture no longer exists on Set this to ``False`` if the name of this architecture no longer exists on
...@@ -57,16 +67,23 @@ class _HfExamplesInfo: ...@@ -57,16 +67,23 @@ class _HfExamplesInfo:
If the installed transformers version does not meet the requirements, If the installed transformers version does not meet the requirements,
perform the given action. perform the given action.
""" """
if self.min_transformers_version is None: if (self.min_transformers_version is None
and self.max_transformers_version is None):
return return
current_version = TRANSFORMERS_VERSION current_version = TRANSFORMERS_VERSION
required_version = self.min_transformers_version min_version = self.min_transformers_version
if Version(current_version) < Version(required_version): max_version = self.max_transformers_version
msg = ( msg = f"`transformers=={current_version}` installed, but `transformers"
f"You have `transformers=={current_version}` installed, but " if min_version and Version(current_version) < Version(min_version):
f"`transformers>={required_version}` is required to run this " msg += f">={min_version}` is required to run this model."
"model") elif max_version and Version(current_version) > Version(max_version):
msg += f"<={max_version}` is required to run this model."
else:
return
if self.transformers_version_reason:
msg += f" Reason: {self.transformers_version_reason}"
if on_fail == "error": if on_fail == "error":
raise RuntimeError(msg) raise RuntimeError(msg)
...@@ -245,6 +262,9 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -245,6 +262,9 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b"), # noqa: E501 "Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b"), # noqa: E501
"ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"), # noqa: E501 "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"), # noqa: E501
"DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny", # noqa: E501 "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny", # noqa: E501
extras={"fork": "Isotr0py/deepseek-vl2-tiny"}, # noqa: E501
max_transformers_version="4.48", # noqa: E501
transformers_version_reason="HF model is not compatible.", # noqa: E501
hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501 hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501
"FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"), "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
"Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it", "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it",
...@@ -266,13 +286,19 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -266,13 +286,19 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"LlavaNextVideoForConditionalGeneration": _HfExamplesInfo("llava-hf/LLaVA-NeXT-Video-7B-hf"), # noqa: E501 "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo("llava-hf/LLaVA-NeXT-Video-7B-hf"), # noqa: E501
"LlavaOnevisionForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501 "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501
"MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3", # noqa: E501 "MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3", # noqa: E501
max_transformers_version="4.48", # noqa: E501
transformers_version_reason="HF model is not compatible.", # noqa: E501
hf_overrides={"architectures": ["MantisForConditionalGeneration"]}), # noqa: E501 hf_overrides={"architectures": ["MantisForConditionalGeneration"]}), # noqa: E501
"MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6",
max_transformers_version="4.48",
transformers_version_reason="Use of deprecated imports which have been removed.", # noqa: E501
trust_remote_code=True), trust_remote_code=True),
"MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5", "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5",
extras={"2.6": "openbmb/MiniCPM-V-2_6"}, # noqa: E501 extras={"2.6": "openbmb/MiniCPM-V-2_6"}, # noqa: E501
trust_remote_code=True), trust_remote_code=True),
"MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924", "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924",
max_transformers_version="4.48",
transformers_version_reason="Use of private method which no longer exists.", # noqa: E501
extras={"olmo": "allenai/Molmo-7B-O-0924"}, # noqa: E501 extras={"olmo": "allenai/Molmo-7B-O-0924"}, # noqa: E501
trust_remote_code=True), trust_remote_code=True),
"NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B", "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment