Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4ecedd18
Unverified
Commit
4ecedd18
authored
Jul 23, 2025
by
Isotr0py
Committed by
GitHub
Jul 23, 2025
Browse files
[Bugfix] Fix nightly transformers CI failure (#21427)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
107111a8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
67 additions
and
11 deletions
+67
-11
tests/models/registry.py
tests/models/registry.py
+6
-6
vllm/model_executor/models/tarsier.py
vllm/model_executor/models/tarsier.py
+1
-5
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+2
-0
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+2
-0
vllm/transformers_utils/configs/nemotron_vl.py
vllm/transformers_utils/configs/nemotron_vl.py
+56
-0
No files found.
tests/models/registry.py
View file @
4ecedd18
...
@@ -443,6 +443,12 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -443,6 +443,12 @@ _MULTIMODAL_EXAMPLE_MODELS = {
hf_overrides
=
{
"architectures"
:
[
"TarsierForConditionalGeneration"
]}),
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"TarsierForConditionalGeneration"
]}),
# noqa: E501
"Tarsier2ForConditionalGeneration"
:
_HfExamplesInfo
(
"omni-research/Tarsier2-Recap-7b"
,
# noqa: E501
"Tarsier2ForConditionalGeneration"
:
_HfExamplesInfo
(
"omni-research/Tarsier2-Recap-7b"
,
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"Tarsier2ForConditionalGeneration"
]}),
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"Tarsier2ForConditionalGeneration"
]}),
# noqa: E501
"VoxtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Voxtral-Mini-3B-2507"
,
min_transformers_version
=
"4.54"
,
# disable this temporarily until we support HF format
is_available_online
=
False
,
),
# [Encoder-decoder]
# [Encoder-decoder]
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
# Therefore, we borrow the BartTokenizer from the original Bart model
...
@@ -450,13 +456,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -450,13 +456,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
tokenizer
=
"Isotr0py/Florence-2-tokenizer"
,
# noqa: E501
tokenizer
=
"Isotr0py/Florence-2-tokenizer"
,
# noqa: E501
trust_remote_code
=
True
),
# noqa: E501
trust_remote_code
=
True
),
# noqa: E501
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-11B-Vision-Instruct"
),
# noqa: E501
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-11B-Vision-Instruct"
),
# noqa: E501
"VoxtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Voxtral-Mini-3B-2507"
,
tokenizer_mode
=
"mistral"
,
min_transformers_version
=
"4.54"
),
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3"
),
# noqa: E501
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3"
),
# noqa: E501
# [Cross-encoder]
# [Cross-encoder]
"JinaVLForRanking"
:
_HfExamplesInfo
(
"jinaai/jina-reranker-m0"
),
# noqa: E501
"JinaVLForRanking"
:
_HfExamplesInfo
(
"jinaai/jina-reranker-m0"
),
# noqa: E501
}
}
...
...
vllm/model_executor/models/tarsier.py
View file @
4ecedd18
...
@@ -13,8 +13,7 @@ from transformers import LlavaConfig as HfLlavaConfig
...
@@ -13,8 +13,7 @@ from transformers import LlavaConfig as HfLlavaConfig
from
transformers
import
PretrainedConfig
,
SiglipVisionConfig
from
transformers
import
PretrainedConfig
,
SiglipVisionConfig
from
transformers.image_utils
import
ImageInput
,
get_image_size
,
to_numpy_array
from
transformers.image_utils
import
ImageInput
,
get_image_size
,
to_numpy_array
from
transformers.models.llava
import
LlavaProcessor
from
transformers.models.llava
import
LlavaProcessor
from
transformers.processing_utils
import
(
ProcessingKwargs
,
Unpack
,
from
transformers.processing_utils
import
ProcessingKwargs
,
Unpack
_validate_images_text_input_order
)
from
transformers.tokenization_utils_base
import
PreTokenizedInput
,
TextInput
from
transformers.tokenization_utils_base
import
PreTokenizedInput
,
TextInput
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
...
@@ -94,9 +93,6 @@ class TarsierProcessor(LlavaProcessor):
...
@@ -94,9 +93,6 @@ class TarsierProcessor(LlavaProcessor):
raise
ValueError
(
raise
ValueError
(
"You have to specify at least one of `images` or `text`."
)
"You have to specify at least one of `images` or `text`."
)
# check if images and text inputs are reversed for BC
images
,
text
=
_validate_images_text_input_order
(
images
,
text
)
output_kwargs
=
self
.
_merge_kwargs
(
output_kwargs
=
self
.
_merge_kwargs
(
TarsierProcessorKwargs
,
TarsierProcessorKwargs
,
tokenizer_init_kwargs
=
self
.
tokenizer
.
init_kwargs
,
tokenizer_init_kwargs
=
self
.
tokenizer
.
init_kwargs
,
...
...
vllm/transformers_utils/config.py
View file @
4ecedd18
...
@@ -37,6 +37,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
...
@@ -37,6 +37,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
MiniMaxText01Config
,
MiniMaxText01Config
,
MiniMaxVL01Config
,
MllamaConfig
,
MiniMaxVL01Config
,
MllamaConfig
,
MLPSpeculatorConfig
,
MPTConfig
,
MLPSpeculatorConfig
,
MPTConfig
,
Nemotron_Nano_VL_Config
,
NemotronConfig
,
NVLM_D_Config
,
NemotronConfig
,
NVLM_D_Config
,
OvisConfig
,
RWConfig
,
OvisConfig
,
RWConfig
,
SkyworkR1VChatConfig
,
SolarConfig
,
SkyworkR1VChatConfig
,
SolarConfig
,
...
@@ -80,6 +81,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
...
@@ -80,6 +81,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
"dbrx"
:
DbrxConfig
,
"dbrx"
:
DbrxConfig
,
"deepseek_vl_v2"
:
DeepseekVLV2Config
,
"deepseek_vl_v2"
:
DeepseekVLV2Config
,
"kimi_vl"
:
KimiVLConfig
,
"kimi_vl"
:
KimiVLConfig
,
"Llama_Nemotron_Nano_VL"
:
Nemotron_Nano_VL_Config
,
"mpt"
:
MPTConfig
,
"mpt"
:
MPTConfig
,
"RefinedWeb"
:
RWConfig
,
# For tiiuae/falcon-40b(-instruct)
"RefinedWeb"
:
RWConfig
,
# For tiiuae/falcon-40b(-instruct)
"RefinedWebModel"
:
RWConfig
,
# For tiiuae/falcon-7b(-instruct)
"RefinedWebModel"
:
RWConfig
,
# For tiiuae/falcon-7b(-instruct)
...
...
vllm/transformers_utils/configs/__init__.py
View file @
4ecedd18
...
@@ -23,6 +23,7 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
...
@@ -23,6 +23,7 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
from
vllm.transformers_utils.configs.mpt
import
MPTConfig
from
vllm.transformers_utils.configs.mpt
import
MPTConfig
from
vllm.transformers_utils.configs.nemotron
import
NemotronConfig
from
vllm.transformers_utils.configs.nemotron
import
NemotronConfig
from
vllm.transformers_utils.configs.nemotron_h
import
NemotronHConfig
from
vllm.transformers_utils.configs.nemotron_h
import
NemotronHConfig
from
vllm.transformers_utils.configs.nemotron_vl
import
Nemotron_Nano_VL_Config
from
vllm.transformers_utils.configs.nvlm_d
import
NVLM_D_Config
from
vllm.transformers_utils.configs.nvlm_d
import
NVLM_D_Config
from
vllm.transformers_utils.configs.ovis
import
OvisConfig
from
vllm.transformers_utils.configs.ovis
import
OvisConfig
from
vllm.transformers_utils.configs.skyworkr1v
import
SkyworkR1VChatConfig
from
vllm.transformers_utils.configs.skyworkr1v
import
SkyworkR1VChatConfig
...
@@ -50,6 +51,7 @@ __all__ = [
...
@@ -50,6 +51,7 @@ __all__ = [
"KimiVLConfig"
,
"KimiVLConfig"
,
"NemotronConfig"
,
"NemotronConfig"
,
"NemotronHConfig"
,
"NemotronHConfig"
,
"Nemotron_Nano_VL_Config"
,
"NVLM_D_Config"
,
"NVLM_D_Config"
,
"OvisConfig"
,
"OvisConfig"
,
"SkyworkR1VChatConfig"
,
"SkyworkR1VChatConfig"
,
...
...
vllm/transformers_utils/configs/nemotron_vl.py
0 → 100644
View file @
4ecedd18
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# yapf: disable
# ruff: noqa: E501
# Adapted from
# https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1/blob/main/configuration.py
# --------------------------------------------------------
# Adapted from https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B under MIT License
# LICENSE is in incl_licenses directory.
# --------------------------------------------------------
from
transformers
import
LlamaConfig
from
transformers.configuration_utils
import
PretrainedConfig
from
transformers.dynamic_module_utils
import
get_class_from_dynamic_module
class Nemotron_Nano_VL_Config(PretrainedConfig):
    """Composite configuration holding a vision sub-config and a Llama text sub-config.

    The vision sub-config class is resolved at construction time through
    ``get_class_from_dynamic_module`` using the ``auto_map["AutoConfig"]``
    entry of ``vision_config``; the text sub-config is always a
    ``LlamaConfig``. The remaining keyword parameters are stored on the
    instance unchanged.
    """

    model_type = 'Llama_Nemotron_Nano_VL'
    is_composition = True

    def __init__(
        self,
        vision_config=None,
        llm_config=None,
        force_image_size=None,
        downsample_ratio=0.5,
        template=None,
        ps_version='v1',
        image_tag_type="internvl",
        projector_hidden_size=4096,
        vit_hidden_size=1280,
        **kwargs,
    ):
        """Build the composite config.

        Args:
            vision_config: Mapping of vision-config fields, or ``None``. When
                given, it must contain ``auto_map["AutoConfig"]``; the whole
                mapping is then passed as keyword arguments to the resolved
                config class. When ``None``, an empty ``PretrainedConfig``
                is stored instead.
            llm_config: Mapping of keyword arguments for ``LlamaConfig``, or
                ``None`` to use a default ``LlamaConfig()``.
            force_image_size: Stored as ``self.force_image_size``.
            downsample_ratio: Stored as ``self.downsample_ratio``.
            template: Stored as ``self.template``.
            ps_version: Pixel shuffle version, stored as ``self.ps_version``.
            image_tag_type: Stored as ``self.image_tag_type``.
            projector_hidden_size: Stored as ``self.projector_hidden_size``.
            vit_hidden_size: Stored as ``self.vit_hidden_size``.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.

        Raises:
            AssertionError: If ``vision_config`` is given without an
                ``auto_map`` entry containing ``"AutoConfig"``.
        """
        super().__init__(**kwargs)

        # Vision tower configuration.
        if vision_config is None:
            self.vision_config = PretrainedConfig()
        else:
            assert "auto_map" in vision_config and "AutoConfig" in vision_config["auto_map"]
            auto_config_ref = vision_config["auto_map"]["AutoConfig"]
            # The reference is split on "--" and the pieces are reversed before
            # being passed positionally to the dynamic-module loader.
            # NOTE(review): presumably the value looks like
            # "<repo>--<module>.<Class>" -- confirm against the checkpoint.
            loader_args = auto_config_ref.split("--")[::-1]
            vision_config_cls = get_class_from_dynamic_module(*loader_args)
            self.vision_config = vision_config_cls(**vision_config)

        # Language model configuration (always a LlamaConfig).
        self.text_config = (
            LlamaConfig() if llm_config is None else LlamaConfig(**llm_config)
        )

        # Plain scalar settings are copied onto the instance as given.
        self.force_image_size = force_image_size
        self.downsample_ratio = downsample_ratio
        self.template = template  # TODO move out of here and into the tokenizer
        self.ps_version = ps_version  # Pixel shuffle version
        self.image_tag_type = image_tag_type  # TODO: into the tokenizer too?
        self.projector_hidden_size = projector_hidden_size
        self.vit_hidden_size = vit_hidden_size
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment