Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
03858e6d
Unverified
Commit
03858e6d
authored
Sep 25, 2025
by
Isotr0py
Committed by
GitHub
Sep 25, 2025
Browse files
[Bugfix] Fix InternS1 video processing after Transformers v4.56 (#25644)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
532a6cfc
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
68 additions
and
3 deletions
+68
-3
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+2
-1
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+1
-0
vllm/model_executor/models/interns1.py
vllm/model_executor/models/interns1.py
+10
-1
vllm/transformers_utils/processor.py
vllm/transformers_utils/processor.py
+55
-1
No files found.
.buildkite/test-pipeline.yaml
View file @
03858e6d
...
...
@@ -770,8 +770,9 @@ steps:
-
pytest -v -s tests/models/multimodal/processing/
-
pytest -v -s tests/models/multimodal/test_mapping.py
-
python3 examples/offline_inference/basic/chat.py
-
python3 examples/offline_inference/audio_language.py --model-type whisper
-
python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
# Whisper needs spawn method to avoid deadlock
-
VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
-
label
:
Blackwell Test
# 38 min
timeout_in_minutes
:
60
...
...
tests/models/multimodal/processing/test_common.py
View file @
03858e6d
...
...
@@ -213,6 +213,7 @@ _IGNORE_MM_KEYS = {
MM_DATA_PATCHES = {
    # GLM4.1V and Qwen3-VL require video metadata to be included in the input
    "glm4v": glm4_1v_patch_mm_data,
    "glm4v_moe": glm4_1v_patch_mm_data,
    "qwen3_vl": qwen3_vl_patch_mm_data,
    "qwen3_vl_moe": qwen3_vl_patch_mm_data,
}
...
...
vllm/model_executor/models/interns1.py
View file @
03858e6d
...
...
@@ -16,6 +16,8 @@ from transformers import BatchFeature, InternVLProcessor, PretrainedConfig
from
transformers.activations
import
ACT2FN
from
transformers.models.got_ocr2.image_processing_got_ocr2_fast
import
(
GotOcr2ImageProcessorFast
)
from
transformers.models.internvl.video_processing_internvl
import
(
InternVLVideoProcessor
)
from
vllm.config
import
VllmConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
...
...
@@ -31,6 +33,8 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
PromptUpdate
,
PromptUpdateDetails
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.processor
import
(
cached_video_processor_from_config
)
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
.interfaces
import
(
MultiModalEmbeddings
,
SupportsLoRA
,
...
...
@@ -152,7 +156,12 @@ class InternS1ProcessingInfo(BaseProcessingInfo):
"""ProcessingInfo for InternS1-style models."""
def get_hf_processor(self, **kwargs: object) -> InternVLProcessor:
    """Return the HF ``InternVLProcessor`` with its video processor replaced.

    The processor is looked up through ``self.ctx.get_hf_processor`` and its
    ``video_processor`` attribute is then overwritten with an
    ``InternVLVideoProcessor`` obtained from
    ``cached_video_processor_from_config`` for ``self.ctx.model_config``.

    Args:
        **kwargs: Forwarded unchanged to both the processor lookup and the
            video-processor lookup.

    Returns:
        The processor instance, carrying the replaced ``video_processor``.
    """
    hf_processor = self.ctx.get_hf_processor(InternVLProcessor, **kwargs)
    # Do not return the looked-up processor directly: an early return here
    # would make the video-processor override below unreachable.
    hf_processor.video_processor = cached_video_processor_from_config(
        self.ctx.model_config,
        processor_cls=InternVLVideoProcessor,
        **kwargs,
    )
    return hf_processor
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
    """Return the per-modality limit mapping for this model.

    Both the ``"image"`` and ``"video"`` entries map to ``None``.
    """
    # dict.fromkeys fills every key with None and builds a new dict per call.
    return dict.fromkeys(("image", "video"))
...
...
vllm/transformers_utils/processor.py
View file @
03858e6d
...
...
@@ -5,10 +5,11 @@ from functools import lru_cache
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
,
Union
,
cast
from
transformers
import
(
AutoFeatureExtractor
,
AutoImageProcessor
,
AutoProcessor
)
AutoProcessor
,
AutoVideoProcessor
)
from
transformers.feature_extraction_utils
import
FeatureExtractionMixin
from
transformers.image_processing_utils
import
BaseImageProcessor
from
transformers.processing_utils
import
ProcessorMixin
from
transformers.video_processing_utils
import
BaseVideoProcessor
from
typing_extensions
import
TypeVar
from
vllm.utils
import
get_allowed_kwarg_only_overrides
...
...
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
from
vllm.config
import
ModelConfig
# Type variable for processor classes: bounded by, and defaulting to,
# ProcessorMixin.
_P = TypeVar("_P", bound=ProcessorMixin, default=ProcessorMixin)
# Type variable for video processor classes: bounded by, and defaulting to,
# BaseVideoProcessor.
_V = TypeVar("_V", bound=BaseVideoProcessor, default=BaseVideoProcessor)
class
HashableDict
(
dict
):
...
...
@@ -243,3 +245,55 @@ def cached_image_processor_from_config(
trust_remote_code
=
model_config
.
trust_remote_code
,
**
_merge_mm_kwargs
(
model_config
,
AutoImageProcessor
,
**
kwargs
),
)
def get_video_processor(
    processor_name: str,
    *args: Any,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    processor_cls_overrides: Optional[type[_V]] = None,
    **kwargs: Any,
) -> BaseVideoProcessor:
    """Load a video processor for the given model name via HuggingFace.

    Args:
        processor_name: Name passed as the first argument to
            ``from_pretrained``.
        *args: Extra positional arguments forwarded to ``from_pretrained``.
        revision: Forwarded to ``from_pretrained``.
        trust_remote_code: Forwarded to ``from_pretrained``; also decides how
            a ``ValueError`` from loading is reported (see Raises).
        processor_cls_overrides: Class whose ``from_pretrained`` is called.
            When falsy, ``AutoVideoProcessor`` is used instead.
        **kwargs: Extra keyword arguments forwarded to ``from_pretrained``.

    Returns:
        The loaded processor, typed as ``BaseVideoProcessor``.

    Raises:
        RuntimeError: ``from_pretrained`` raised ``ValueError`` while
            ``trust_remote_code`` was false; the original error is chained.
        ValueError: Re-raised unchanged when ``trust_remote_code`` is true.
    """
    # Choosing the class cannot raise ValueError, so keep it outside the try
    # block and guard only the actual load.
    processor_cls = processor_cls_overrides or AutoVideoProcessor
    try:
        processor = processor_cls.from_pretrained(
            processor_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as e:
        if trust_remote_code:
            # Remote code was already allowed, so the hint below would not
            # help; propagate the original error with its traceback intact.
            raise

        # If the error pertains to the processor class not existing or not
        # currently being imported, suggest using the --trust-remote-code flag.
        # Unlike AutoTokenizer, AutoVideoProcessor does not separate such errors
        err_msg = (
            "Failed to load the video processor. If the video processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI.")
        raise RuntimeError(err_msg) from e

    return cast(BaseVideoProcessor, processor)
# Memoized variant of get_video_processor, wrapped with functools.lru_cache
# using its default settings; every argument passed to it must be hashable.
cached_get_video_processor = lru_cache(get_video_processor)
def cached_video_processor_from_config(
    model_config: "ModelConfig",
    processor_cls: Optional[type[_V]] = None,
    **kwargs: Any,
):
    """Fetch a video processor for ``model_config`` via the cached loader.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``; ``processor_cls`` is handed on as the
    ``processor_cls_overrides`` argument. The remaining keyword arguments are
    first passed through ``_merge_mm_kwargs`` (together with ``model_config``
    and ``AutoVideoProcessor``) and the result is forwarded to
    ``cached_get_video_processor``.
    """
    model_name = model_config.model
    model_revision = model_config.revision
    allow_remote_code = model_config.trust_remote_code
    merged_kwargs = _merge_mm_kwargs(
        model_config,
        AutoVideoProcessor,
        **kwargs,
    )
    return cached_get_video_processor(
        model_name,
        revision=model_revision,
        trust_remote_code=allow_remote_code,
        processor_cls_overrides=processor_cls,  # type: ignore[arg-type]
        **merged_kwargs,
    )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment