Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
845ee348
Unverified
Commit
845ee348
authored
Feb 26, 2026
by
Cyrus Leung
Committed by
GitHub
Feb 26, 2026
Browse files
[Misc] Standardize handling of `mm_processor_kwargs.size` (#35284)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent
ec13e549
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing 9 changed files with 135 additions and 28 deletions
+135
-28
tests/lora/test_qwenvl.py
tests/lora/test_qwenvl.py
+18
-5
tests/models/multimodal/processing/test_gemma3.py
tests/models/multimodal/processing/test_gemma3.py
+7
-5
tests/models/multimodal/processing/test_qwen2_vl.py
tests/models/multimodal/processing/test_qwen2_vl.py
+38
-7
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+18
-3
vllm/model_executor/models/hunyuan_vision.py
vllm/model_executor/models/hunyuan_vision.py
+7
-1
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+7
-1
vllm/model_executor/models/paddleocr_vl.py
vllm/model_executor/models/paddleocr_vl.py
+18
-3
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+15
-2
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+7
-1
No files found.
tests/lora/test_qwenvl.py
View file @
845ee348
...
@@ -2,6 +2,9 @@
...
@@ -2,6 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
packaging.version
import
Version
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
import
vllm
import
vllm
from
vllm.assets.image
import
ImageAsset
from
vllm.assets.image
import
ImageAsset
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
...
@@ -18,15 +21,25 @@ class TestConfig:
...
@@ -18,15 +21,25 @@ class TestConfig:
enable_tower_connector_lora
:
bool
=
False
enable_tower_connector_lora
:
bool
=
False
max_model_len
:
int
=
8192
max_model_len
:
int
=
8192
gpu_memory_utilization
:
float
=
0.85
gpu_memory_utilization
:
float
=
0.85
- mm_processor_kwargs: dict[str, int] | None = None
+ mm_processor_kwargs: dict[str, object] | None = None
mm_processor_cache_gb
:
float
=
4
mm_processor_cache_gb
:
float
=
4
def
__post_init__
(
self
):
def
__post_init__
(
self
):
if
self
.
mm_processor_kwargs
is
None
:
if
self
.
mm_processor_kwargs
is
None
:
- self.mm_processor_kwargs = {
-     "min_pixels": 28 * 28,
-     "max_pixels": 1280 * 28 * 28,
- }
+ # There is a bug in transformers v4 where size is ignored by
+ # `Qwen2VLProcessor.__call__`
+ if Version(TRANSFORMERS_VERSION) < Version("5.2.0"):
+     self.mm_processor_kwargs = {
+         "min_pixels": 28 * 28,
+         "max_pixels": 1280 * 28 * 28,
+     }
+ else:
+     self.mm_processor_kwargs = {
+         "size": {
+             "shortest_edge": 28 * 28,
+             "longest_edge": 1280 * 28 * 28,
+         }
+     }
class
Qwen2VLTester
:
class
Qwen2VLTester
:
...
...
tests/models/multimodal/processing/test_gemma3.py
View file @
845ee348
...
@@ -150,8 +150,11 @@ class TestGemma3nAudioTensorLogic:
...
@@ -150,8 +150,11 @@ class TestGemma3nAudioTensorLogic:
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
GEMMA3_MODEL_ID
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
GEMMA3_MODEL_ID
])
@
pytest
.
mark
.
parametrize
(
"mm_processor_kwargs"
,
[{}])
def
test_get_image_size_with_most_features
(
def
test_get_image_size_with_most_features
(
image_assets
:
ImageTestAssets
,
model_id
:
str
image_assets
:
ImageTestAssets
,
model_id
:
str
,
mm_processor_kwargs
:
dict
[
str
,
object
],
):
):
ctx
=
build_model_context
(
ctx
=
build_model_context
(
model_id
,
model_id
,
...
@@ -160,15 +163,14 @@ def test_get_image_size_with_most_features(
...
@@ -160,15 +163,14 @@ def test_get_image_size_with_most_features(
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model_config
)
hf_processor_mm_kwargs
:
dict
[
str
,
object
]
=
{}
hf_processor
=
processor
.
info
.
get_hf_processor
(
**
mm_processor_kwargs
)
hf_processor
=
processor
.
info
.
get_hf_processor
(
**
hf_processor_mm_kwargs
)
max_image_size
=
processor
.
info
.
get_image_size_with_most_features
()
max_image_size
=
processor
.
info
.
get_image_size_with_most_features
()
max_tokens
=
processor
.
info
.
get_num_image_tokens
(
max_tokens
=
processor
.
info
.
get_num_image_tokens
(
image_width
=
max_image_size
.
width
,
image_width
=
max_image_size
.
width
,
image_height
=
max_image_size
.
height
,
image_height
=
max_image_size
.
height
,
processor
=
hf_processor
,
processor
=
hf_processor
,
- mm_kwargs=hf_processor_mm_kwargs,
+ mm_kwargs=mm_processor_kwargs,
)
)
prompt
=
"<start_of_image>"
prompt
=
"<start_of_image>"
...
@@ -179,7 +181,7 @@ def test_get_image_size_with_most_features(
...
@@ -179,7 +181,7 @@ def test_get_image_size_with_most_features(
processed_inputs
=
processor
(
processed_inputs
=
processor
(
prompt
,
prompt
,
mm_items
=
processor
.
info
.
parse_mm_data
(
mm_data
),
mm_items
=
processor
.
info
.
parse_mm_data
(
mm_data
),
- hf_processor_mm_kwargs=hf_processor_mm_kwargs,
+ hf_processor_mm_kwargs=mm_processor_kwargs,
)
)
mm_kwargs_data
=
processed_inputs
[
"mm_kwargs"
].
get_data
()
mm_kwargs_data
=
processed_inputs
[
"mm_kwargs"
].
get_data
()
num_patches_tensor
=
mm_kwargs_data
[
"num_patches"
]
num_patches_tensor
=
mm_kwargs_data
[
"num_patches"
]
...
...
tests/models/multimodal/processing/test_qwen2_vl.py
View file @
845ee348
...
@@ -2,6 +2,8 @@
...
@@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
import
pytest
from
packaging.version
import
Version
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
...
@@ -15,6 +17,16 @@ from ...utils import build_model_context
...
@@ -15,6 +17,16 @@ from ...utils import build_model_context
[
[
({},
1426
,
(
5704
,
1176
)),
({},
1426
,
(
5704
,
1176
)),
({
"min_pixels"
:
64
**
2
,
"max_pixels"
:
512
**
2
},
330
,
(
1320
,
1176
)),
({
"min_pixels"
:
64
**
2
,
"max_pixels"
:
512
**
2
},
330
,
(
1320
,
1176
)),
(
{
"size"
:
{
"shortest_edge"
:
64
**
2
,
"longest_edge"
:
512
**
2
,
},
},
330
,
(
1320
,
1176
),
),
],
],
)
)
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
...
@@ -29,6 +41,12 @@ def test_processor_override(
...
@@ -29,6 +41,12 @@ def test_processor_override(
kwargs_on_init
:
bool
,
kwargs_on_init
:
bool
,
):
):
"""Ensure Qwen2VLMultiModalProcessor handles min/max pixels properly."""
"""Ensure Qwen2VLMultiModalProcessor handles min/max pixels properly."""
if
(
Version
(
TRANSFORMERS_VERSION
)
<
Version
(
"5.2.0"
)
and
"size"
in
mm_processor_kwargs
):
pytest
.
skip
(
"`size` ignored by `Qwen2VLProcessor.__call__`"
)
ctx
=
build_model_context
(
ctx
=
build_model_context
(
model_id
,
model_id
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
...
@@ -60,21 +78,34 @@ def test_processor_override(
...
@@ -60,21 +78,34 @@ def test_processor_override(
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"Qwen/Qwen2-VL-2B-Instruct"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"Qwen/Qwen2-VL-2B-Instruct"
])
@
pytest
.
mark
.
parametrize
(
"max_pixels"
,
[
1280
*
28
*
28
,
1283
*
28
*
28
])
@
pytest
.
mark
.
parametrize
(
"mm_processor_kwargs"
,
[
{
"min_pixels"
:
28
*
28
,
"max_pixels"
:
1280
*
28
*
28
},
{
"min_pixels"
:
28
*
28
,
"max_pixels"
:
1283
*
28
*
28
},
{
"size"
:
{
"shortest_edge"
:
28
*
28
,
"longest_edge"
:
1280
*
28
*
28
}},
{
"size"
:
{
"shortest_edge"
:
28
*
28
,
"longest_edge"
:
1283
*
28
*
28
}},
],
)
def
test_get_image_size_with_most_features
(
def
test_get_image_size_with_most_features
(
image_assets
:
ImageTestAssets
,
image_assets
:
ImageTestAssets
,
model_id
:
str
,
model_id
:
str
,
- max_pixels: int,
+ mm_processor_kwargs: dict[str, object],
):
):
if
(
Version
(
TRANSFORMERS_VERSION
)
<
Version
(
"5.2.0"
)
and
"size"
in
mm_processor_kwargs
):
pytest
.
skip
(
"`size` ignored by `Qwen2VLProcessor.__call__`"
)
ctx
=
build_model_context
(
ctx
=
build_model_context
(
model_id
,
model_id
,
mm_processor_kwargs
=
{
"max_pixels"
:
max_pixels
}
,
mm_processor_kwargs
=
mm_processor_kwargs
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
"image"
:
1
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model_config
)
hf_processor_mm_kwargs
:
dict
[
str
,
object
]
=
{}
hf_processor
=
processor
.
info
.
get_hf_processor
(
**
mm_processor_kwargs
)
hf_processor
=
processor
.
info
.
get_hf_processor
(
**
hf_processor_mm_kwargs
)
merge_size
=
processor
.
info
.
get_hf_config
().
vision_config
.
spatial_merge_size
merge_size
=
processor
.
info
.
get_hf_config
().
vision_config
.
spatial_merge_size
max_image_size
=
processor
.
info
.
get_image_size_with_most_features
()
max_image_size
=
processor
.
info
.
get_image_size_with_most_features
()
...
@@ -82,7 +113,7 @@ def test_get_image_size_with_most_features(
...
@@ -82,7 +113,7 @@ def test_get_image_size_with_most_features(
image_width
=
max_image_size
.
width
,
image_width
=
max_image_size
.
width
,
image_height
=
max_image_size
.
height
,
image_height
=
max_image_size
.
height
,
image_processor
=
hf_processor
.
image_processor
,
image_processor
=
hf_processor
.
image_processor
,
- mm_kwargs=hf_processor_mm_kwargs,
+ mm_kwargs=mm_processor_kwargs,
)
)
prompt
=
"<|vision_start|><|image_pad|><|vision_end|>"
prompt
=
"<|vision_start|><|image_pad|><|vision_end|>"
...
@@ -91,7 +122,7 @@ def test_get_image_size_with_most_features(
...
@@ -91,7 +122,7 @@ def test_get_image_size_with_most_features(
processed_inputs
=
processor
(
processed_inputs
=
processor
(
prompt
,
prompt
,
mm_items
=
processor
.
info
.
parse_mm_data
(
mm_data
),
mm_items
=
processor
.
info
.
parse_mm_data
(
mm_data
),
- hf_processor_mm_kwargs=hf_processor_mm_kwargs,
+ hf_processor_mm_kwargs=mm_processor_kwargs,
)
)
grid_thw
=
processed_inputs
[
"mm_kwargs"
].
get_data
()[
"image_grid_thw"
].
tolist
()
grid_thw
=
processed_inputs
[
"mm_kwargs"
].
get_data
()[
"image_grid_thw"
].
tolist
()
t
,
h
,
w
=
grid_thw
[
0
]
t
,
h
,
w
=
grid_thw
[
0
]
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
845ee348
...
@@ -829,16 +829,31 @@ class Ernie4_5_VLProcessingInfo(BaseProcessingInfo):
...
@@ -829,16 +829,31 @@ class Ernie4_5_VLProcessingInfo(BaseProcessingInfo):
spatial_conv_size
=
hf_config
.
spatial_conv_size
spatial_conv_size
=
hf_config
.
spatial_conv_size
temporal_conv_size
=
hf_config
.
temporal_conv_size
temporal_conv_size
=
hf_config
.
temporal_conv_size
if
self
.
ctx
.
model_config
.
trust_remote_code
:
# Defined in HF Hub repo
min_pixels_key
=
"min_pixels"
max_pixels_key
=
"max_pixels"
else
:
# Defined in Transformers library (requires v5.0 or above)
min_pixels_key
=
"shortest_edge"
max_pixels_key
=
"longest_edge"
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
min_pixels_key
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
max_pixels_key
:
override_max_pixels
}
if
do_resize
:
if
do_resize
:
resized_height
,
resized_width
=
smart_resize
(
resized_height
,
resized_width
=
smart_resize
(
height
=
image_height
,
height
=
image_height
,
width
=
image_width
,
width
=
image_width
,
factor
=
patch_size
*
spatial_conv_size
,
factor
=
patch_size
*
spatial_conv_size
,
- min_pixels=size["min_pixels"],
+ min_pixels=size[min_pixels_key],
- max_pixels=size["max_pixels"],
+ max_pixels=size[max_pixels_key],
)
)
preprocessed_size
=
ImageSize
(
width
=
resized_width
,
height
=
resized_height
)
preprocessed_size
=
ImageSize
(
width
=
resized_width
,
height
=
resized_height
)
else
:
else
:
...
...
vllm/model_executor/models/hunyuan_vision.py
View file @
845ee348
...
@@ -636,7 +636,13 @@ class HunYuanVLProcessingInfo(BaseProcessingInfo):
...
@@ -636,7 +636,13 @@ class HunYuanVLProcessingInfo(BaseProcessingInfo):
spatial_merge_size
=
vision_config
.
spatial_merge_size
spatial_merge_size
=
vision_config
.
spatial_merge_size
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
"shortest_edge"
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
"longest_edge"
:
override_max_pixels
}
if
do_resize
:
if
do_resize
:
resized_height
,
resized_width
=
smart_resize
(
resized_height
,
resized_width
=
smart_resize
(
...
...
vllm/model_executor/models/keye.py
View file @
845ee348
...
@@ -1021,7 +1021,13 @@ class KeyeProcessingInfo(BaseProcessingInfo):
...
@@ -1021,7 +1021,13 @@ class KeyeProcessingInfo(BaseProcessingInfo):
temporal_patch_size
=
1
temporal_patch_size
=
1
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
"min_pixels"
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
"max_pixels"
:
override_max_pixels
}
if
do_resize
:
if
do_resize
:
resized_height
,
resized_width
=
smart_resize
(
resized_height
,
resized_width
=
smart_resize
(
...
...
vllm/model_executor/models/paddleocr_vl.py
View file @
845ee348
...
@@ -155,15 +155,30 @@ class PaddleOCRVLProcessingInfo(BaseProcessingInfo):
...
@@ -155,15 +155,30 @@ class PaddleOCRVLProcessingInfo(BaseProcessingInfo):
patch_size
=
vision_config
.
patch_size
patch_size
=
vision_config
.
patch_size
merge_size
=
vision_config
.
spatial_merge_size
merge_size
=
vision_config
.
spatial_merge_size
if
self
.
ctx
.
model_config
.
trust_remote_code
:
# Defined in HF Hub repo
min_pixels_key
=
"min_pixels"
max_pixels_key
=
"max_pixels"
else
:
# Defined in Transformers library (requires v5.0 or above)
min_pixels_key
=
"shortest_edge"
max_pixels_key
=
"longest_edge"
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
min_pixels_key
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
max_pixels_key
:
override_max_pixels
}
resized_height
,
resized_width
=
smart_resize
(
resized_height
,
resized_width
=
smart_resize
(
height
=
image_height
,
height
=
image_height
,
width
=
image_width
,
width
=
image_width
,
factor
=
patch_size
*
merge_size
,
factor
=
patch_size
*
merge_size
,
- min_pixels=size["min_pixels"],
+ min_pixels=size[min_pixels_key],
- max_pixels=size["max_pixels"],
+ max_pixels=size[max_pixels_key],
)
)
preprocessed_size
=
ImageSize
(
width
=
resized_width
,
height
=
resized_height
)
preprocessed_size
=
ImageSize
(
width
=
resized_width
,
height
=
resized_height
)
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
845ee348
...
@@ -843,7 +843,13 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
...
@@ -843,7 +843,13 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
temporal_patch_size
=
vision_config
.
temporal_patch_size
temporal_patch_size
=
vision_config
.
temporal_patch_size
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
"shortest_edge"
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
"longest_edge"
:
override_max_pixels
}
if
do_resize
:
if
do_resize
:
resized_height
,
resized_width
=
smart_resize
(
resized_height
,
resized_width
=
smart_resize
(
...
@@ -930,7 +936,14 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
...
@@ -930,7 +936,14 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
image_processor
=
self
.
get_image_processor
()
image_processor
=
self
.
get_image_processor
()
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
({})
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
({})
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
"shortest_edge"
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
"longest_edge"
:
override_max_pixels
}
max_pixels
=
size
[
"longest_edge"
]
max_pixels
=
size
[
"longest_edge"
]
unit
=
patch_size
*
merge_size
unit
=
patch_size
*
merge_size
...
...
vllm/model_executor/models/qwen3_vl.py
View file @
845ee348
...
@@ -647,7 +647,13 @@ class Qwen3VLProcessingInfo(Qwen2VLProcessingInfo):
...
@@ -647,7 +647,13 @@ class Qwen3VLProcessingInfo(Qwen2VLProcessingInfo):
temporal_patch_size
=
vision_config
.
temporal_patch_size
temporal_patch_size
=
vision_config
.
temporal_patch_size
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
mm_kwargs
=
self
.
ctx
.
get_merged_mm_kwargs
(
mm_kwargs
)
size
=
mm_kwargs
.
get
(
"size"
,
image_processor
.
size
)
size
=
image_processor
.
size
if
override_size
:
=
mm_kwargs
.
get
(
"size"
):
size
=
size
|
override_size
if
(
override_min_pixels
:
=
mm_kwargs
.
get
(
"min_pixels"
))
is
not
None
:
size
=
size
|
{
"shortest_edge"
:
override_min_pixels
}
if
(
override_max_pixels
:
=
mm_kwargs
.
get
(
"max_pixels"
))
is
not
None
:
size
=
size
|
{
"longest_edge"
:
override_max_pixels
}
if
do_resize
:
if
do_resize
:
if
is_video
:
if
is_video
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment