Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
999df95b
Unverified
Commit
999df95b
authored
Nov 07, 2024
by
Jiahao Li
Committed by
GitHub
Nov 07, 2024
Browse files
[Bugfix] Make image processor respect `mm_processor_kwargs` for Qwen2-VL (#10112)
Signed-off-by:
Jiahao Li
<
liplus17@163.com
>
parent
a6f332d0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
10 deletions
+23
-10
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+23
-10
No files found.
vllm/model_executor/models/qwen2_vl.py
View file @
999df95b
...
...
@@ -22,8 +22,8 @@
# limitations under the License.
"""Inference-only Qwen2-VL model compatible with HuggingFace weights."""
from
functools
import
partial
from
typing
import
(
Any
,
Callable
,
Iterable
,
List
,
Literal
,
Mapping
,
Optional
,
Tuple
,
Type
,
TypedDict
,
Union
)
from
typing
import
(
Any
,
Callable
,
Dict
,
Iterable
,
List
,
Literal
,
Mapping
,
Optional
,
Tuple
,
Type
,
TypedDict
,
Union
)
import
torch
import
torch.nn
as
nn
...
...
@@ -558,6 +558,17 @@ class Qwen2VisionTransformer(nn.Module):
# === Vision input helpers === #
def get_mm_processor_kwargs(
        min_pixels: Optional[int] = None,
        max_pixels: Optional[int] = None) -> Dict[str, int]:
    """Build the keyword-argument dict holding the pixel limits.

    Only limits that were actually supplied are included, so the result can
    be splatted into a call without overriding defaults on the other side.

    Args:
        min_pixels: Optional lower pixel bound; omitted when falsy
            (``None`` or ``0``).
        max_pixels: Optional upper pixel bound; omitted when falsy
            (``None`` or ``0``).

    Returns:
        A dict containing ``"min_pixels"`` and/or ``"max_pixels"`` for the
        truthy arguments, in that order; empty when neither is set.
    """
    # Keep the (key, value) pairs in a fixed order so the resulting dict is
    # always populated min-first, then max.
    pixel_limits = (
        ("min_pixels", min_pixels),
        ("max_pixels", max_pixels),
    )
    return {key: limit for key, limit in pixel_limits if limit}
def
mm_input_mapper_for_qwen2_vl
(
ctx
:
InputContext
,
data
:
MultiModalData
[
object
],
...
...
@@ -575,12 +586,8 @@ def mm_input_mapper_for_qwen2_vl(
model_config
=
ctx
.
model_config
# Handle mm processor kwargs; we pass these at creation time
# because preprocess() in transformers doesn't expose them
mm_processor_kwargs
=
{}
if
min_pixels
:
mm_processor_kwargs
[
"min_pixels"
]
=
min_pixels
if
max_pixels
:
mm_processor_kwargs
[
"max_pixels"
]
=
max_pixels
mm_processor_kwargs
=
get_mm_processor_kwargs
(
min_pixels
=
min_pixels
,
max_pixels
=
max_pixels
)
image_processor
=
cached_get_image_processor
(
model_config
.
model
,
trust_remote_code
=
model_config
.
trust_remote_code
,
...
...
@@ -683,7 +690,10 @@ def get_max_qwen2_vl_mm_tokens(ctx: InputContext,
*
,
min_pixels
=
None
,
max_pixels
=
None
)
->
int
:
image_processor
=
cached_get_image_processor
(
ctx
.
model_config
.
model
)
mm_processor_kwargs
=
get_mm_processor_kwargs
(
min_pixels
=
min_pixels
,
max_pixels
=
max_pixels
)
image_processor
=
cached_get_image_processor
(
ctx
.
model_config
.
model
,
**
mm_processor_kwargs
)
max_resized_height
,
max_resized_width
,
max_llm_image_tokens
=
\
_get_max_image_info
(
image_processor
,
data_type_key
=
data_type_key
,
mm_count
=
1
,
min_pixels
=
min_pixels
,
...
...
@@ -705,7 +715,10 @@ def dummy_data_for_qwen2_vl(
min_pixels
:
Optional
[
int
]
=
None
,
max_pixels
:
Optional
[
int
]
=
None
)
->
Tuple
[
SequenceData
,
Optional
[
MultiModalDataDict
]]:
image_processor
=
cached_get_image_processor
(
ctx
.
model_config
.
model
)
mm_processor_kwargs
=
get_mm_processor_kwargs
(
min_pixels
=
min_pixels
,
max_pixels
=
max_pixels
)
image_processor
=
cached_get_image_processor
(
ctx
.
model_config
.
model
,
**
mm_processor_kwargs
)
num_images
=
mm_counts
[
"image"
]
max_resized_height
,
max_resized_width
,
max_llm_image_tokens
=
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment