Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bb354e6b
Unverified
Commit
bb354e6b
authored
Jan 14, 2025
by
Cyrus Leung
Committed by
GitHub
Jan 14, 2025
Browse files
[Bugfix] Fix various bugs in multi-modal processor (#12031)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
ff39141a
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
62 additions
and
51 deletions
+62
-51
tests/multimodal/test_processing.py
tests/multimodal/test_processing.py
+19
-0
vllm/multimodal/processing.py
vllm/multimodal/processing.py
+39
-50
vllm/multimodal/registry.py
vllm/multimodal/registry.py
+4
-1
No files found.
tests/multimodal/test_processing.py
View file @
bb354e6b
...
@@ -421,6 +421,8 @@ def test_find_replace_tokens(
...
@@ -421,6 +421,8 @@ def test_find_replace_tokens(
"pattern_1"
:
[
32000
,
32000
],
"pattern_1"
:
[
32000
,
32000
],
"pattern_2"
:
[],
"pattern_2"
:
[],
"pattern_3"
:
[
1550
,
918
,
1550
],
"pattern_3"
:
[
1550
,
918
,
1550
],
# Test different modalities having the same tokens (32000)
"pattern_4"
:
[
32000
],
},
},
],
],
)
)
...
@@ -438,6 +440,14 @@ def test_find_replace_tokens(
...
@@ -438,6 +440,14 @@ def test_find_replace_tokens(
replacement
=
[
32000
,
32000
],
replacement
=
[
32000
,
32000
],
),
),
],
],
"pattern_4"
:
[
PlaceholderInfo
(
modality
=
"pattern_4"
,
item_idx
=
0
,
start_idx
=
3
,
replacement
=
[
32000
],
),
],
}
}
),
),
...
@@ -466,6 +476,7 @@ def test_find_replace_tokens(
...
@@ -466,6 +476,7 @@ def test_find_replace_tokens(
replacement
=
[
1550
,
918
,
1550
],
replacement
=
[
1550
,
918
,
1550
],
),
),
],
],
# No match for pattern_4 as it has lower priority than pattern_1
}
}
),
),
(
(
...
@@ -485,6 +496,14 @@ def test_find_replace_tokens(
...
@@ -485,6 +496,14 @@ def test_find_replace_tokens(
replacement
=
[
32000
,
32000
],
replacement
=
[
32000
,
32000
],
),
),
],
],
"pattern_4"
:
[
PlaceholderInfo
(
modality
=
"pattern_4"
,
item_idx
=
0
,
start_idx
=
5
,
replacement
=
[
32000
],
),
],
"pattern_3"
:
[
"pattern_3"
:
[
PlaceholderInfo
(
PlaceholderInfo
(
modality
=
"pattern_3"
,
modality
=
"pattern_3"
,
...
...
vllm/multimodal/processing.py
View file @
bb354e6b
...
@@ -404,22 +404,32 @@ def replace_text_matches(
...
@@ -404,22 +404,32 @@ def replace_text_matches(
return
""
.
join
(
texts
)
return
""
.
join
(
texts
)
def
_iter_modality_placeholders
(
def
_iter_placeholders
(
mm_prompt_repls
:
Mapping
[
str
,
Sequence
[
BoundPromptReplacement
]],
prompt
:
list
[
int
],
prompt
:
list
[
int
],
modality
:
str
,
mm_item_counts
:
Mapping
[
str
,
int
],
modality_repls
:
Sequence
[
BoundPromptReplacement
],
modal_item_count
:
int
,
)
->
Iterable
[
PlaceholderInfo
]:
)
->
Iterable
[
PlaceholderInfo
]:
if
modal_item_count
==
0
:
"""
return
Yield each set of placeholder tokens found in :code:`prompt`.
Matches are exclusive even when multiple modalities share
the same placeholder tokens. In that case, the modality that
appears earlier in `mm_prompt_repls` takes priority.
Note that empty matches are ignored.
"""
prompt_len
=
len
(
prompt
)
prompt_len
=
len
(
prompt
)
item_idx
=
0
item_idx
_by_modality
=
defaultdict
[
str
,
int
](
lambda
:
0
)
start_idx
=
0
start_idx
=
0
while
start_idx
<
prompt_len
:
while
start_idx
<
prompt_len
:
found
=
False
found
=
False
for
modality
,
modality_repls
in
mm_prompt_repls
.
items
():
item_idx
=
item_idx_by_modality
[
modality
]
if
item_idx
>=
mm_item_counts
.
get
(
modality
,
0
):
continue
for
repl_info
in
modality_repls
:
for
repl_info
in
modality_repls
:
replacement
=
repl_info
.
get_replacement
(
item_idx
)
replacement
=
repl_info
.
get_replacement
(
item_idx
)
repl_tokens
=
replacement
.
token_ids
repl_tokens
=
replacement
.
token_ids
...
@@ -437,40 +447,19 @@ def _iter_modality_placeholders(
...
@@ -437,40 +447,19 @@ def _iter_modality_placeholders(
replacement
=
repl_tokens
,
replacement
=
repl_tokens
,
)
)
item_idx
+=
1
if
item_idx
>=
modal_item_count
:
return
# Exclude overlapping matches
# Exclude overlapping matches
start_idx
=
end_idx
start_idx
=
end_idx
item_idx_by_modality
[
modality
]
+=
1
found
=
True
found
=
True
break
break
if
found
:
break
# Go back to the outer while loop
if
not
found
:
if
not
found
:
start_idx
+=
1
start_idx
+=
1
def
_iter_placeholders
(
mm_prompt_repls
:
Mapping
[
str
,
Sequence
[
BoundPromptReplacement
]],
prompt
:
list
[
int
],
mm_item_counts
:
Mapping
[
str
,
int
],
)
->
Iterable
[
PlaceholderInfo
]:
"""
For each modality, yield each set of placeholder tokens found in
:code:`prompt`.
Note that empty matches are ignored.
"""
for
modality
,
modal_item_count
in
mm_item_counts
.
items
():
if
modality
in
mm_prompt_repls
:
yield
from
_iter_modality_placeholders
(
prompt
,
modality
,
mm_prompt_repls
[
modality
],
modal_item_count
,
)
def
find_mm_placeholders
(
def
find_mm_placeholders
(
mm_prompt_repls
:
Mapping
[
str
,
Sequence
[
BoundPromptReplacement
]],
mm_prompt_repls
:
Mapping
[
str
,
Sequence
[
BoundPromptReplacement
]],
prompt
:
list
[
int
],
prompt
:
list
[
int
],
...
@@ -1156,7 +1145,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
...
@@ -1156,7 +1145,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
# If HF processor already inserts placeholder tokens,
# If HF processor already inserts placeholder tokens,
# there is no need for us to insert them
# there is no need for us to insert them
if
all
(
len
(
repls
)
==
0
for
repls
in
mm_missing_repls
.
item
s
()):
if
all
(
len
(
repls
)
==
0
for
repls
in
mm_missing_repls
.
value
s
()):
tokenizer
=
self
.
info
.
get_tokenizer
()
tokenizer
=
self
.
info
.
get_tokenizer
()
prompt
=
decode_tokens
(
tokenizer
,
prompt_ids
)
prompt
=
decode_tokens
(
tokenizer
,
prompt_ids
)
mm_placeholders
=
hf_mm_placeholders
mm_placeholders
=
hf_mm_placeholders
...
...
vllm/multimodal/registry.py
View file @
bb354e6b
...
@@ -259,7 +259,10 @@ class MultiModalRegistry:
...
@@ -259,7 +259,10 @@ class MultiModalRegistry:
This is currently directly used only in V1.
This is currently directly used only in V1.
"""
"""
if
self
.
has_processor
(
model_config
):
if
self
.
has_processor
(
model_config
):
tokenizer
=
cached_get_tokenizer
(
model_config
.
tokenizer
)
tokenizer
=
cached_get_tokenizer
(
model_config
.
tokenizer
,
trust_remote_code
=
model_config
.
trust_remote_code
,
)
processor
=
self
.
create_processor
(
model_config
,
tokenizer
)
processor
=
self
.
create_processor
(
model_config
,
tokenizer
)
seq_len
=
model_config
.
max_model_len
seq_len
=
model_config
.
max_model_len
return
processor
.
info
.
get_mm_max_tokens_per_item
(
seq_len
)
return
processor
.
info
.
get_mm_max_tokens_per_item
(
seq_len
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment