Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a5d11a54
Unverified
Commit a5d11a54, authored Apr 10, 2025 by Cyrus Leung
Committed by GitHub on Apr 10, 2025
Browse files
[Bugfix] Fix validation error for text-only Mllama 3.2 (#16377)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent
3d4c8775
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 39 additions and 30 deletions
+39
-30
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+17
-13
vllm/model_executor/models/mllama.py
vllm/model_executor/models/mllama.py
+4
-1
vllm/v1/engine/processor.py
vllm/v1/engine/processor.py
+18
-16
No files found.
vllm/engine/llm_engine.py
View file @
a5d11a54
...
@@ -2046,27 +2046,31 @@ class LLMEngine:
...
@@ -2046,27 +2046,31 @@ class LLMEngine:
*
,
*
,
prompt_type
:
Literal
[
"encoder"
,
"decoder"
],
prompt_type
:
Literal
[
"encoder"
,
"decoder"
],
):
):
if
prompt_type
==
"encoder"
and
self
.
tokenizer
is
not
None
:
model_config
=
self
.
model_config
tokenizer
=
self
.
tokenizer
.
get_lora_tokenizer
(
lora_request
)
tokenizer
=
(
None
if
self
.
tokenizer
is
None
else
model_config
=
self
.
model_config
self
.
tokenizer
.
get_lora_tokenizer
(
lora_request
))
if
model_config
.
is_multimodal_model
:
prompt_ids
=
prompt_inputs
[
"prompt_token_ids"
]
if
not
prompt_ids
:
if
prompt_type
==
"encoder"
and
model_config
.
is_multimodal_model
:
pass
# Mllama may have empty encoder inputs for text-only data
else
:
raise
ValueError
(
f
"The
{
prompt_type
}
prompt cannot be empty"
)
max_prompt_len
=
self
.
model_config
.
max_model_len
if
len
(
prompt_ids
)
>=
max_prompt_len
:
if
prompt_type
==
"encoder"
and
model_config
.
is_multimodal_model
:
mm_registry
=
self
.
input_preprocessor
.
mm_registry
mm_registry
=
self
.
input_preprocessor
.
mm_registry
mm_processor
=
mm_registry
.
create_processor
(
mm_processor
=
mm_registry
.
create_processor
(
model_config
,
tokenizer
=
tokenizer
)
model_config
,
tokenizer
=
tokenizer
or
object
(),
# Dummy if no tokenizer
)
assert
isinstance
(
mm_processor
,
EncDecMultiModalProcessor
)
assert
isinstance
(
mm_processor
,
EncDecMultiModalProcessor
)
if
mm_processor
.
pad_dummy_encoder_prompt
:
if
mm_processor
.
pad_dummy_encoder_prompt
:
return
# Skip encoder length check for Whisper
return
# Skip encoder length check for Whisper
prompt_ids
=
prompt_inputs
[
"prompt_token_ids"
]
if
model_config
.
is_multimodal_model
:
if
not
prompt_ids
:
raise
ValueError
(
f
"The
{
prompt_type
}
prompt cannot be empty"
)
max_prompt_len
=
self
.
model_config
.
max_model_len
if
len
(
prompt_ids
)
>=
max_prompt_len
:
if
self
.
model_config
.
is_multimodal_model
:
suggestion
=
(
suggestion
=
(
"Make sure that `max_model_len` is no smaller than the "
"Make sure that `max_model_len` is no smaller than the "
"number of text tokens plus multimodal tokens. For image "
"number of text tokens plus multimodal tokens. For image "
...
...
vllm/model_executor/models/mllama.py
View file @
a5d11a54
...
@@ -211,6 +211,9 @@ class MllamaMultiModalProcessor(EncDecMultiModalProcessor[MllamaProcessingInfo]
...
@@ -211,6 +211,9 @@ class MllamaMultiModalProcessor(EncDecMultiModalProcessor[MllamaProcessingInfo]
# }
# }
if
mm_data
:
if
mm_data
:
hf_processor
=
self
.
info
.
get_hf_processor
()
image_token
:
str
=
hf_processor
.
image_token
# Since only the last group of consecutive images
# Since only the last group of consecutive images
# are attended by the decoded tokens, we only need to
# are attended by the decoded tokens, we only need to
# get the number of tokens for those images.
# get the number of tokens for those images.
...
@@ -227,7 +230,7 @@ class MllamaMultiModalProcessor(EncDecMultiModalProcessor[MllamaProcessingInfo]
...
@@ -227,7 +230,7 @@ class MllamaMultiModalProcessor(EncDecMultiModalProcessor[MllamaProcessingInfo]
num_tokens
=
decode_tiles
*
token_per_chunk
num_tokens
=
decode_tiles
*
token_per_chunk
mm_inputs
[
"encoder_prompt_token_ids"
]
=
[
image_token_id
mm_inputs
[
"encoder_prompt_token_ids"
]
=
[
image_token_id
]
*
num_tokens
]
*
num_tokens
mm_inputs
[
"encoder_prompt"
]
=
"<|
image
|>"
*
num_tokens
mm_inputs
[
"encoder_prompt"
]
=
image
_token
*
num_tokens
return
mm_inputs
return
mm_inputs
...
...
vllm/v1/engine/processor.py
View file @
a5d11a54
...
@@ -315,32 +315,34 @@ class Processor:
...
@@ -315,32 +315,34 @@ class Processor:
*
,
*
,
prompt_type
:
Literal
[
"encoder"
,
"decoder"
],
prompt_type
:
Literal
[
"encoder"
,
"decoder"
],
):
):
model_config
=
self
.
model_config
tokenizer
=
self
.
tokenizer
.
get_lora_tokenizer
(
lora_request
)
tokenizer
=
self
.
tokenizer
.
get_lora_tokenizer
(
lora_request
)
if
prompt_type
==
"encoder"
:
model_config
=
self
.
model_config
if
model_config
.
is_multimodal_model
:
mm_registry
=
self
.
input_preprocessor
.
mm_registry
mm_processor
=
mm_registry
.
create_processor
(
model_config
,
tokenizer
=
tokenizer
)
assert
isinstance
(
mm_processor
,
EncDecMultiModalProcessor
)
if
mm_processor
.
pad_dummy_encoder_prompt
:
return
# Skip encoder length check for Whisper
prompt_ids
=
prompt_inputs
[
"prompt_token_ids"
]
prompt_ids
=
prompt_inputs
[
"prompt_token_ids"
]
if
not
prompt_ids
:
if
not
prompt_ids
:
raise
ValueError
(
f
"The
{
prompt_type
}
prompt cannot be empty"
)
if
prompt_type
==
"encoder"
and
model_config
.
is_multimodal_model
:
pass
# Mllama may have empty encoder inputs for text-only data
else
:
raise
ValueError
(
f
"The
{
prompt_type
}
prompt cannot be empty"
)
max_input_id
=
max
(
prompt_ids
)
max_input_id
=
max
(
prompt_ids
,
default
=
0
)
if
max_input_id
>
tokenizer
.
max_token_id
:
if
max_input_id
>
tokenizer
.
max_token_id
:
raise
ValueError
(
f
"Token id
{
max_input_id
}
is out of vocabulary"
)
raise
ValueError
(
f
"Token id
{
max_input_id
}
is out of vocabulary"
)
max_prompt_len
=
self
.
model_config
.
max_model_len
max_prompt_len
=
self
.
model_config
.
max_model_len
if
len
(
prompt_ids
)
>=
max_prompt_len
:
if
len
(
prompt_ids
)
>=
max_prompt_len
:
if
self
.
model_config
.
is_multimodal_model
:
if
prompt_type
==
"encoder"
and
model_config
.
is_multimodal_model
:
mm_registry
=
self
.
input_preprocessor
.
mm_registry
mm_processor
=
mm_registry
.
create_processor
(
model_config
,
tokenizer
=
tokenizer
,
)
assert
isinstance
(
mm_processor
,
EncDecMultiModalProcessor
)
if
mm_processor
.
pad_dummy_encoder_prompt
:
return
# Skip encoder length check for Whisper
if
model_config
.
is_multimodal_model
:
suggestion
=
(
suggestion
=
(
"Make sure that `max_model_len` is no smaller than the "
"Make sure that `max_model_len` is no smaller than the "
"number of text tokens plus multimodal tokens. For image "
"number of text tokens plus multimodal tokens. For image "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.