Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4abed65c
Unverified
Commit
4abed65c
authored
Aug 30, 2024
by
Cyrus Leung
Committed by
GitHub
Aug 29, 2024
Browse files
[VLM] Disallow overflowing `max_model_len` for multimodal models (#7998)
parent
0c785d34
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
35 additions
and
3 deletions
+35
-3
tests/models/test_llava.py
tests/models/test_llava.py
+17
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+18
-3
No files found.
tests/models/test_llava.py
View file @
4abed65c
...
...
@@ -179,3 +179,20 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
num_logprobs
=
num_logprobs
,
tensor_parallel_size
=
1
,
)
@pytest.mark.parametrize("model", models)
def test_context_length_too_short(vllm_runner, image_assets, model):
    """Check that an over-long multimodal prompt is rejected with ValueError.

    The runner is created with ``max_model_len=128`` and asked to generate
    from the first image prompt together with the first image asset. The
    test passes only if a ``ValueError`` whose message matches
    "too long to fit into the model" is raised somewhere inside the
    ``pytest.raises`` block (runner construction or generation).
    """
    expected_message = "too long to fit into the model"
    pil_images = [image_asset.pil_image for image_asset in image_assets]

    with pytest.raises(ValueError, match=expected_message):
        runner = vllm_runner(
            model,
            # LLaVA has a feature size of 576, which exceeds this limit.
            max_model_len=128,
            enforce_eager=True,
        )

        with runner:
            runner.generate_greedy(
                [HF_IMAGE_PROMPTS[0]],
                max_tokens=1,
                images=[pil_images[0]],
            )
vllm/engine/llm_engine.py
View file @
4abed65c
...
...
@@ -2010,7 +2010,22 @@ class LLMEngine:
def _validate_model_inputs(
    self,
    inputs: Union[LLMInputs, EncoderDecoderLLMInputs],
) -> None:
    """Reject inputs whose prompt is empty or, for multimodal models, too long.

    Args:
        inputs: Mapping-like model inputs. The prompt token IDs are read
            from ``"encoder_prompt_token_ids"`` when
            ``self.is_encoder_decoder_model()`` is true, otherwise from
            ``"prompt_token_ids"``.

    Raises:
        ValueError: If the selected prompt token IDs are missing or empty,
            or if ``self.model_config.multimodal_config`` is set and the
            prompt has more tokens than ``self.model_config.max_model_len``.
    """
    # Select the key once and perform a single lookup instead of repeating
    # the encoder/decoder branch for every access.
    prompt_key = ("encoder_prompt_token_ids"
                  if self.is_encoder_decoder_model() else "prompt_token_ids")
    prompt_ids = inputs.get(prompt_key)

    # Covers both a missing entry (None) and an empty token sequence.
    if not prompt_ids:
        raise ValueError("Prompt cannot be empty")

    # The length limit is enforced here only when a multimodal config is
    # present; other models are not length-checked by this method.
    if self.model_config.multimodal_config is not None:
        max_prompt_len = self.model_config.max_model_len

        if len(prompt_ids) > max_prompt_len:
            raise ValueError(
                f"The prompt (total length {len(prompt_ids)}) is too long "
                f"to fit into the model (context length {max_prompt_len}). "
                "Make sure that `max_model_len` is no smaller than the "
                "number of text tokens plus multimodal tokens. For image "
                "inputs, the number of image tokens depends on the number "
                "of images, and possibly their aspect ratios as well.")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment