Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
61de3ef7
Unverified
Commit
61de3ef7
authored
Apr 10, 2025
by
Ye (Charlotte) Qi
Committed by
GitHub
Apr 10, 2025
Browse files
[Model] Remove image mm limit for LLaMa4 (#16365)
Signed-off-by:
Ye (Charlotte) Qi
<
yeq@meta.com
>
parent
ec1f9c8c
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
7 deletions
+26
-7
examples/offline_inference/vision_language_multi_image.py
examples/offline_inference/vision_language_multi_image.py
+23
-6
vllm/model_executor/models/mllama4.py
vllm/model_executor/models/mllama4.py
+3
-1
No files found.
examples/offline_inference/vision_language_multi_image.py
View file @
61de3ef7
...
...
@@ -22,6 +22,16 @@ QUESTION = "What is the content of each image?"
# Sample images hosted on Wikimedia Commons. The demo takes a prefix of this
# list (``IMAGE_URLS[:num_images]``), so the order here decides which images
# are used for a given image count.
IMAGE_URLS = [
    "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/2/26/Ultramarine_Flycatcher_%28Ficedula_superciliaris%29_Naggar%2C_Himachal_Pradesh%2C_2013_%28cropped%29.JPG",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e5/Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg/2560px-Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/d/d4/Starfish%2C_Caswell_Bay_-_geograph.org.uk_-_409413.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/6/69/Grapevinesnail_01.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0b/Texas_invasive_Musk_Thistle_1.jpg/1920px-Texas_invasive_Musk_Thistle_1.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/Huskiesatrest.jpg/2880px-Huskiesatrest.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg/1920px-Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/3/30/George_the_amazing_guinea_pig.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1f/Oryctolagus_cuniculus_Rcdo.jpg/1920px-Oryctolagus_cuniculus_Rcdo.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/9/98/Horse-and-pony.jpg",
]
...
...
@@ -285,8 +295,7 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
engine_args
=
EngineArgs
(
model
=
model_name
,
max_model_len
=
8192
,
max_num_seqs
=
4
,
max_model_len
=
131072
,
tensor_parallel_size
=
8
,
limit_mm_per_prompt
=
{
"image"
:
len
(
image_urls
)},
)
...
...
@@ -660,7 +669,7 @@ def run_generate(model, question: str, image_urls: list[str],
llm
.
llm_engine
.
add_lora
(
lora_request
=
lora_request
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
128
,
max_tokens
=
256
,
stop_token_ids
=
req_data
.
stop_token_ids
)
outputs
=
llm
.
generate
(
...
...
@@ -694,7 +703,7 @@ def run_chat(model: str, question: str, image_urls: list[str],
llm
.
llm_engine
.
add_lora
(
lora_request
=
lora_request
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
128
,
max_tokens
=
256
,
stop_token_ids
=
req_data
.
stop_token_ids
)
outputs
=
llm
.
chat
(
[{
...
...
@@ -729,10 +738,12 @@ def main(args: Namespace):
method
=
args
.
method
seed
=
args
.
seed
image_urls
=
IMAGE_URLS
[:
args
.
num_images
]
if
method
==
"generate"
:
run_generate
(
model
,
QUESTION
,
IMAGE_URLS
,
seed
)
run_generate
(
model
,
QUESTION
,
image_urls
,
seed
)
elif
method
==
"chat"
:
run_chat
(
model
,
QUESTION
,
IMAGE_URLS
,
seed
)
run_chat
(
model
,
QUESTION
,
image_urls
,
seed
)
else
:
raise
ValueError
(
f
"Invalid method:
{
method
}
"
)
...
...
@@ -757,6 +768,12 @@ if __name__ == "__main__":
type
=
int
,
default
=
None
,
help
=
"Set the seed when initializing `vllm.LLM`."
)
# How many of the sample images to include in the prompt.
#
# ``type=int`` is required here: argparse checks the *converted* value
# against ``choices``, so without it a command-line value such as "3" stays
# a str, never matches the integer choices, and every explicit
# ``--num-images`` is rejected. The converted int is also what the
# ``IMAGE_URLS[:args.num_images]`` slice in ``main`` needs.
parser.add_argument(
    "--num-images",
    "-n",
    type=int,
    # One choice per available image so the upper bound tracks IMAGE_URLS
    # instead of a hard-coded 12.
    choices=list(range(1, len(IMAGE_URLS) + 1)),
    default=2,
    help="Number of images to use for the demo.")
args
=
parser
.
parse_args
()
main
(
args
)
vllm/model_executor/models/mllama4.py
View file @
61de3ef7
...
...
@@ -477,7 +477,9 @@ class Mllama4ProcessingInfo(BaseProcessingInfo):
**
kwargs
)
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
    """Return the supported per-prompt item limit for each modality.

    Returns:
        A mapping with a single ``"image"`` key whose value is ``None``,
        meaning this method declares no fixed cap on the image count.
        The previous hard-coded cap of 10 is removed; it used to sit
        before this return and made it unreachable.
    """
    # Although vLLM can support more images from an infra capability
    # perspective, we do not recommend using >10 images in practice.
    return {"image": None}
@
staticmethod
def
get_patch_per_chunk
(
vision_config
:
Llama4VisionConfig
)
->
int
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment