Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
aed84686
Unverified
Commit
aed84686
authored
Jun 17, 2025
by
Isotr0py
Committed by
GitHub
Jun 17, 2025
Browse files
[Doc] Add missing llava family multi-image examples (#19698)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
5c76b9cd
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
103 additions
and
0 deletions
+103
-0
examples/offline_inference/vision_language_multi_image.py
examples/offline_inference/vision_language_multi_image.py
+103
-0
No files found.
examples/offline_inference/vision_language_multi_image.py
View file @
aed84686
...
@@ -289,6 +289,106 @@ def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
...
@@ -289,6 +289,106 @@ def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
)
)
def load_llava(question: str, image_urls: list[str]) -> ModelRequestData:
    """Assemble the multi-image request for ``llava-hf/llava-1.5-7b-hf``.

    Args:
        question: Text question placed after the image entries in the
            single user turn.
        image_urls: URLs of the images; one image entry is emitted per URL
            and the per-prompt image limit is set to ``len(image_urls)``.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments, the prompt
        rendered by the model's chat template, and the images obtained by
        calling ``fetch_image`` on every URL.
    """
    # NOTE: CAUTION! The original LLaVA models weren't really trained on
    # multi-image inputs, so they will generate poor responses for them!
    model_name = "llava-hf/llava-1.5-7b-hf"

    image_limit = {"image": len(image_urls)}
    engine_args = EngineArgs(
        model=model_name,
        max_num_seqs=16,
        limit_mm_per_prompt=image_limit,
    )

    # Single user turn: every image entry first, the text question last.
    user_content = [
        {"type": "image", "image": image_url} for image_url in image_urls
    ]
    user_content.append({"type": "text", "text": question})
    conversation = [{"role": "user", "content": user_content}]

    # Render the conversation to a prompt string (not token ids) that ends
    # with the generation prompt.
    hf_processor = AutoProcessor.from_pretrained(model_name)
    rendered_prompt = hf_processor.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    images = list(map(fetch_image, image_urls))
    return ModelRequestData(
        engine_args=engine_args,
        prompt=rendered_prompt,
        image_data=images,
    )
def load_llava_next(question: str, image_urls: list[str]) -> ModelRequestData:
    """Assemble the multi-image request for ``llava-hf/llava-v1.6-mistral-7b-hf``.

    Args:
        question: Text question placed after the image entries in the
            single user turn.
        image_urls: URLs of the images; one image entry is emitted per URL
            and the per-prompt image limit is set to ``len(image_urls)``.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments
        (``max_model_len=8192``, ``max_num_seqs=16``), the prompt rendered
        by the model's chat template, and the images obtained by calling
        ``fetch_image`` on every URL.
    """
    model_name = "llava-hf/llava-v1.6-mistral-7b-hf"

    image_limit = {"image": len(image_urls)}
    engine_args = EngineArgs(
        model=model_name,
        max_model_len=8192,
        max_num_seqs=16,
        limit_mm_per_prompt=image_limit,
    )

    # Single user turn: every image entry first, the text question last.
    user_content = [
        {"type": "image", "image": image_url} for image_url in image_urls
    ]
    user_content.append({"type": "text", "text": question})
    conversation = [{"role": "user", "content": user_content}]

    # Render the conversation to a prompt string (not token ids) that ends
    # with the generation prompt.
    hf_processor = AutoProcessor.from_pretrained(model_name)
    rendered_prompt = hf_processor.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    images = list(map(fetch_image, image_urls))
    return ModelRequestData(
        engine_args=engine_args,
        prompt=rendered_prompt,
        image_data=images,
    )
def load_llava_onevision(question: str, image_urls: list[str]) -> ModelRequestData:
    """Assemble the multi-image request for ``llava-hf/llava-onevision-qwen2-7b-ov-hf``.

    Args:
        question: Text question placed after the image entries in the
            single user turn.
        image_urls: URLs of the images; one image entry is emitted per URL
            and the per-prompt image limit is set to ``len(image_urls)``.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments
        (``max_model_len=16384``, ``max_num_seqs=16``), the prompt rendered
        by the model's chat template, and the images obtained by calling
        ``fetch_image`` on every URL.
    """
    model_name = "llava-hf/llava-onevision-qwen2-7b-ov-hf"

    image_limit = {"image": len(image_urls)}
    engine_args = EngineArgs(
        model=model_name,
        max_model_len=16384,
        max_num_seqs=16,
        limit_mm_per_prompt=image_limit,
    )

    # Single user turn: every image entry first, the text question last.
    user_content = [
        {"type": "image", "image": image_url} for image_url in image_urls
    ]
    user_content.append({"type": "text", "text": question})
    conversation = [{"role": "user", "content": user_content}]

    # Render the conversation to a prompt string (not token ids) that ends
    # with the generation prompt.
    hf_processor = AutoProcessor.from_pretrained(model_name)
    rendered_prompt = hf_processor.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    images = list(map(fetch_image, image_urls))
    return ModelRequestData(
        engine_args=engine_args,
        prompt=rendered_prompt,
        image_data=images,
    )
def
load_llama4
(
question
:
str
,
image_urls
:
list
[
str
])
->
ModelRequestData
:
def
load_llama4
(
question
:
str
,
image_urls
:
list
[
str
])
->
ModelRequestData
:
model_name
=
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
model_name
=
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
...
@@ -737,6 +837,9 @@ model_example_map = {
...
@@ -737,6 +837,9 @@ model_example_map = {
"idefics3"
:
load_idefics3
,
"idefics3"
:
load_idefics3
,
"internvl_chat"
:
load_internvl
,
"internvl_chat"
:
load_internvl
,
"kimi_vl"
:
load_kimi_vl
,
"kimi_vl"
:
load_kimi_vl
,
"llava"
:
load_llava
,
"llava-next"
:
load_llava_next
,
"llava-onevision"
:
load_llava_onevision
,
"llama4"
:
load_llama4
,
"llama4"
:
load_llama4
,
"mistral3"
:
load_mistral3
,
"mistral3"
:
load_mistral3
,
"mllama"
:
load_mllama
,
"mllama"
:
load_mllama
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment