Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f0fe4fe8
Unverified
Commit
f0fe4fe8
authored
Oct 14, 2024
by
Xiang Xu
Committed by
GitHub
Oct 14, 2024
Browse files
[Model] Make llama3.2 support multiple and interleaved images (#9095)
parent
4d31cd42
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
384 additions
and
42 deletions
+384
-42
examples/offline_inference_vision_language_multi_image.py
examples/offline_inference_vision_language_multi_image.py
+23
-0
tests/models/encoder_decoder/vision_language/test_mllama.py
tests/models/encoder_decoder/vision_language/test_mllama.py
+82
-3
vllm/model_executor/models/mllama.py
vllm/model_executor/models/mllama.py
+279
-39
No files found.
examples/offline_inference_vision_language_multi_image.py
View file @
f0fe4fe8
...
...
@@ -234,12 +234,35 @@ def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:
)
def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
    """Build the engine, prompt and image inputs for Llama 3.2 Vision.

    Args:
        question: Text placed after the image placeholders in the prompt.
        image_urls: URLs of the images to fetch. One ``<|image|>``
            placeholder is emitted per URL, and the per-prompt image
            limit is set to the same count.

    Returns:
        A ``ModelRequestData`` holding the constructed ``LLM``, the prompt
        string and the fetched images. ``stop_token_ids`` and
        ``chat_template`` are left as ``None``.
    """
    model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

    # The configuration below has been confirmed to launch on a single L40 GPU.
    llm = LLM(
        model=model_name,
        max_model_len=4096,
        max_num_seqs=16,
        enforce_eager=True,
        limit_mm_per_prompt={"image": len(image_urls)},
    )

    # Emit exactly one placeholder per image so the prompt always agrees
    # with both `image_data` and `limit_mm_per_prompt`; a fixed pair of
    # placeholders is only correct when exactly two URLs are passed.
    placeholders = "<|image|>" * len(image_urls)
    prompt = f"{placeholders}<|begin_of_text|>{question}"

    return ModelRequestData(
        llm=llm,
        prompt=prompt,
        stop_token_ids=None,
        image_data=[fetch_image(url) for url in image_urls],
        chat_template=None,
    )
# Lookup table from a model key to the loader function registered for it.
model_example_map = {
    "phi3_v": load_phi3v,
    "internvl_chat": load_internvl,
    "NVLM_D": load_nvlm_d,
    "qwen2_vl": load_qwen2_vl,
    "qwen_vl_chat": load_qwenvl_chat,
    "mllama": load_mllama,
}
...
...
tests/models/encoder_decoder/vision_language/test_mllama.py
View file @
f0fe4fe8
...
...
@@ -12,7 +12,7 @@ from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
from
....utils
import
large_gpu_test
from
...utils
import
check_logprobs_close
_LIMIT_IMAGE_PER_PROMPT
=
1
_LIMIT_IMAGE_PER_PROMPT
=
3
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
...
...
@@ -244,8 +244,9 @@ def _run_test(
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
sizes
,
dtype
,
max_tokens
,
num_logprobs
)
->
None
:
def
test_models_single_leading_image
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
sizes
,
dtype
,
max_tokens
,
num_logprobs
)
->
None
:
run_test
(
hf_runner
,
vllm_runner
,
...
...
@@ -257,3 +258,81 @@ def test_models(hf_runner, vllm_runner, image_assets, model, sizes, dtype,
num_logprobs
=
num_logprobs
,
tensor_parallel_size
=
1
,
)
@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models_multi_leading_images(hf_runner, vllm_runner, image_assets,
                                     model, dtype, max_tokens,
                                     num_logprobs) -> None:
    """Run prompts whose image placeholders all precede the text.

    Three prompts are submitted as a single input group: two with two
    leading images and one with three. Some images are resized so that a
    single prompt mixes different image sizes. The group is passed to
    ``_run_test`` together with both runners.
    """
    sign_img = image_assets[0].pil_image
    blossom_img = image_assets[1].pil_image

    two_image_prompt = "<|image|><|image|><|begin_of_text|>Describe 2 images."  # noqa: E501
    three_image_prompt = "<|image|><|image|><|image|><|begin_of_text|>Describe 3 images."  # noqa: E501
    prompts = [two_image_prompt, two_image_prompt, three_image_prompt]

    images_per_prompt = [
        [sign_img, cherry_img] if False else [sign_img, blossom_img],
        # Images with different sizes.
        [sign_img.resize((512, 512)), sign_img],
        [
            sign_img,
            sign_img.resize((512, 1536)),
            blossom_img.resize((512, 1024)),
        ],
    ]

    inputs = [(prompts, images_per_prompt)]

    _run_test(
        hf_runner,
        vllm_runner,
        inputs,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        tensor_parallel_size=1,
    )
@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models_interleaved_images(hf_runner, vllm_runner, image_assets,
                                   model, dtype, max_tokens,
                                   num_logprobs) -> None:
    """Run prompts whose image placeholders sit inside the text.

    Two prompts are submitted as a single input group: one with a single
    image placeholder mid-sentence and one with two placeholders separated
    by text. The group is passed to ``_run_test`` together with both
    runners.
    """
    sign_img = image_assets[0].pil_image
    blossom_img = image_assets[1].pil_image

    one_image_prompt = "<|begin_of_text|>The content of the image <|image|> is"  # noqa: E501
    two_image_prompt = (
        "<|begin_of_text|>Between the first image <|image|> and the second image<|image|>, "  # noqa: E501
        "which is a stop sign and which is a cherry blossom?"  # noqa: E501
    )

    prompts = [one_image_prompt, two_image_prompt]
    images_per_prompt = [
        [sign_img],
        [sign_img, blossom_img],
    ]

    inputs = [(prompts, images_per_prompt)]

    _run_test(
        hf_runner,
        vllm_runner,
        inputs,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        tensor_parallel_size=1,
    )
vllm/model_executor/models/mllama.py
View file @
f0fe4fe8
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment