Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d7543862
Unverified
Commit
d7543862
authored
May 02, 2025
by
Cyrus Leung
Committed by
GitHub
May 02, 2025
Browse files
[Misc] Rename assets for testing (#17575)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
c777df79
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
107 additions
and
112 deletions
+107
-112
examples/offline_inference/qwen2_5_omni/only_thinker.py
examples/offline_inference/qwen2_5_omni/only_thinker.py
+2
-2
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+1
-1
tests/conftest.py
tests/conftest.py
+21
-35
tests/models/multimodal/generation/test_common.py
tests/models/multimodal/generation/test_common.py
+11
-10
tests/models/multimodal/generation/test_florence2.py
tests/models/multimodal/generation/test_florence2.py
+2
-2
tests/models/multimodal/generation/test_granite_speech.py
tests/models/multimodal/generation/test_granite_speech.py
+5
-4
tests/models/multimodal/generation/test_interleaved.py
tests/models/multimodal/generation/test_interleaved.py
+1
-1
tests/models/multimodal/generation/test_mllama.py
tests/models/multimodal/generation/test_mllama.py
+8
-8
tests/models/multimodal/generation/test_qwen2_vl.py
tests/models/multimodal/generation/test_qwen2_vl.py
+1
-1
tests/models/multimodal/generation/test_ultravox.py
tests/models/multimodal/generation/test_ultravox.py
+25
-19
tests/models/multimodal/generation/vlm_utils/builders.py
tests/models/multimodal/generation/vlm_utils/builders.py
+5
-5
tests/models/multimodal/generation/vlm_utils/model_utils.py
tests/models/multimodal/generation/vlm_utils/model_utils.py
+4
-4
tests/models/multimodal/generation/vlm_utils/runners.py
tests/models/multimodal/generation/vlm_utils/runners.py
+6
-5
tests/models/multimodal/generation/vlm_utils/types.py
tests/models/multimodal/generation/vlm_utils/types.py
+3
-3
tests/models/multimodal/pooling/test_intern_vit.py
tests/models/multimodal/pooling/test_intern_vit.py
+2
-2
tests/models/multimodal/processing/test_h2ovl.py
tests/models/multimodal/processing/test_h2ovl.py
+2
-2
tests/models/multimodal/processing/test_idefics3.py
tests/models/multimodal/processing/test_idefics3.py
+2
-2
tests/models/multimodal/processing/test_internvl.py
tests/models/multimodal/processing/test_internvl.py
+2
-2
tests/models/multimodal/processing/test_llama4.py
tests/models/multimodal/processing/test_llama4.py
+2
-2
tests/models/multimodal/processing/test_minimax_vl_01.py
tests/models/multimodal/processing/test_minimax_vl_01.py
+2
-2
No files found.
examples/offline_inference/qwen2_5_omni/only_thinker.py
View file @
d7543862
...
@@ -47,7 +47,7 @@ def get_mixed_modalities_query() -> QueryResult:
...
@@ -47,7 +47,7 @@ def get_mixed_modalities_query() -> QueryResult:
"image"
:
"image"
:
ImageAsset
(
"cherry_blossom"
).
pil_image
.
convert
(
"RGB"
),
ImageAsset
(
"cherry_blossom"
).
pil_image
.
convert
(
"RGB"
),
"video"
:
"video"
:
VideoAsset
(
name
=
"
sample_demo_1
"
,
num_frames
=
16
).
np_ndarrays
,
VideoAsset
(
name
=
"
baby_reading
"
,
num_frames
=
16
).
np_ndarrays
,
},
},
},
},
limit_mm_per_prompt
=
{
limit_mm_per_prompt
=
{
...
@@ -65,7 +65,7 @@ def get_use_audio_in_video_query() -> QueryResult:
...
@@ -65,7 +65,7 @@ def get_use_audio_in_video_query() -> QueryResult:
"<|im_start|>user
\n
<|vision_bos|><|VIDEO|><|vision_eos|>"
"<|im_start|>user
\n
<|vision_bos|><|VIDEO|><|vision_eos|>"
f
"
{
question
}
<|im_end|>
\n
"
f
"
{
question
}
<|im_end|>
\n
"
f
"<|im_start|>assistant
\n
"
)
f
"<|im_start|>assistant
\n
"
)
asset
=
VideoAsset
(
name
=
"
sample_demo_1
"
,
num_frames
=
16
)
asset
=
VideoAsset
(
name
=
"
baby_reading
"
,
num_frames
=
16
)
audio
=
asset
.
get_audio
(
sampling_rate
=
16000
)
audio
=
asset
.
get_audio
(
sampling_rate
=
16000
)
assert
not
envs
.
VLLM_USE_V1
,
(
"V1 does not support use_audio_in_video. "
assert
not
envs
.
VLLM_USE_V1
,
(
"V1 does not support use_audio_in_video. "
"Please launch this example with "
"Please launch this example with "
...
...
examples/offline_inference/vision_language.py
View file @
d7543862
...
@@ -1109,7 +1109,7 @@ def get_multi_modal_input(args):
...
@@ -1109,7 +1109,7 @@ def get_multi_modal_input(args):
if
args
.
modality
==
"video"
:
if
args
.
modality
==
"video"
:
# Input video and question
# Input video and question
video
=
VideoAsset
(
name
=
"
sample_demo_1
"
,
video
=
VideoAsset
(
name
=
"
baby_reading
"
,
num_frames
=
args
.
num_frames
).
np_ndarrays
num_frames
=
args
.
num_frames
).
np_ndarrays
vid_questions
=
[
"Why is this video funny?"
]
vid_questions
=
[
"Why is this video funny?"
]
...
...
tests/conftest.py
View file @
d7543862
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
json
import
json
import
os
import
os
import
tempfile
import
tempfile
from
collections
import
UserList
from
enum
import
Enum
from
enum
import
Enum
from
typing
import
Any
,
Callable
,
Optional
,
TypedDict
,
TypeVar
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
TypedDict
,
TypeVar
,
Union
...
@@ -58,16 +56,12 @@ def _read_prompts(filename: str) -> list[str]:
...
@@ -58,16 +56,12 @@ def _read_prompts(filename: str) -> list[str]:
return
prompts
return
prompts
class
_
ImageAssetPrompts
(
TypedDict
):
class
ImageAssetPrompts
(
TypedDict
):
stop_sign
:
str
stop_sign
:
str
cherry_blossom
:
str
cherry_blossom
:
str
class
_ImageAssetsBase
(
UserList
[
ImageAsset
]):
class
ImageTestAssets
(
list
[
ImageAsset
]):
pass
class
_ImageAssets
(
_ImageAssetsBase
):
def
__init__
(
self
)
->
None
:
def
__init__
(
self
)
->
None
:
super
().
__init__
([
super
().
__init__
([
...
@@ -75,7 +69,7 @@ class _ImageAssets(_ImageAssetsBase):
...
@@ -75,7 +69,7 @@ class _ImageAssets(_ImageAssetsBase):
ImageAsset
(
"cherry_blossom"
),
ImageAsset
(
"cherry_blossom"
),
])
])
def
prompts
(
self
,
prompts
:
_
ImageAssetPrompts
)
->
list
[
str
]:
def
prompts
(
self
,
prompts
:
ImageAssetPrompts
)
->
list
[
str
]:
"""
"""
Convenience method to define the prompt for each test image.
Convenience method to define the prompt for each test image.
...
@@ -85,35 +79,27 @@ class _ImageAssets(_ImageAssetsBase):
...
@@ -85,35 +79,27 @@ class _ImageAssets(_ImageAssetsBase):
return
[
prompts
[
"stop_sign"
],
prompts
[
"cherry_blossom"
]]
return
[
prompts
[
"stop_sign"
],
prompts
[
"cherry_blossom"
]]
class
_VideoAssetPrompts
(
TypedDict
):
class
VideoAssetPrompts
(
TypedDict
):
sample_demo_1
:
str
baby_reading
:
str
class
_VideoAssetsBase
(
UserList
[
VideoAsset
]):
pass
class
VideoTestAssets
(
list
[
VideoAsset
]):
class
_VideoAssets
(
_VideoAssetsBase
):
def
__init__
(
self
)
->
None
:
def
__init__
(
self
)
->
None
:
super
().
__init__
([
super
().
__init__
([
VideoAsset
(
"
sample_demo_1
"
),
VideoAsset
(
"
baby_reading
"
),
])
])
def
prompts
(
self
,
prompts
:
_
VideoAssetPrompts
)
->
list
[
str
]:
def
prompts
(
self
,
prompts
:
VideoAssetPrompts
)
->
list
[
str
]:
return
[
prompts
[
"
sample_demo_1
"
]]
return
[
prompts
[
"
baby_reading
"
]]
class
_
AudioAssetPrompts
(
TypedDict
):
class
AudioAssetPrompts
(
TypedDict
):
mary_had_lamb
:
str
mary_had_lamb
:
str
winning_call
:
str
winning_call
:
str
class
_AudioAssetsBase
(
UserList
[
AudioAsset
]):
class
AudioTestAssets
(
list
[
AudioAsset
]):
pass
class
_AudioAssets
(
_AudioAssetsBase
):
def
__init__
(
self
)
->
None
:
def
__init__
(
self
)
->
None
:
super
().
__init__
([
super
().
__init__
([
...
@@ -121,16 +107,16 @@ class _AudioAssets(_AudioAssetsBase):
...
@@ -121,16 +107,16 @@ class _AudioAssets(_AudioAssetsBase):
AudioAsset
(
"winning_call"
),
AudioAsset
(
"winning_call"
),
])
])
def
prompts
(
self
,
prompts
:
_
AudioAssetPrompts
)
->
list
[
str
]:
def
prompts
(
self
,
prompts
:
AudioAssetPrompts
)
->
list
[
str
]:
return
[
prompts
[
"mary_had_lamb"
],
prompts
[
"winning_call"
]]
return
[
prompts
[
"mary_had_lamb"
],
prompts
[
"winning_call"
]]
IMAGE_ASSETS
=
_
ImageAssets
()
IMAGE_ASSETS
=
Image
Test
Assets
()
"""Singleton instance of :class:`
_
ImageAssets`."""
"""Singleton instance of :class:`Image
Test
Assets`."""
VIDEO_ASSETS
=
_
VideoAssets
()
VIDEO_ASSETS
=
Video
Test
Assets
()
"""Singleton instance of :class:`
_
VideoAssets`."""
"""Singleton instance of :class:`Video
Test
Assets`."""
AUDIO_ASSETS
=
_
AudioAssets
()
AUDIO_ASSETS
=
Audio
Test
Assets
()
"""Singleton instance of :class:`
_
AudioAssets`."""
"""Singleton instance of :class:`Audio
Test
Assets`."""
@
pytest
.
fixture
(
scope
=
"function"
,
autouse
=
True
)
@
pytest
.
fixture
(
scope
=
"function"
,
autouse
=
True
)
...
@@ -278,17 +264,17 @@ def example_long_prompts() -> list[str]:
...
@@ -278,17 +264,17 @@ def example_long_prompts() -> list[str]:
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
image_assets
()
->
_
ImageAssets
:
def
image_assets
()
->
Image
Test
Assets
:
return
IMAGE_ASSETS
return
IMAGE_ASSETS
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
video_assets
()
->
_
VideoAssets
:
def
video_assets
()
->
Video
Test
Assets
:
return
VIDEO_ASSETS
return
VIDEO_ASSETS
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
audio_assets
()
->
_
AudioAssets
:
def
audio_assets
()
->
Audio
Test
Assets
:
return
AUDIO_ASSETS
return
AUDIO_ASSETS
...
...
tests/models/multimodal/generation/test_common.py
View file @
d7543862
...
@@ -13,8 +13,8 @@ from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
...
@@ -13,8 +13,8 @@ from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils
import
identity
from
vllm.utils
import
identity
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_
ImageAssets
,
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
Image
Test
Assets
,
_
VideoAssets
)
Video
Test
Assets
,
VllmRunner
)
from
....utils
import
(
create_new_process_for_each_test
,
large_gpu_mark
,
from
....utils
import
(
create_new_process_for_each_test
,
large_gpu_mark
,
multi_gpu_marks
)
multi_gpu_marks
)
from
...utils
import
check_outputs_equal
from
...utils
import
check_outputs_equal
...
@@ -691,7 +691,7 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
...
@@ -691,7 +691,7 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
monkeypatch
):
image_assets
:
Image
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -716,7 +716,7 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
...
@@ -716,7 +716,7 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
monkeypatch
):
image_assets
:
Image
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -741,7 +741,7 @@ def test_image_embedding_models(model_type: str,
...
@@ -741,7 +741,7 @@ def test_image_embedding_models(model_type: str,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
monkeypatch
):
image_assets
:
Image
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -763,7 +763,7 @@ def test_image_embedding_models(model_type: str,
...
@@ -763,7 +763,7 @@ def test_image_embedding_models(model_type: str,
))
))
def
test_video_models
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
def
test_video_models
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
video_assets
:
_
VideoAssets
,
monkeypatch
):
video_assets
:
Video
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -814,7 +814,7 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
...
@@ -814,7 +814,7 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
monkeypatch
):
image_assets
:
Image
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -840,7 +840,7 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
...
@@ -840,7 +840,7 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
monkeypatch
):
image_assets
:
Image
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -866,7 +866,8 @@ def test_image_embedding_models_heavy(model_type: str,
...
@@ -866,7 +866,8 @@ def test_image_embedding_models_heavy(model_type: str,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_ImageAssets
,
monkeypatch
):
image_assets
:
ImageTestAssets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
@@ -889,7 +890,7 @@ def test_image_embedding_models_heavy(model_type: str,
...
@@ -889,7 +890,7 @@ def test_image_embedding_models_heavy(model_type: str,
def
test_video_models_heavy
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
def
test_video_models_heavy
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
video_assets
:
_
VideoAssets
,
monkeypatch
):
video_assets
:
Video
Test
Assets
,
monkeypatch
):
if
model_type
in
REQUIRES_V0_MODELS
:
if
model_type
in
REQUIRES_V0_MODELS
:
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
...
...
tests/models/multimodal/generation/test_florence2.py
View file @
d7543862
...
@@ -9,7 +9,7 @@ from vllm.inputs.data import ExplicitEncoderDecoderPrompt, TextPrompt
...
@@ -9,7 +9,7 @@ from vllm.inputs.data import ExplicitEncoderDecoderPrompt, TextPrompt
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_
ImageAssets
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
Image
Test
Assets
,
VllmRunner
from
...utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
MODELS
=
[
"microsoft/Florence-2-base"
]
MODELS
=
[
"microsoft/Florence-2-base"
]
...
@@ -118,7 +118,7 @@ def run_test(
...
@@ -118,7 +118,7 @@ def run_test(
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
64
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
def
test_models
(
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
model
:
str
,
image_assets
:
Image
Test
Assets
,
model
:
str
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
size_factors
:
list
[
int
],
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
...
...
tests/models/multimodal/generation/test_granite_speech.py
View file @
d7543862
...
@@ -9,7 +9,8 @@ from transformers import AutoModelForSpeechSeq2Seq
...
@@ -9,7 +9,8 @@ from transformers import AutoModelForSpeechSeq2Seq
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
....conftest
import
HfRunner
,
PromptAudioInput
,
VllmRunner
,
_AudioAssets
from
....conftest
import
(
AudioTestAssets
,
HfRunner
,
PromptAudioInput
,
VllmRunner
)
from
...registry
import
HF_EXAMPLE_MODELS
from
...registry
import
HF_EXAMPLE_MODELS
from
...utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
...
@@ -116,9 +117,9 @@ def run_test(
...
@@ -116,9 +117,9 @@ def run_test(
@
pytest
.
mark
.
parametrize
(
"max_model_len"
,
[
2048
])
@
pytest
.
mark
.
parametrize
(
"max_model_len"
,
[
2048
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
10
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
10
])
def
test_models
(
hf_runner
,
vllm_runner
,
model
:
str
,
audio_assets
:
_AudioAssets
,
def
test_models
(
hf_runner
,
vllm_runner
,
model
:
str
,
dtype
:
str
,
max_model_len
:
int
,
max_tokens
:
int
,
audio_assets
:
AudioTestAssets
,
dtype
:
str
,
max_model_len
:
int
,
num_logprobs
:
int
)
->
None
:
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
...
...
tests/models/multimodal/generation/test_interleaved.py
View file @
d7543862
...
@@ -29,7 +29,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
...
@@ -29,7 +29,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
image_cherry
=
ImageAsset
(
"cherry_blossom"
).
pil_image
.
convert
(
"RGB"
)
image_cherry
=
ImageAsset
(
"cherry_blossom"
).
pil_image
.
convert
(
"RGB"
)
image_stop
=
ImageAsset
(
"stop_sign"
).
pil_image
.
convert
(
"RGB"
)
image_stop
=
ImageAsset
(
"stop_sign"
).
pil_image
.
convert
(
"RGB"
)
images
=
[
image_cherry
,
image_stop
]
images
=
[
image_cherry
,
image_stop
]
video
=
VideoAsset
(
name
=
"
sample_demo_1
"
,
num_frames
=
16
).
np_ndarrays
video
=
VideoAsset
(
name
=
"
baby_reading
"
,
num_frames
=
16
).
np_ndarrays
inputs
=
[
inputs
=
[
(
(
...
...
tests/models/multimodal/generation/test_mllama.py
View file @
d7543862
...
@@ -14,8 +14,8 @@ from vllm.model_executor.models.mllama import MllamaForConditionalGeneration
...
@@ -14,8 +14,8 @@ from vllm.model_executor.models.mllama import MllamaForConditionalGeneration
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
ImageTestAssets
,
_ImageAssets
)
PromptImageInput
,
VllmRunner
)
from
....quantization.utils
import
is_quant_method_supported
from
....quantization.utils
import
is_quant_method_supported
from
....utils
import
(
create_new_process_for_each_test
,
large_gpu_test
,
from
....utils
import
(
create_new_process_for_each_test
,
large_gpu_test
,
multi_gpu_test
)
multi_gpu_test
)
...
@@ -90,7 +90,7 @@ def vllm_to_hf_output(vllm_output: tuple[list[int], str,
...
@@ -90,7 +90,7 @@ def vllm_to_hf_output(vllm_output: tuple[list[int], str,
def
_get_inputs
(
def
_get_inputs
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
*
,
*
,
size_factors
:
Optional
[
list
[
float
]]
=
None
,
size_factors
:
Optional
[
list
[
float
]]
=
None
,
sizes
:
Optional
[
list
[
tuple
[
int
,
int
]]]
=
None
,
sizes
:
Optional
[
list
[
tuple
[
int
,
int
]]]
=
None
,
...
@@ -126,7 +126,7 @@ def _get_inputs(
...
@@ -126,7 +126,7 @@ def _get_inputs(
def
run_test
(
def
run_test
(
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model
:
str
,
model
:
str
,
*
,
*
,
size_factors
:
list
[
float
],
size_factors
:
list
[
float
],
...
@@ -143,7 +143,7 @@ def run_test(
...
@@ -143,7 +143,7 @@ def run_test(
def
run_test
(
def
run_test
(
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model
:
str
,
model
:
str
,
*
,
*
,
sizes
:
list
[
tuple
[
int
,
int
]],
sizes
:
list
[
tuple
[
int
,
int
]],
...
@@ -159,7 +159,7 @@ def run_test(
...
@@ -159,7 +159,7 @@ def run_test(
def
run_test
(
def
run_test
(
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model
:
str
,
model
:
str
,
*
,
*
,
size_factors
:
Optional
[
list
[
float
]]
=
None
,
size_factors
:
Optional
[
list
[
float
]]
=
None
,
...
@@ -433,7 +433,7 @@ def test_models_distributed(
...
@@ -433,7 +433,7 @@ def test_models_distributed(
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
reason
=
'bitsandbytes is not supported on this GPU type.'
)
reason
=
'bitsandbytes is not supported on this GPU type.'
)
def
test_bnb_regression
(
def
test_bnb_regression
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model
:
str
,
model
:
str
,
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
...
@@ -473,7 +473,7 @@ def test_bnb_regression(
...
@@ -473,7 +473,7 @@ def test_bnb_regression(
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"bfloat16"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
def
test_explicit_implicit_prompt
(
def
test_explicit_implicit_prompt
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model
:
str
,
model
:
str
,
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
...
...
tests/models/multimodal/generation/test_qwen2_vl.py
View file @
d7543862
...
@@ -50,7 +50,7 @@ IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
...
@@ -50,7 +50,7 @@ IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
})
})
VIDEO_PROMPTS
=
VIDEO_ASSETS
.
prompts
({
VIDEO_PROMPTS
=
VIDEO_ASSETS
.
prompts
({
"
sample_demo_1
"
:
"
baby_reading
"
:
qwen2_vl_chat_template
(
qwen2_vl_chat_template
(
VIDEO_PLACEHOLDER
,
VIDEO_PLACEHOLDER
,
"Describe this video with a short sentence "
,
"Describe this video with a short sentence "
,
...
...
tests/models/multimodal/generation/test_ultravox.py
View file @
d7543862
...
@@ -11,13 +11,22 @@ from transformers import AutoModel, AutoTokenizer
...
@@ -11,13 +11,22 @@ from transformers import AutoModel, AutoTokenizer
from
vllm.multimodal.audio
import
resample_audio_librosa
from
vllm.multimodal.audio
import
resample_audio_librosa
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
....conftest
import
HfRunner
,
VllmRunner
,
_AudioAssets
from
....conftest
import
AUDIO_ASSETS
,
AudioTestAssets
,
HfRunner
,
VllmRunner
from
....utils
import
RemoteOpenAIServer
from
....utils
import
RemoteOpenAIServer
from
...registry
import
HF_EXAMPLE_MODELS
from
...registry
import
HF_EXAMPLE_MODELS
from
...utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
MODEL_NAME
=
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
MODEL_NAME
=
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
AUDIO_PROMPTS
=
AUDIO_ASSETS
.
prompts
({
"mary_had_lamb"
:
"Transcribe this into English."
,
"winning_call"
:
"What is happening in this audio clip?"
,
})
MULTI_AUDIO_PROMPT
=
"Describe each of the audios above."
AudioTuple
=
tuple
[
np
.
ndarray
,
int
]
AudioTuple
=
tuple
[
np
.
ndarray
,
int
]
VLLM_PLACEHOLDER
=
"<|audio|>"
VLLM_PLACEHOLDER
=
"<|audio|>"
...
@@ -31,12 +40,6 @@ CHUNKED_PREFILL_KWARGS = {
...
@@ -31,12 +40,6 @@ CHUNKED_PREFILL_KWARGS = {
}
}
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
(
"mary_had_lamb"
,
"winning_call"
))
def
audio
(
request
):
from
vllm.assets.audio
import
AudioAsset
return
AudioAsset
(
request
.
param
)
def
params_kwargs_to_cli_args
(
params_kwargs
:
dict
[
str
,
Any
])
->
list
[
str
]:
def
params_kwargs_to_cli_args
(
params_kwargs
:
dict
[
str
,
Any
])
->
list
[
str
]:
"""Convert kwargs to CLI args."""
"""Convert kwargs to CLI args."""
args
=
[]
args
=
[]
...
@@ -53,7 +56,7 @@ def params_kwargs_to_cli_args(params_kwargs: dict[str, Any]) -> list[str]:
...
@@ -53,7 +56,7 @@ def params_kwargs_to_cli_args(params_kwargs: dict[str, Any]) -> list[str]:
pytest
.
param
({},
marks
=
pytest
.
mark
.
cpu_model
),
pytest
.
param
({},
marks
=
pytest
.
mark
.
cpu_model
),
pytest
.
param
(
CHUNKED_PREFILL_KWARGS
),
pytest
.
param
(
CHUNKED_PREFILL_KWARGS
),
])
])
def
server
(
request
,
audio_assets
:
_
AudioAssets
):
def
server
(
request
,
audio_assets
:
Audio
Test
Assets
):
args
=
[
args
=
[
"--dtype"
,
"bfloat16"
,
"--max-model-len"
,
"4096"
,
"--enforce-eager"
,
"--dtype"
,
"bfloat16"
,
"--max-model-len"
,
"4096"
,
"--enforce-eager"
,
"--limit-mm-per-prompt"
,
"--limit-mm-per-prompt"
,
...
@@ -199,15 +202,19 @@ def run_multi_audio_test(
...
@@ -199,15 +202,19 @@ def run_multi_audio_test(
pytest
.
param
({},
marks
=
pytest
.
mark
.
cpu_model
),
pytest
.
param
({},
marks
=
pytest
.
mark
.
cpu_model
),
pytest
.
param
(
CHUNKED_PREFILL_KWARGS
),
pytest
.
param
(
CHUNKED_PREFILL_KWARGS
),
])
])
def
test_models
(
hf_runner
,
vllm_runner
,
audio
,
dtype
:
str
,
max_tokens
:
int
,
def
test_models
(
hf_runner
,
vllm_runner
,
audio_assets
:
AudioTestAssets
,
num_logprobs
:
int
,
vllm_kwargs
:
dict
)
->
None
:
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
vllm_kwargs
:
dict
)
->
None
:
audio_inputs
=
[(
_get_prompt
(
1
,
audio
,
VLLM_PLACEHOLDER
),
_get_prompt
(
1
,
audio
,
HF_PLACEHOLDER
),
audio
.
audio_and_sample_rate
,
)
for
audio
in
audio_assets
]
vllm_prompt
=
_get_prompt
(
1
,
"Describe the audio above."
,
VLLM_PLACEHOLDER
)
hf_prompt
=
_get_prompt
(
1
,
"Describe the audio above."
,
HF_PLACEHOLDER
)
run_test
(
run_test
(
hf_runner
,
hf_runner
,
vllm_runner
,
vllm_runner
,
[(
vllm_prompt
,
hf_prompt
,
audio
.
audio_and_sample_rate
)]
,
audio_inputs
,
MODEL_NAME
,
MODEL_NAME
,
dtype
=
dtype
,
dtype
=
dtype
,
max_tokens
=
max_tokens
,
max_tokens
=
max_tokens
,
...
@@ -224,13 +231,12 @@ def test_models(hf_runner, vllm_runner, audio, dtype: str, max_tokens: int,
...
@@ -224,13 +231,12 @@ def test_models(hf_runner, vllm_runner, audio, dtype: str, max_tokens: int,
pytest
.
param
({},
marks
=
pytest
.
mark
.
cpu_model
),
pytest
.
param
({},
marks
=
pytest
.
mark
.
cpu_model
),
pytest
.
param
(
CHUNKED_PREFILL_KWARGS
),
pytest
.
param
(
CHUNKED_PREFILL_KWARGS
),
])
])
def
test_models_with_multiple_audios
(
vllm_runner
,
audio_assets
:
_AudioAssets
,
def
test_models_with_multiple_audios
(
vllm_runner
,
dtype
:
str
,
max_tokens
:
int
,
audio_assets
:
AudioTestAssets
,
dtype
:
str
,
num_logprobs
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
vllm_kwargs
:
dict
)
->
None
:
vllm_kwargs
:
dict
)
->
None
:
vllm_prompt
=
_get_prompt
(
len
(
audio_assets
),
vllm_prompt
=
_get_prompt
(
len
(
audio_assets
),
MULTI_AUDIO_PROMPT
,
"Describe each of the audios above."
,
VLLM_PLACEHOLDER
)
VLLM_PLACEHOLDER
)
run_multi_audio_test
(
run_multi_audio_test
(
vllm_runner
,
vllm_runner
,
...
@@ -245,7 +251,7 @@ def test_models_with_multiple_audios(vllm_runner, audio_assets: _AudioAssets,
...
@@ -245,7 +251,7 @@ def test_models_with_multiple_audios(vllm_runner, audio_assets: _AudioAssets,
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_online_serving
(
client
,
audio_assets
:
_
AudioAssets
):
async
def
test_online_serving
(
client
,
audio_assets
:
Audio
Test
Assets
):
"""Exercises online serving with/without chunked prefill enabled."""
"""Exercises online serving with/without chunked prefill enabled."""
messages
=
[{
messages
=
[{
...
...
tests/models/multimodal/generation/vlm_utils/builders.py
View file @
d7543862
...
@@ -11,7 +11,7 @@ from vllm.multimodal.image import rescale_image_size
...
@@ -11,7 +11,7 @@ from vllm.multimodal.image import rescale_image_size
from
vllm.multimodal.video
import
(
rescale_video_size
,
resize_video
,
from
vllm.multimodal.video
import
(
rescale_video_size
,
resize_video
,
sample_frames_from_video
)
sample_frames_from_video
)
from
.....conftest
import
_
ImageAssets
,
_
VideoAssets
from
.....conftest
import
Image
Test
Assets
,
Video
Test
Assets
from
.types
import
(
SINGLE_IMAGE_BASE_PROMPTS
,
TEST_IMG_PLACEHOLDER
,
from
.types
import
(
SINGLE_IMAGE_BASE_PROMPTS
,
TEST_IMG_PLACEHOLDER
,
TEST_VIDEO_PLACEHOLDER
,
VIDEO_BASE_PROMPT
,
TEST_VIDEO_PLACEHOLDER
,
VIDEO_BASE_PROMPT
,
ImageSizeWrapper
,
SizeType
,
VLMTestInfo
)
ImageSizeWrapper
,
SizeType
,
VLMTestInfo
)
...
@@ -69,7 +69,7 @@ def get_model_prompts(base_prompts: Iterable[str],
...
@@ -69,7 +69,7 @@ def get_model_prompts(base_prompts: Iterable[str],
def
build_single_image_inputs_from_test_info
(
def
build_single_image_inputs_from_test_info
(
test_info
:
VLMTestInfo
,
test_info
:
VLMTestInfo
,
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
size_wrapper
:
ImageSizeWrapper
,
size_wrapper
:
ImageSizeWrapper
,
tmp_path
:
Optional
[
PosixPath
]
=
None
):
tmp_path
:
Optional
[
PosixPath
]
=
None
):
if
test_info
.
prompt_formatter
is
None
:
if
test_info
.
prompt_formatter
is
None
:
...
@@ -116,7 +116,7 @@ def build_single_image_inputs(images, model_prompts,
...
@@ -116,7 +116,7 @@ def build_single_image_inputs(images, model_prompts,
def
build_multi_image_inputs_from_test_info
(
def
build_multi_image_inputs_from_test_info
(
test_info
:
VLMTestInfo
,
test_info
:
VLMTestInfo
,
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
size_wrapper
:
ImageSizeWrapper
,
size_wrapper
:
ImageSizeWrapper
,
tmp_path
:
Optional
[
PosixPath
]
=
None
):
tmp_path
:
Optional
[
PosixPath
]
=
None
):
if
test_info
.
prompt_formatter
is
None
:
if
test_info
.
prompt_formatter
is
None
:
...
@@ -159,7 +159,7 @@ def build_multi_image_inputs(image_lists, model_prompts,
...
@@ -159,7 +159,7 @@ def build_multi_image_inputs(image_lists, model_prompts,
def
build_embedding_inputs_from_test_info
(
def
build_embedding_inputs_from_test_info
(
test_info
:
VLMTestInfo
,
test_info
:
VLMTestInfo
,
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
size_wrapper
:
ImageSizeWrapper
,
size_wrapper
:
ImageSizeWrapper
,
):
):
# These conditions will always be true if invoked through filtering,
# These conditions will always be true if invoked through filtering,
...
@@ -192,7 +192,7 @@ def build_embedding_inputs_from_test_info(
...
@@ -192,7 +192,7 @@ def build_embedding_inputs_from_test_info(
def
build_video_inputs_from_test_info
(
def
build_video_inputs_from_test_info
(
test_info
:
VLMTestInfo
,
test_info
:
VLMTestInfo
,
video_assets
:
_
VideoAssets
,
video_assets
:
Video
Test
Assets
,
size_wrapper
:
ImageSizeWrapper
,
size_wrapper
:
ImageSizeWrapper
,
num_frames
:
int
,
num_frames
:
int
,
):
):
...
...
tests/models/multimodal/generation/vlm_utils/model_utils.py
View file @
d7543862
...
@@ -16,7 +16,7 @@ from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
...
@@ -16,7 +16,7 @@ from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.transformers_utils.tokenizer
import
patch_padding_side
from
vllm.transformers_utils.tokenizer
import
patch_padding_side
from
.....conftest
import
HfRunner
,
ImageAsset
,
_
ImageAssets
from
.....conftest
import
HfRunner
,
ImageAsset
,
Image
Test
Assets
from
.types
import
RunnerOutput
from
.types
import
RunnerOutput
...
@@ -238,14 +238,14 @@ def minimax_vl_01_hf_output(hf_output: RunnerOutput,
...
@@ -238,14 +238,14 @@ def minimax_vl_01_hf_output(hf_output: RunnerOutput,
####### Functions for converting image assets to embeddings
####### Functions for converting image assets to embeddings
def
get_llava_embeddings
(
image_assets
:
_
ImageAssets
):
def
get_llava_embeddings
(
image_assets
:
Image
Test
Assets
):
return
[
asset
.
image_embeds
for
asset
in
image_assets
]
return
[
asset
.
image_embeds
for
asset
in
image_assets
]
####### Prompt path encoders for models that need models on disk
####### Prompt path encoders for models that need models on disk
def
qwen_prompt_path_encoder
(
def
qwen_prompt_path_encoder
(
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
list
[
ImageAsset
],
tmp_path
:
PosixPath
,
prompt
:
str
,
_
ImageAssets
])
->
str
:
assets
:
Union
[
list
[
ImageAsset
],
Image
Test
Assets
])
->
str
:
"""Given a temporary dir path, export one or more image assets into the
"""Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that
tempdir & replace its contents with the local path to the string so that
the HF version of Qwen-VL can resolve the path and load the image in its
the HF version of Qwen-VL can resolve the path and load the image in its
...
...
tests/models/multimodal/generation/vlm_utils/runners.py
View file @
d7543862
...
@@ -4,7 +4,8 @@ types / modalities.
...
@@ -4,7 +4,8 @@ types / modalities.
"""
"""
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
.....conftest
import
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
from
.....conftest
import
(
HfRunner
,
ImageTestAssets
,
VideoTestAssets
,
VllmRunner
)
from
.
import
builders
,
core
from
.
import
builders
,
core
from
.types
import
ExpandableVLMTestArgs
,
VLMTestInfo
from
.types
import
ExpandableVLMTestArgs
,
VLMTestInfo
...
@@ -14,7 +15,7 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
...
@@ -14,7 +15,7 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
):
image_assets
:
Image
Test
Assets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
inputs
=
builders
.
build_single_image_inputs_from_test_info
(
model_test_info
,
image_assets
,
test_case
.
size_wrapper
,
tmp_path
)
model_test_info
,
image_assets
,
test_case
.
size_wrapper
,
tmp_path
)
...
@@ -37,7 +38,7 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
...
@@ -37,7 +38,7 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
):
image_assets
:
Image
Test
Assets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
inputs
=
builders
.
build_multi_image_inputs_from_test_info
(
model_test_info
,
image_assets
,
test_case
.
size_wrapper
,
tmp_path
)
model_test_info
,
image_assets
,
test_case
.
size_wrapper
,
tmp_path
)
...
@@ -60,7 +61,7 @@ def run_embedding_test(*, model_test_info: VLMTestInfo,
...
@@ -60,7 +61,7 @@ def run_embedding_test(*, model_test_info: VLMTestInfo,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
image_assets
:
_
ImageAssets
):
image_assets
:
Image
Test
Assets
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
inputs
,
vllm_embeddings
=
builders
.
build_embedding_inputs_from_test_info
(
model_test_info
,
image_assets
,
test_case
.
size_wrapper
)
model_test_info
,
image_assets
,
test_case
.
size_wrapper
)
...
@@ -86,7 +87,7 @@ def run_video_test(
...
@@ -86,7 +87,7 @@ def run_video_test(
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
type
[
HfRunner
],
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
video_assets
:
_
VideoAssets
,
video_assets
:
Video
Test
Assets
,
):
):
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
size_wrapper
is
not
None
assert
test_case
.
num_video_frames
is
not
None
assert
test_case
.
num_video_frames
is
not
None
...
...
tests/models/multimodal/generation/vlm_utils/types.py
View file @
d7543862
...
@@ -15,7 +15,7 @@ from vllm.config import TaskOption
...
@@ -15,7 +15,7 @@ from vllm.config import TaskOption
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
.....conftest
import
IMAGE_ASSETS
,
HfRunner
,
ImageAsset
,
_
ImageAssets
from
.....conftest
import
IMAGE_ASSETS
,
HfRunner
,
ImageAsset
,
Image
Test
Assets
from
....utils
import
check_logprobs_close
from
....utils
import
check_logprobs_close
# meta image tag; will be replaced by the appropriate tag for the model
# meta image tag; will be replaced by the appropriate tag for the model
...
@@ -85,7 +85,7 @@ class VLMTestInfo(NamedTuple):
...
@@ -85,7 +85,7 @@ class VLMTestInfo(NamedTuple):
# Function for converting ImageAssets to image embeddings;
# Function for converting ImageAssets to image embeddings;
# We need to define this explicitly for embedding tests
# We need to define this explicitly for embedding tests
convert_assets_to_embeddings
:
Optional
[
Callable
[[
_
ImageAssets
],
convert_assets_to_embeddings
:
Optional
[
Callable
[[
Image
Test
Assets
],
torch
.
Tensor
]]
=
None
torch
.
Tensor
]]
=
None
# Exposed options for vLLM runner; we change these in a several tests,
# Exposed options for vLLM runner; we change these in a several tests,
...
@@ -141,7 +141,7 @@ class VLMTestInfo(NamedTuple):
...
@@ -141,7 +141,7 @@ class VLMTestInfo(NamedTuple):
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for Qwen-VL, which requires encoding the image path / url into the prompt
# for HF runner
# for HF runner
prompt_path_encoder
:
Optional
[
prompt_path_encoder
:
Optional
[
Callable
[[
PosixPath
,
str
,
Union
[
list
[
ImageAsset
],
_
ImageAssets
]],
Callable
[[
PosixPath
,
str
,
Union
[
list
[
ImageAsset
],
Image
Test
Assets
]],
str
]]
=
None
# noqa: E501
str
]]
=
None
# noqa: E501
# Allows configuring a test to run with custom inputs
# Allows configuring a test to run with custom inputs
...
...
tests/models/multimodal/pooling/test_intern_vit.py
View file @
d7543862
...
@@ -7,7 +7,7 @@ from transformers import AutoConfig, AutoModel, CLIPImageProcessor
...
@@ -7,7 +7,7 @@ from transformers import AutoConfig, AutoModel, CLIPImageProcessor
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
....conftest
import
_
ImageAssets
from
....conftest
import
Image
Test
Assets
# we use snapshot_download to prevent conflicts between
# we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner
# dynamic_module and trust_remote_code for hf_runner
...
@@ -15,7 +15,7 @@ DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
...
@@ -15,7 +15,7 @@ DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
def
run_intern_vit_test
(
def
run_intern_vit_test
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model_id
:
str
,
model_id
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
...
tests/models/multimodal/processing/test_h2ovl.py
View file @
d7543862
...
@@ -11,7 +11,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
...
@@ -11,7 +11,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
....conftest
import
_
ImageAssets
from
....conftest
import
Image
Test
Assets
from
...utils
import
build_model_context
from
...utils
import
build_model_context
...
@@ -137,7 +137,7 @@ def _run_check(
...
@@ -137,7 +137,7 @@ def _run_check(
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
def
test_processor_override
(
def
test_processor_override
(
model_id
:
str
,
model_id
:
str
,
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
size_factors
:
list
[
int
],
size_factors
:
list
[
int
],
min_dynamic_patch
:
int
,
min_dynamic_patch
:
int
,
max_dynamic_patch
:
int
,
max_dynamic_patch
:
int
,
...
...
tests/models/multimodal/processing/test_idefics3.py
View file @
d7543862
...
@@ -5,7 +5,7 @@ from transformers import Idefics3Config
...
@@ -5,7 +5,7 @@ from transformers import Idefics3Config
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
....conftest
import
_
ImageAssets
from
....conftest
import
Image
Test
Assets
from
...utils
import
build_model_context
from
...utils
import
build_model_context
...
@@ -21,7 +21,7 @@ from ...utils import build_model_context
...
@@ -21,7 +21,7 @@ from ...utils import build_model_context
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
def
test_processor_override
(
def
test_processor_override
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model_id
:
str
,
model_id
:
str
,
mm_processor_kwargs
:
dict
[
str
,
object
],
mm_processor_kwargs
:
dict
[
str
,
object
],
expected_toks_per_img
:
int
,
expected_toks_per_img
:
int
,
...
...
tests/models/multimodal/processing/test_internvl.py
View file @
d7543862
...
@@ -11,7 +11,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
...
@@ -11,7 +11,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
....conftest
import
_
ImageAssets
from
....conftest
import
Image
Test
Assets
from
...utils
import
build_model_context
from
...utils
import
build_model_context
...
@@ -94,7 +94,7 @@ def _run_check(
...
@@ -94,7 +94,7 @@ def _run_check(
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"kwargs_on_init"
,
[
True
,
False
])
def
test_processor_override
(
def
test_processor_override
(
model_id
:
str
,
model_id
:
str
,
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
size_factors
:
list
[
int
],
size_factors
:
list
[
int
],
min_dynamic_patch
:
int
,
min_dynamic_patch
:
int
,
max_dynamic_patch
:
int
,
max_dynamic_patch
:
int
,
...
...
tests/models/multimodal/processing/test_llama4.py
View file @
d7543862
...
@@ -6,7 +6,7 @@ import pytest
...
@@ -6,7 +6,7 @@ import pytest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.transformers_utils.tokenizer
import
encode_tokens
from
vllm.transformers_utils.tokenizer
import
encode_tokens
from
....conftest
import
_
ImageAssets
from
....conftest
import
Image
Test
Assets
from
...utils
import
build_model_context
from
...utils
import
build_model_context
...
@@ -17,7 +17,7 @@ from ...utils import build_model_context
...
@@ -17,7 +17,7 @@ from ...utils import build_model_context
@
pytest
.
mark
.
parametrize
(
"disable_mm_preprocessor_cache"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"disable_mm_preprocessor_cache"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"tokenized_prompt"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"tokenized_prompt"
,
[
True
,
False
])
def
test_processor_override
(
def
test_processor_override
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model_id
:
str
,
model_id
:
str
,
mm_processor_kwargs
:
dict
,
mm_processor_kwargs
:
dict
,
num_imgs
:
int
,
num_imgs
:
int
,
...
...
tests/models/multimodal/processing/test_minimax_vl_01.py
View file @
d7543862
...
@@ -7,14 +7,14 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
...
@@ -7,14 +7,14 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from
vllm.multimodal.parse
import
ImageSize
from
vllm.multimodal.parse
import
ImageSize
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
from
....conftest
import
_
ImageAssets
from
....conftest
import
Image
Test
Assets
from
...utils
import
build_model_context
from
...utils
import
build_model_context
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"MiniMaxAI/MiniMax-VL-01"
])
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"MiniMaxAI/MiniMax-VL-01"
])
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"num_imgs"
,
[
1
,
2
])
def
test_processor_override
(
def
test_processor_override
(
image_assets
:
_
ImageAssets
,
image_assets
:
Image
Test
Assets
,
model_id
:
str
,
model_id
:
str
,
num_imgs
:
int
,
num_imgs
:
int
,
):
):
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment