Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f89d0e11
Unverified
Commit
f89d0e11
authored
May 02, 2025
by
Cyrus Leung
Committed by
GitHub
May 01, 2025
Browse files
[Misc] Continue refactoring model tests (#17573)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
b4003d11
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
20 additions
and
16 deletions
+20
-16
examples/offline_inference/qwen2_5_omni/only_thinker.py
examples/offline_inference/qwen2_5_omni/only_thinker.py
+2
-3
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+1
-1
tests/conftest.py
tests/conftest.py
+9
-1
tests/models/multimodal/generation/test_interleaved.py
tests/models/multimodal/generation/test_interleaved.py
+1
-1
tests/models/multimodal/pooling/test_intern_vit.py
tests/models/multimodal/pooling/test_intern_vit.py
+3
-6
vllm/assets/video.py
vllm/assets/video.py
+4
-4
No files found.
examples/offline_inference/qwen2_5_omni/only_thinker.py
View file @
f89d0e11
...
...
@@ -47,8 +47,7 @@ def get_mixed_modalities_query() -> QueryResult:
"image"
:
ImageAsset
(
"cherry_blossom"
).
pil_image
.
convert
(
"RGB"
),
"video"
:
VideoAsset
(
name
=
"sample_demo_1.mp4"
,
num_frames
=
16
).
np_ndarrays
,
VideoAsset
(
name
=
"sample_demo_1"
,
num_frames
=
16
).
np_ndarrays
,
},
},
limit_mm_per_prompt
=
{
...
...
@@ -66,7 +65,7 @@ def get_use_audio_in_video_query() -> QueryResult:
"<|im_start|>user
\n
<|vision_bos|><|VIDEO|><|vision_eos|>"
f
"
{
question
}
<|im_end|>
\n
"
f
"<|im_start|>assistant
\n
"
)
asset
=
VideoAsset
(
name
=
"sample_demo_1
.mp4
"
,
num_frames
=
16
)
asset
=
VideoAsset
(
name
=
"sample_demo_1"
,
num_frames
=
16
)
audio
=
asset
.
get_audio
(
sampling_rate
=
16000
)
assert
not
envs
.
VLLM_USE_V1
,
(
"V1 does not support use_audio_in_video. "
"Please launch this example with "
...
...
examples/offline_inference/vision_language.py
View file @
f89d0e11
...
...
@@ -1109,7 +1109,7 @@ def get_multi_modal_input(args):
if
args
.
modality
==
"video"
:
# Input video and question
video
=
VideoAsset
(
name
=
"sample_demo_1
.mp4
"
,
video
=
VideoAsset
(
name
=
"sample_demo_1"
,
num_frames
=
args
.
num_frames
).
np_ndarrays
vid_questions
=
[
"Why is this video funny?"
]
...
...
tests/conftest.py
View file @
f89d0e11
...
...
@@ -97,13 +97,18 @@ class _VideoAssets(_VideoAssetsBase):
def
__init__
(
self
)
->
None
:
super
().
__init__
([
VideoAsset
(
"sample_demo_1
.mp4
"
),
VideoAsset
(
"sample_demo_1"
),
])
def
prompts
(
self
,
prompts
:
_VideoAssetPrompts
)
->
list
[
str
]:
return
[
prompts
[
"sample_demo_1"
]]
class
_AudioAssetPrompts
(
TypedDict
):
mary_had_lamb
:
str
winning_call
:
str
class
_AudioAssetsBase
(
UserList
[
AudioAsset
]):
pass
...
...
@@ -116,6 +121,9 @@ class _AudioAssets(_AudioAssetsBase):
AudioAsset
(
"winning_call"
),
])
def
prompts
(
self
,
prompts
:
_AudioAssetPrompts
)
->
list
[
str
]:
return
[
prompts
[
"mary_had_lamb"
],
prompts
[
"winning_call"
]]
IMAGE_ASSETS
=
_ImageAssets
()
"""Singleton instance of :class:`_ImageAssets`."""
...
...
tests/models/multimodal/generation/test_interleaved.py
View file @
f89d0e11
...
...
@@ -29,7 +29,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
image_cherry
=
ImageAsset
(
"cherry_blossom"
).
pil_image
.
convert
(
"RGB"
)
image_stop
=
ImageAsset
(
"stop_sign"
).
pil_image
.
convert
(
"RGB"
)
images
=
[
image_cherry
,
image_stop
]
video
=
VideoAsset
(
name
=
"sample_demo_1
.mp4
"
,
num_frames
=
16
).
np_ndarrays
video
=
VideoAsset
(
name
=
"sample_demo_1"
,
num_frames
=
16
).
np_ndarrays
inputs
=
[
(
...
...
tests/models/multimodal/
generation
/test_intern_vit.py
→
tests/models/multimodal/
pooling
/test_intern_vit.py
View file @
f89d0e11
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
import
pytest
import
torch
import
torch.nn
as
nn
from
huggingface_hub
import
snapshot_download
from
transformers
import
AutoConfig
,
AutoModel
,
CLIPImageProcessor
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
....conftest
import
_ImageAssets
# we use snapshot_download to prevent conflicts between
...
...
@@ -20,7 +19,6 @@ def run_intern_vit_test(
model_id
:
str
,
*
,
dtype
:
str
,
distributed_executor_backend
:
Optional
[
str
]
=
None
,
):
model
=
snapshot_download
(
model_id
,
allow_patterns
=
DOWNLOAD_PATTERN
)
...
...
@@ -43,7 +41,6 @@ def run_intern_vit_test(
for
pixel_value
in
pixel_values
]
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.model_executor.models.intern_vit
import
InternVisionModel
vllm_model
=
InternVisionModel
(
config
)
vllm_model
.
load_weights
(
hf_model
.
state_dict
().
items
())
...
...
@@ -71,7 +68,7 @@ def run_intern_vit_test(
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
half
])
@
torch
.
inference_mode
()
def
test_models
(
dist_init
,
image_assets
,
model_id
,
dtype
:
str
)
->
None
:
def
test_models
(
image_assets
,
model_id
,
dtype
:
str
)
->
None
:
run_intern_vit_test
(
image_assets
,
model_id
,
...
...
vllm/assets/video.py
View file @
f89d0e11
...
...
@@ -78,18 +78,18 @@ def video_to_pil_images_list(path: str,
@
dataclass
(
frozen
=
True
)
class
VideoAsset
:
name
:
Literal
[
"sample_demo_1
.mp4
"
]
name
:
Literal
[
"sample_demo_1"
]
num_frames
:
int
=
-
1
@
property
def
pil_images
(
self
)
->
list
[
Image
.
Image
]:
video_path
=
download_video_asset
(
self
.
name
)
video_path
=
download_video_asset
(
self
.
name
+
".mp4"
)
ret
=
video_to_pil_images_list
(
video_path
,
self
.
num_frames
)
return
ret
@
property
def
np_ndarrays
(
self
)
->
npt
.
NDArray
:
video_path
=
download_video_asset
(
self
.
name
)
video_path
=
download_video_asset
(
self
.
name
+
".mp4"
)
ret
=
video_to_ndarrays
(
video_path
,
self
.
num_frames
)
return
ret
...
...
@@ -99,5 +99,5 @@ class VideoAsset:
See also: examples/offline_inference/qwen2_5_omni/only_thinker.py
"""
video_path
=
download_video_asset
(
self
.
name
)
video_path
=
download_video_asset
(
self
.
name
+
".mp4"
)
return
librosa
.
load
(
video_path
,
sr
=
sampling_rate
)[
0
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment