Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
18e519ec
Unverified
Commit
18e519ec
authored
Jul 19, 2025
by
Isotr0py
Committed by
GitHub
Jul 19, 2025
Browse files
[Bugfix] Fix ndarray video color from VideoAsset (#21064)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
1eaff278
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
130 additions
and
28 deletions
+130
-28
tests/multimodal/test_video.py
tests/multimodal/test_video.py
+80
-23
tests/multimodal/utils.py
tests/multimodal/utils.py
+46
-0
vllm/assets/video.py
vllm/assets/video.py
+4
-5
No files found.
tests/multimodal/test_video.py
View file @
18e519ec
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
tempfile
from
pathlib
import
Path
import
numpy
as
np
import
numpy
as
np
import
numpy.typing
as
npt
import
numpy.typing
as
npt
import
pytest
import
pytest
from
PIL
import
Image
from
vllm
import
envs
from
vllm.assets.base
import
get_vllm_public_assets
from
vllm.assets.video
import
video_to_ndarrays
,
video_to_pil_images_list
from
vllm.multimodal.image
import
ImageMediaIO
from
vllm.multimodal.image
import
ImageMediaIO
from
vllm.multimodal.video
import
(
VIDEO_LOADER_REGISTRY
,
VideoLoader
,
from
vllm.multimodal.video
import
(
VIDEO_LOADER_REGISTRY
,
VideoLoader
,
VideoMediaIO
)
VideoMediaIO
)
from
.utils
import
cosine_similarity
,
create_video_from_image
,
normalize_image
NUM_FRAMES
=
10
NUM_FRAMES
=
10
FAKE_OUTPUT_1
=
np
.
random
.
rand
(
NUM_FRAMES
,
1280
,
720
,
3
)
FAKE_OUTPUT_1
=
np
.
random
.
rand
(
NUM_FRAMES
,
1280
,
720
,
3
)
FAKE_OUTPUT_2
=
np
.
random
.
rand
(
NUM_FRAMES
,
1280
,
720
,
3
)
FAKE_OUTPUT_2
=
np
.
random
.
rand
(
NUM_FRAMES
,
1280
,
720
,
3
)
...
@@ -59,8 +67,9 @@ class Assert10Frames1FPSVideoLoader(VideoLoader):
...
@@ -59,8 +67,9 @@ class Assert10Frames1FPSVideoLoader(VideoLoader):
return
FAKE_OUTPUT_2
return
FAKE_OUTPUT_2
def
test_video_media_io_kwargs
():
def
test_video_media_io_kwargs
(
monkeypatch
:
pytest
.
MonkeyPatch
):
envs
.
VLLM_VIDEO_LOADER_BACKEND
=
"assert_10_frames_1_fps"
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_VIDEO_LOADER_BACKEND"
,
"assert_10_frames_1_fps"
)
imageio
=
ImageMediaIO
()
imageio
=
ImageMediaIO
()
# Verify that different args pass/fail assertions as expected.
# Verify that different args pass/fail assertions as expected.
...
@@ -86,3 +95,51 @@ def test_video_media_io_kwargs():
...
@@ -86,3 +95,51 @@ def test_video_media_io_kwargs():
with
pytest
.
raises
(
AssertionError
,
match
=
"bad fps"
):
with
pytest
.
raises
(
AssertionError
,
match
=
"bad fps"
):
videoio
=
VideoMediaIO
(
imageio
,
**
{
"num_frames"
:
10
,
"fps"
:
2.0
})
videoio
=
VideoMediaIO
(
imageio
,
**
{
"num_frames"
:
10
,
"fps"
:
2.0
})
_
=
videoio
.
load_bytes
(
b
"test"
)
_
=
videoio
.
load_bytes
(
b
"test"
)
def _assert_frames_match_image(frames, expected: npt.NDArray) -> None:
    """Assert that every frame is per-pixel cosine-similar to ``expected``.

    Args:
        frames: Iterable of decoded frames (ndarrays or PIL images); each is
            converted with ``np.array`` before comparison.
        expected: Reference image already passed through ``normalize_image``.
    """
    for frame in frames:
        sim = cosine_similarity(normalize_image(np.array(frame)), expected)
        # Pixels with a zero norm divide 0 by 0 and come out as NaN; only a
        # negligible fraction of them is tolerated.
        assert np.sum(np.isnan(sim)) / sim.size < 0.001
        assert np.nanmean(sim) > 0.99


@pytest.mark.parametrize("is_color", [True, False])
@pytest.mark.parametrize("fourcc, ext", [("mp4v", "mp4"), ("XVID", "avi")])
def test_opencv_video_io_colorspace(is_color: bool, fourcc: str, ext: str):
    """
    Test all functions that use OpenCV for video I/O return RGB format.
    Both RGB and grayscale videos are tested.
    """
    image_path = get_vllm_public_assets(filename="stop_sign.jpg",
                                        s3_prefix="vision_model_images")
    image = Image.open(image_path)
    with tempfile.TemporaryDirectory() as tmpdir:
        if not is_color:
            image_path = f"{tmpdir}/test_grayscale_image.png"
            image = image.convert("L")
            image.save(image_path)
            # Convert to gray RGB for comparison
            image = image.convert("RGB")
        video_path = f"{tmpdir}/test_RGB_video.{ext}"
        create_video_from_image(
            image_path,
            video_path,
            num_frames=2,
            is_color=is_color,
            fourcc=fourcc,
        )

        # The reference is identical for every frame and every loader, so
        # normalize it once instead of once per frame.
        expected = normalize_image(np.array(image))

        # Each loading path is checked against the same reference image.
        _assert_frames_match_image(video_to_ndarrays(video_path), expected)

        _assert_frames_match_image(video_to_pil_images_list(video_path),
                                   expected)

        io_frames, _ = VideoMediaIO(ImageMediaIO()).load_file(Path(video_path))
        _assert_frames_match_image(io_frames, expected)
tests/multimodal/utils.py
View file @
18e519ec
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
cv2
import
numpy
as
np
import
numpy
as
np
import
numpy.typing
as
npt
from
PIL
import
Image
from
PIL
import
Image
...
@@ -31,3 +33,47 @@ def random_audio(
...
@@ -31,3 +33,47 @@ def random_audio(
):
):
audio_len
=
rng
.
randint
(
min_len
,
max_len
)
audio_len
=
rng
.
randint
(
min_len
,
max_len
)
return
rng
.
rand
(
audio_len
),
sr
return
rng
.
rand
(
audio_len
),
sr
def create_video_from_image(
    image_path: str,
    video_path: str,
    num_frames: int = 10,
    fps: float = 1.0,
    is_color: bool = True,
    fourcc: str = "mp4v",
):
    """Write a video made of ``num_frames`` copies of a single still image.

    Args:
        image_path: Path of the source image, read with ``cv2.imread``.
            Coerced with ``str()`` so path-like objects are accepted too.
        video_path: Destination path of the encoded video (also coerced
            with ``str()`` before being handed to OpenCV).
        num_frames: Number of identical frames to write.
        fps: Frame rate passed to ``cv2.VideoWriter``.
        is_color: When ``False`` the image is converted to single-channel
            grayscale and the writer is opened with ``isColor=False``.
        fourcc: Four-character codec code, e.g. ``"mp4v"`` or ``"XVID"``.

    Returns:
        ``video_path`` exactly as it was passed in.

    Raises:
        OSError: If ``cv2.imread`` could not read ``image_path``.
        RuntimeError: If the video writer could not be opened.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Failed to read image: {image_path}")

    if not is_color:
        # Convert to grayscale if is_color is False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Works for both the (H, W) grayscale and the (H, W, C) color array.
    height, width = image.shape[:2]

    video_writer = cv2.VideoWriter(
        str(video_path),
        cv2.VideoWriter_fourcc(*fourcc),
        fps,
        (width, height),
        isColor=is_color,
    )
    try:
        if not video_writer.isOpened():
            # Without this check every write() would be silently dropped.
            raise RuntimeError(
                f"Failed to open video writer for {video_path} "
                f"(fourcc={fourcc!r})")
        for _ in range(num_frames):
            video_writer.write(image)
    finally:
        # Always release so the container is finalized and the handle freed.
        video_writer.release()
    return video_path
def cosine_similarity(A: npt.NDArray,
                      B: npt.NDArray,
                      axis: int = -1) -> npt.NDArray:
    """Compute cosine similarity between two arrays of vectors.

    Args:
        A: First array; vectors lie along ``axis``.
        B: Second array, multiplied element-wise with ``A``.
        axis: Axis along which the dot product and norms are reduced.

    Returns:
        ``sum(A * B) / (||A|| * ||B||)`` reduced over ``axis``. Entries where
        either vector has zero norm are NaN (or +/-inf); callers that need to
        ignore them should use NaN-aware reductions.
    """
    dot = np.sum(A * B, axis=axis)
    norm_product = np.linalg.norm(A, axis=axis) * np.linalg.norm(B, axis=axis)
    # Zero-norm vectors are an expected input (e.g. black pixels); keep the
    # NaN result but do not emit a RuntimeWarning for the 0/0 division.
    with np.errstate(divide="ignore", invalid="ignore"):
        return dot / norm_product
def normalize_image(image: npt.NDArray) -> npt.NDArray:
    """Cast the image to float32 and divide by 255.

    Pixel values in the 0-255 range therefore map onto [0, 1].
    """
    as_float32 = image.astype(np.float32)
    return np.divide(as_float32, 255.0)
\ No newline at end of file
vllm/assets/video.py
View file @
18e519ec
...
@@ -59,7 +59,9 @@ def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:
...
@@ -59,7 +59,9 @@ def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:
if
idx
in
frame_indices
:
# only decompress needed
if
idx
in
frame_indices
:
# only decompress needed
ret
,
frame
=
cap
.
retrieve
()
ret
,
frame
=
cap
.
retrieve
()
if
ret
:
if
ret
:
frames
.
append
(
frame
)
# OpenCV uses BGR format, we need to convert it to RGB
# for PIL and transformers compatibility
frames
.
append
(
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_BGR2RGB
))
frames
=
np
.
stack
(
frames
)
frames
=
np
.
stack
(
frames
)
if
len
(
frames
)
<
num_frames
:
if
len
(
frames
)
<
num_frames
:
...
@@ -71,10 +73,7 @@ def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:
...
@@ -71,10 +73,7 @@ def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:
def
video_to_pil_images_list
(
path
:
str
,
def
video_to_pil_images_list
(
path
:
str
,
num_frames
:
int
=
-
1
)
->
list
[
Image
.
Image
]:
num_frames
:
int
=
-
1
)
->
list
[
Image
.
Image
]:
frames
=
video_to_ndarrays
(
path
,
num_frames
)
frames
=
video_to_ndarrays
(
path
,
num_frames
)
return
[
return
[
Image
.
fromarray
(
frame
)
for
frame
in
frames
]
Image
.
fromarray
(
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_BGR2RGB
))
for
frame
in
frames
]
def
video_get_metadata
(
path
:
str
)
->
dict
[
str
,
Any
]:
def
video_get_metadata
(
path
:
str
)
->
dict
[
str
,
Any
]:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment