Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
143e4dcc
Unverified
Commit
143e4dcc
authored
Mar 15, 2026
by
Isotr0py
Committed by
GitHub
Mar 15, 2026
Browse files
[Misc] Add online audio_in_video test (#36775)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
6590a3ec
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
100 additions
and
1 deletion
+100
-1
requirements/test.in
requirements/test.in
+1
-0
requirements/test.txt
requirements/test.txt
+2
-0
tests/entrypoints/openai/test_audio_in_video.py
tests/entrypoints/openai/test_audio_in_video.py
+80
-0
tests/multimodal/media/test_audio.py
tests/multimodal/media/test_audio.py
+11
-0
vllm/entrypoints/serve/render/serving.py
vllm/entrypoints/serve/render/serving.py
+6
-1
No files found.
requirements/test.in
View file @
143e4dcc
...
...
@@ -10,6 +10,7 @@ pytest-cov
# testing utils
albumentations # required for Nemotron Parse in test_common.py
av # required for audio_in_video tests
backoff # required for phi4mm test
blobfile # required for kimi-vl test
einops # required for MPT, qwen-vl
...
...
requirements/test.txt
View file @
143e4dcc
...
...
@@ -62,6 +62,8 @@ attrs==24.2.0
# referencing
audioread==3.0.1
# via librosa
av==16.1.0
# via -r requirements/test.in
backoff==2.2.1
# via
# -r requirements/test.in
...
...
tests/entrypoints/openai/test_audio_in_video.py
0 → 100644
View file @
143e4dcc
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
base64
import
json
import
openai
import
pytest
import
pytest_asyncio
from
...conftest
import
VideoTestAssets
from
...utils
import
RemoteOpenAIServer
# Model identifier handed to RemoteOpenAIServer by the `server` fixture and
# sent as the `model` field of each chat completion request in this module.
MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"
@pytest.fixture
def server():
    """Launch a ``RemoteOpenAIServer`` for ``MODEL_NAME`` and yield it.

    The server is entered as a context manager, so it is exited when the
    fixture is torn down.
    """
    # Value for --limit-mm-per-prompt: one audio item and one video item.
    mm_limits = json.dumps({"audio": 1, "video": 1})
    cli_args = [
        "--max-model-len",
        "8192",
        "--enforce-eager",
        "--limit-mm-per-prompt",
        mm_limits,
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as remote_server:
        yield remote_server
@pytest_asyncio.fixture
async def client(server):
    """Yield the async client obtained from the ``server`` fixture.

    The client is used as an async context manager, so it is exited once
    the dependent test finishes.
    """
    async with server.get_async_client() as openai_client:
        yield openai_client
@pytest.mark.core_model
@pytest.mark.asyncio
async def test_online_audio_in_video(
    client: openai.AsyncOpenAI,
    video_assets: VideoTestAssets,
):
    """Send a video through chat completions with `use_audio_in_video=True`.

    The video is read from the first entry of ``video_assets`` and sent
    inline as a base64 ``data:`` URL. Each response must contain exactly one
    choice that stopped because it hit ``max_tokens``.
    """
    # A local asset is embedded instead of a remote video URL; the original
    # author's note says the URL-based videos lack an audio stream.
    with open(video_assets[0].video_path, "rb") as video_file:
        raw_video = video_file.read()
    encoded_video = base64.b64encode(raw_video).decode("utf-8")

    text_part = {"type": "text", "text": "What's in this video?"}
    video_part = {
        "type": "video_url",
        "video_url": {"url": f"data:video/mp4;base64,{encoded_video}"},
    }
    messages = [{"role": "user", "content": [text_part, video_part]}]

    # The identical request is issued twice so the second pass also covers
    # the multimodal processor cache (per the original author's comment).
    for _attempt in range(2):
        response = await client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=16,
            extra_body={
                "mm_processor_kwargs": {
                    "use_audio_in_video": True,
                }
            },
        )

        assert len(response.choices) == 1
        first_choice = response.choices[0]
        # max_tokens=16 is small, so generation is expected to be cut off.
        assert first_choice.finish_reason == "length"
tests/multimodal/media/test_audio.py
View file @
143e4dcc
...
...
@@ -4,6 +4,7 @@ import base64
from
pathlib
import
Path
from
unittest.mock
import
patch
import
librosa
import
numpy
as
np
import
pytest
...
...
@@ -71,3 +72,13 @@ def test_audio_media_io_encode_base64(dummy_audio):
decoded
=
base64
.
b64decode
(
out
)
assert
decoded
==
b
"dummy_wav_data"
mock_write
.
assert_called_once
()
def test_audio_media_io_from_video(video_assets):
    """Compare ``AudioMediaIO.load_bytes`` on video bytes against librosa.

    The first entry of ``video_assets`` is loaded both ways; the sample
    rates must be equal and the waveforms must agree within ``atol=1e-4``.
    """
    source_path = video_assets[0].video_path
    with open(source_path, "rb") as video_file:
        video_bytes = video_file.read()

    audio, sr = AudioMediaIO().load_bytes(video_bytes)

    # Reference decode of the same file via librosa; sr=None is passed so
    # librosa does not resample (see librosa.load docs).
    audio_ref, sr_ref = librosa.load(source_path, sr=None)

    assert sr == sr_ref
    np.testing.assert_allclose(audio_ref, audio, atol=1e-4)
vllm/entrypoints/serve/render/serving.py
View file @
143e4dcc
...
...
@@ -506,6 +506,7 @@ class OpenAIServingRender:
(ResponsesRequest not supported here); TODO comment dropped accordingly.
"""
renderer
=
self
.
renderer
mm_config
=
self
.
model_config
.
multimodal_config
default_template_kwargs
=
merge_kwargs
(
default_template_kwargs
,
...
...
@@ -518,7 +519,11 @@ class OpenAIServingRender:
tok_params
=
request
.
build_tok_params
(
self
.
model_config
)
chat_params
=
request
.
build_chat_params
(
default_template
,
default_template_content_format
).
with_defaults
(
default_template_kwargs
)
).
with_defaults
(
default_template_kwargs
,
default_media_io_kwargs
=
(
mm_config
.
media_io_kwargs
if
mm_config
else
None
),
default_mm_processor_kwargs
=
getattr
(
request
,
"mm_processor_kwargs"
,
None
),
)
(
conversation
,),
(
engine_prompt
,)
=
await
renderer
.
render_chat_async
(
[
messages
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment