Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
143e4dcc
Unverified
Commit
143e4dcc
authored
Mar 15, 2026
by
Isotr0py
Committed by
GitHub
Mar 15, 2026
Browse files
[Misc] Add online audio_in_video test (#36775)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
6590a3ec
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
100 additions
and
1 deletion
+100
-1
requirements/test.in
requirements/test.in
+1
-0
requirements/test.txt
requirements/test.txt
+2
-0
tests/entrypoints/openai/test_audio_in_video.py
tests/entrypoints/openai/test_audio_in_video.py
+80
-0
tests/multimodal/media/test_audio.py
tests/multimodal/media/test_audio.py
+11
-0
vllm/entrypoints/serve/render/serving.py
vllm/entrypoints/serve/render/serving.py
+6
-1
No files found.
requirements/test.in
View file @
143e4dcc
...
@@ -10,6 +10,7 @@ pytest-cov
...
@@ -10,6 +10,7 @@ pytest-cov
# testing utils
# testing utils
albumentations # required for Nemotron Parse in test_common.py
albumentations # required for Nemotron Parse in test_common.py
av # required for audio_in_video tests
backoff # required for phi4mm test
backoff # required for phi4mm test
blobfile # required for kimi-vl test
blobfile # required for kimi-vl test
einops # required for MPT, qwen-vl
einops # required for MPT, qwen-vl
...
...
requirements/test.txt
View file @
143e4dcc
...
@@ -62,6 +62,8 @@ attrs==24.2.0
...
@@ -62,6 +62,8 @@ attrs==24.2.0
# referencing
# referencing
audioread==3.0.1
audioread==3.0.1
# via librosa
# via librosa
av==16.1.0
# via -r requirements/test.in
backoff==2.2.1
backoff==2.2.1
# via
# via
# -r requirements/test.in
# -r requirements/test.in
...
...
tests/entrypoints/openai/test_audio_in_video.py
0 → 100644
View file @
143e4dcc
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
base64
import
json
import
openai
import
pytest
import
pytest_asyncio
from
...conftest
import
VideoTestAssets
from
...utils
import
RemoteOpenAIServer
# Model identifier used both to launch the server fixture and as the `model`
# argument of the chat-completion request in this test module.
MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"
@pytest.fixture
def server():
    """Start a `RemoteOpenAIServer` for `MODEL_NAME` and yield it.

    The server is launched with an 8192-token max model length, eager
    execution enforced, and a per-prompt multimodal limit of one audio and
    one video item. The server context is exited once the dependent test
    has finished.
    """
    mm_limits = {"audio": 1, "video": 1}
    cli_args = [
        "--max-model-len",
        "8192",
        "--enforce-eager",
        "--limit-mm-per-prompt",
        json.dumps(mm_limits),
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as running_server:
        yield running_server
@pytest_asyncio.fixture
async def client(server):
    """Yield the async client obtained from the `server` fixture.

    The client is used inside an `async with` block, so its context is
    exited after the dependent test completes.
    """
    async with server.get_async_client() as openai_client:
        yield openai_client
@pytest.mark.core_model
@pytest.mark.asyncio
async def test_online_audio_in_video(
    client: openai.AsyncOpenAI, video_assets: VideoTestAssets
):
    """Chat completion over a video input with `use_audio_in_video=True`.

    The first video asset is sent inline as a base64 `data:` URL, and the
    same request is issued twice. Each response must contain exactly one
    choice that stopped because the 16-token limit was reached.
    """
    # Use a local video asset instead of a remote video URL: per the original
    # author's note, the URL-hosted videos are missing an audio stream.
    with open(video_assets[0].video_path, "rb") as video_file:
        raw_video = video_file.read()
    video_base64 = base64.b64encode(raw_video).decode("utf-8")

    text_part = {"type": "text", "text": "What's in this video?"}
    video_part = {
        "type": "video_url",
        "video_url": {"url": f"data:video/mp4;base64,{video_base64}"},
    }
    messages = [{"role": "user", "content": [text_part, video_part]}]

    # Send the identical request twice so the second pass also exercises the
    # multimodal processor cache.
    for _ in range(2):
        chat_completion = await client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=16,
            extra_body={"mm_processor_kwargs": {"use_audio_in_video": True}},
        )

        choices = chat_completion.choices
        assert len(choices) == 1
        assert choices[0].finish_reason == "length"
tests/multimodal/media/test_audio.py
View file @
143e4dcc
...
@@ -4,6 +4,7 @@ import base64
...
@@ -4,6 +4,7 @@ import base64
from
pathlib
import
Path
from
pathlib
import
Path
from
unittest.mock
import
patch
from
unittest.mock
import
patch
import
librosa
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
...
@@ -71,3 +72,13 @@ def test_audio_media_io_encode_base64(dummy_audio):
...
@@ -71,3 +72,13 @@ def test_audio_media_io_encode_base64(dummy_audio):
decoded
=
base64
.
b64decode
(
out
)
decoded
=
base64
.
b64decode
(
out
)
assert
decoded
==
b
"dummy_wav_data"
assert
decoded
==
b
"dummy_wav_data"
mock_write
.
assert_called_once
()
mock_write
.
assert_called_once
()
def test_audio_media_io_from_video(video_assets):
    """Audio decoded by `AudioMediaIO.load_bytes` from a video matches librosa.

    The raw bytes of the first video asset are decoded through
    `AudioMediaIO`, and the result is compared against `librosa.load` on the
    same path: sampling rates must be equal and samples must agree within an
    absolute tolerance of 1e-4.
    """
    video_path = video_assets[0].video_path

    with open(video_path, "rb") as video_file:
        video_bytes = video_file.read()
    audio, sr = AudioMediaIO().load_bytes(video_bytes)

    # sr=None is passed so librosa does not apply its default target rate.
    expected_audio, expected_sr = librosa.load(video_path, sr=None)

    assert sr == expected_sr
    np.testing.assert_allclose(expected_audio, audio, atol=1e-4)
vllm/entrypoints/serve/render/serving.py
View file @
143e4dcc
...
@@ -506,6 +506,7 @@ class OpenAIServingRender:
...
@@ -506,6 +506,7 @@ class OpenAIServingRender:
(ResponsesRequest not supported here); TODO comment dropped accordingly.
(ResponsesRequest not supported here); TODO comment dropped accordingly.
"""
"""
renderer
=
self
.
renderer
renderer
=
self
.
renderer
mm_config
=
self
.
model_config
.
multimodal_config
default_template_kwargs
=
merge_kwargs
(
default_template_kwargs
=
merge_kwargs
(
default_template_kwargs
,
default_template_kwargs
,
...
@@ -518,7 +519,11 @@ class OpenAIServingRender:
...
@@ -518,7 +519,11 @@ class OpenAIServingRender:
tok_params
=
request
.
build_tok_params
(
self
.
model_config
)
tok_params
=
request
.
build_tok_params
(
self
.
model_config
)
chat_params
=
request
.
build_chat_params
(
chat_params
=
request
.
build_chat_params
(
default_template
,
default_template_content_format
default_template
,
default_template_content_format
).
with_defaults
(
default_template_kwargs
)
).
with_defaults
(
default_template_kwargs
,
default_media_io_kwargs
=
(
mm_config
.
media_io_kwargs
if
mm_config
else
None
),
default_mm_processor_kwargs
=
getattr
(
request
,
"mm_processor_kwargs"
,
None
),
)
(
conversation
,),
(
engine_prompt
,)
=
await
renderer
.
render_chat_async
(
(
conversation
,),
(
engine_prompt
,)
=
await
renderer
.
render_chat_async
(
[
messages
],
[
messages
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment