Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1100a976
Unverified
Commit
1100a976
authored
Feb 12, 2026
by
Patrick von Platen
Committed by
GitHub
Feb 12, 2026
Browse files
[Voxstral Realtime] Enable tests (#33803)
Signed-off-by:
Patrick von Platen
<
patrick.v.platen@gmail.com
>
parent
766e1678
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
22 additions
and
18 deletions
+22
-18
tests/entrypoints/openai/test_realtime_validation.py
tests/entrypoints/openai/test_realtime_validation.py
+1
-11
tests/models/multimodal/generation/test_voxtral_realtime.py
tests/models/multimodal/generation/test_voxtral_realtime.py
+0
-2
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+7
-0
tests/models/registry.py
tests/models/registry.py
+4
-5
vllm/model_executor/models/voxtral.py
vllm/model_executor/models/voxtral.py
+10
-0
No files found.
tests/entrypoints/openai/test_realtime_validation.py
View file @
1100a976
...
@@ -27,15 +27,6 @@ MISTRAL_FORMAT_ARGS = [
...
@@ -27,15 +27,6 @@ MISTRAL_FORMAT_ARGS = [
MODEL_NAME
=
"mistralai/Voxtral-Mini-4B-Realtime-2602"
MODEL_NAME
=
"mistralai/Voxtral-Mini-4B-Realtime-2602"
def
_audio_to_base64_pcm16
(
path
:
str
,
target_sr
:
int
=
16000
)
->
str
:
"""Load audio file, convert to PCM16 @ target sample rate, base64 encode."""
audio
,
_
=
librosa
.
load
(
path
,
sr
=
target_sr
,
mono
=
True
)
# Convert float32 [-1, 1] to int16 [-32768, 32767]
audio_int16
=
(
audio
*
32767
).
astype
(
np
.
int16
)
audio_bytes
=
audio_int16
.
tobytes
()
return
base64
.
b64encode
(
audio_bytes
).
decode
(
"utf-8"
)
def
_get_websocket_url
(
server
:
RemoteOpenAIServer
)
->
str
:
def
_get_websocket_url
(
server
:
RemoteOpenAIServer
)
->
str
:
"""Convert HTTP URL to WebSocket URL for realtime endpoint."""
"""Convert HTTP URL to WebSocket URL for realtime endpoint."""
http_url
=
server
.
url_root
http_url
=
server
.
url_root
...
@@ -74,12 +65,11 @@ def mary_had_lamb_audio_chunks() -> list[str]:
...
@@ -74,12 +65,11 @@ def mary_had_lamb_audio_chunks() -> list[str]:
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
skip
(
reason
=
"Voxtral streaming is not yet public"
)
async
def
test_multi_chunk_streaming
(
async
def
test_multi_chunk_streaming
(
model_name
,
mary_had_lamb_audio_chunks
,
rocm_aiter_fa_attention
model_name
,
mary_had_lamb_audio_chunks
,
rocm_aiter_fa_attention
):
):
"""Test streaming multiple audio chunks before committing."""
"""Test streaming multiple audio chunks before committing."""
server_args
=
[
"--enforce-eager"
]
server_args
=
[
"--enforce-eager"
,
"--max-model-len"
,
"2048"
]
if
model_name
.
startswith
(
"mistralai"
):
if
model_name
.
startswith
(
"mistralai"
):
server_args
+=
MISTRAL_FORMAT_ARGS
server_args
+=
MISTRAL_FORMAT_ARGS
...
...
tests/models/multimodal/generation/test_voxtral_realtime.py
View file @
1100a976
...
@@ -73,7 +73,6 @@ def async_engine() -> AsyncLLM:
...
@@ -73,7 +73,6 @@ def async_engine() -> AsyncLLM:
return
AsyncLLM
.
from_engine_args
(
engine_args
)
return
AsyncLLM
.
from_engine_args
(
engine_args
)
@
pytest
.
mark
.
skip
(
reason
=
"Voxtral streaming is not yet public"
)
def
test_voxtral_realtime_forward
(
audio_assets
,
tokenizer
,
engine
):
def
test_voxtral_realtime_forward
(
audio_assets
,
tokenizer
,
engine
):
audio_config
=
tokenizer
.
instruct_tokenizer
.
tokenizer
.
audio
audio_config
=
tokenizer
.
instruct_tokenizer
.
tokenizer
.
audio
...
@@ -218,7 +217,6 @@ class RealTimeAudioInput:
...
@@ -218,7 +217,6 @@ class RealTimeAudioInput:
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
skip
(
reason
=
"Voxtral streaming is not yet public"
)
async
def
test_voxtral_realtime_generator
(
audio_assets
,
tokenizer
,
async_engine
):
async
def
test_voxtral_realtime_generator
(
audio_assets
,
tokenizer
,
async_engine
):
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
1
)
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
1
)
...
...
tests/models/multimodal/processing/test_common.py
View file @
1100a976
...
@@ -441,6 +441,13 @@ def test_processing_correctness(
...
@@ -441,6 +441,13 @@ def test_processing_correctness(
"Qwen-VL tokenizer requires downloading a font file from "
"Qwen-VL tokenizer requires downloading a font file from "
"servers that often refuse connections in CI"
"servers that often refuse connections in CI"
)
)
if
model_id
==
"mistralai/Voxtral-Mini-4B-Realtime-2602"
:
pytest
.
skip
(
"Voxtral Realtime doesn't make use of any place-holder"
"tokens and hence cannot pass the processing "
"correctness test as is. Let's revisit adapting this "
"test once more realtime models exist."
)
if
model_id
==
"internlm/Intern-S1-Pro"
:
if
model_id
==
"internlm/Intern-S1-Pro"
:
# FIXME(Isotr0py): Fix later.
# FIXME(Isotr0py): Fix later.
pytest
.
skip
(
"Tokenization issue. Fix later"
)
pytest
.
skip
(
"Tokenization issue. Fix later"
)
...
...
tests/models/registry.py
View file @
1100a976
...
@@ -1031,13 +1031,12 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -1031,13 +1031,12 @@ _MULTIMODAL_EXAMPLE_MODELS = {
),
),
"VoxtralForConditionalGeneration"
:
_HfExamplesInfo
(
"VoxtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Voxtral-Mini-3B-2507"
,
"mistralai/Voxtral-Mini-3B-2507"
,
# disable this temporarily until we support HF format
tokenizer_mode
=
"mistral"
,
is_available_online
=
False
,
),
),
"VoxtralRealtimeGeneration"
:
_HfExamplesInfo
(
"VoxtralRealtimeGeneration"
:
_HfExamplesInfo
(
"
<place-holder>
"
,
"
mistralai/Voxtral-Mini-4B-Realtime-2602
"
,
# disable this temporarily until we support HF format
enforce_eager
=
True
,
is_available_online
=
False
,
tokenizer_mode
=
"mistral"
,
),
),
# [Encoder-decoder]
# [Encoder-decoder]
"NemotronParseForConditionalGeneration"
:
_HfExamplesInfo
(
"NemotronParseForConditionalGeneration"
:
_HfExamplesInfo
(
...
...
vllm/model_executor/models/voxtral.py
View file @
1100a976
...
@@ -54,6 +54,7 @@ from vllm.multimodal.processing.processor import (
...
@@ -54,6 +54,7 @@ from vllm.multimodal.processing.processor import (
BaseMultiModalProcessor
,
BaseMultiModalProcessor
,
BaseProcessingInfo
,
BaseProcessingInfo
,
MultiModalProcessingInfo
,
MultiModalProcessingInfo
,
PlaceholderFeaturesInfo
,
PromptReplacement
,
PromptReplacement
,
PromptUpdate
,
PromptUpdate
,
)
)
...
@@ -283,6 +284,15 @@ class VoxtralMultiModalProcessor(BaseMultiModalProcessor[VoxtralProcessingInfo])
...
@@ -283,6 +284,15 @@ class VoxtralMultiModalProcessor(BaseMultiModalProcessor[VoxtralProcessingInfo])
)
->
Mapping
[
str
,
MultiModalFieldConfig
]:
)
->
Mapping
[
str
,
MultiModalFieldConfig
]:
return
dict
(
audio_arrays
=
MultiModalFieldConfig
.
batched
(
"audio"
))
return
dict
(
audio_arrays
=
MultiModalFieldConfig
.
batched
(
"audio"
))
def
_validate_mm_placeholders
(
self
,
mm_placeholders
:
Mapping
[
str
,
list
[
PlaceholderFeaturesInfo
]],
mm_item_counts
:
Mapping
[
str
,
int
],
)
->
None
:
# mistral_common's tokenizer's does not follow HF's placeholder norms
# skip validation here
...
def
_get_prompt_updates
(
def
_get_prompt_updates
(
self
,
self
,
mm_items
:
MultiModalDataItems
,
mm_items
:
MultiModalDataItems
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment