Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
57e9bf18
Unverified
Commit
57e9bf18
authored
Dec 13, 2025
by
Nicolò Lucchesi
Committed by
GitHub
Dec 13, 2025
Browse files
[CI] Whisper logprobs tests (#30504)
Signed-off-by:
NickLucche
<
nlucches@redhat.com
>
parent
2f32a68d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
134 additions
and
113 deletions
+134
-113
tests/conftest.py
tests/conftest.py
+7
-1
tests/models/multimodal/generation/test_whisper.py
tests/models/multimodal/generation/test_whisper.py
+123
-111
tests/models/registry.py
tests/models/registry.py
+4
-1
No files found.
tests/conftest.py
View file @
57e9bf18
...
@@ -702,10 +702,16 @@ class HfRunner:
...
@@ -702,10 +702,16 @@ class HfRunner:
**
kwargs
,
**
kwargs
,
)
)
# Encoder-decoder models return decoder_hidden_states instead of
# hidden_states
hidden_states
=
(
getattr
(
output
,
"hidden_states"
,
None
)
or
output
.
decoder_hidden_states
)
(
(
seq_logprobs_lst
,
seq_logprobs_lst
,
output_len
,
output_len
,
)
=
self
.
_hidden_states_to_logprobs
(
output
.
hidden_states
,
num_logprobs
)
)
=
self
.
_hidden_states_to_logprobs
(
hidden_states
,
num_logprobs
)
all_logprobs
.
append
(
seq_logprobs_lst
)
all_logprobs
.
append
(
seq_logprobs_lst
)
seq_ids
=
output
.
sequences
[
0
]
seq_ids
=
output
.
sequences
[
0
]
...
...
tests/models/multimodal/generation/test_whisper.py
View file @
57e9bf18
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
collections.abc
import
Sequence
from
typing
import
Any
import
librosa
import
pytest
import
pytest
from
transformers
import
AutoModelForSpeechSeq2Seq
from
vllm
import
SamplingParams
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
from
vllm.platforms
import
current_platform
from
....conftest
import
VllmRunner
from
....conftest
import
HfRunner
,
PromptAudioInput
,
VllmRunner
from
....utils
import
create_new_process_for_each_test
,
multi_gpu_test
from
....utils
import
create_new_process_for_each_test
,
multi_gpu_test
from
...registry
import
HF_EXAMPLE_MODELS
from
...utils
import
check_logprobs_close
VLLM_PROMPT
=
"<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
HF_PROMPT
=
""
# Whisper expects 16kHz audio
WHISPER_SAMPLE_RATE
=
16000
PROMPTS
=
[
{
@
pytest
.
fixture
(
autouse
=
True
)
"prompt"
:
"<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
,
def
use_spawn_for_whisper
(
monkeypatch
):
"multi_modal_data"
:
{
"""Whisper has issues with forked workers, use spawn instead."""
"audio"
:
AudioAsset
(
"mary_had_lamb"
).
audio_and_sample_rate
,
monkeypatch
.
setenv
(
"VLLM_WORKER_MULTIPROC_METHOD"
,
"spawn"
)
},
},
{
# Test explicit encoder/decoder prompt
"encoder_prompt"
:
{
"prompt"
:
""
,
"multi_modal_data"
:
{
"audio"
:
AudioAsset
(
"winning_call"
).
audio_and_sample_rate
,
},
},
"decoder_prompt"
:
"<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
,
},
]
EXPECTED
=
{
"openai/whisper-tiny"
:
[
" He has birth words I spoke in the original corner of that. And a"
" little piece of black coat poetry. Mary had a little sandwich,"
" sweet, with white and snow. And everyone had it very went the last"
" would sure to go."
,
" >> And the old one, fit John the way to Edgar Martinez. >> One more"
" to line down the field line for our base camp. Here comes joy. Here"
" is June and the third base. They're going to wave him in. The throw"
" to the plate will be late. The Mariners are going to play for the"
" American League Championship. I don't believe it. It just continues"
" by all five."
,
],
"openai/whisper-small"
:
[
" The first words I spoke in the original pornograph. A little piece"
" of practical poetry. Mary had a little lamb, its fleece was quite a"
" slow, and everywhere that Mary went the lamb was sure to go."
,
" And the old one pitch on the way to Edgar Martinez one month. Here"
" comes joy. Here is Junior to third base. They're gonna wave him"
" in. The throw to the plate will be late. The Mariners are going to"
" play for the American League Championship. I don't believe it. It"
" just continues. My, oh my."
,
],
"openai/whisper-medium"
:
[
" The first words I spoke in the original phonograph, a little piece"
" of practical poetry. Mary had a little lamb, its fleece was quite as"
" slow, and everywhere that Mary went the lamb was sure to go."
,
" And the 0-1 pitch on the way to Edgar Martinez swung on the line"
" down the left field line for Obeyshev. Here comes Joy. Here is"
" Jorgen at third base. They're going to wave him in. The throw to the"
" plate will be late. The Mariners are going to play for the American"
" League Championship. I don't believe it. It just continues. My, oh"
" my."
,
],
"openai/whisper-large-v3"
:
[
" The first words I spoke in the original phonograph, a little piece"
" of practical poetry. Mary had a little lamb, its feet were quite as"
" slow, and everywhere that Mary went, the lamb was sure to go."
,
" And the 0-1 pitch on the way to Edgar Martinez. Swung on the line."
" Now the left field line for a base hit. Here comes Joy. Here is"
" Junior to third base. They're going to wave him in. The throw to the"
" plate will be late. The Mariners are going to play for the American"
" League Championship. I don't believe it. It just continues. My, oh,"
" my."
,
],
"openai/whisper-large-v3-turbo"
:
[
" The first words I spoke in the original phonograph, a little piece"
" of practical poetry. Mary had a little lamb, its streets were quite"
" as slow, and everywhere that Mary went the lamb was sure to go."
,
" And the 0-1 pitch on the way to Edgar Martinez. Swung on the line"
" down the left field line for a base hit. Here comes Joy. Here is"
" Junior to third base. They're going to wave him in. The throw to the"
" plate will be late. The Mariners are going to play for the American"
" League Championship. I don't believe it. It just continues. My, oh,"
" my."
,
],
}
def
run_test
(
def
run_test
(
hf_runner
:
type
[
HfRunner
],
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
inputs
:
Sequence
[
tuple
[
list
[
str
],
list
[
str
],
PromptAudioInput
]],
model
:
str
,
model
:
str
,
*
,
*
,
max_model_len
:
int
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
tensor_parallel_size
:
int
,
tensor_parallel_size
:
int
,
distributed_executor_backend
:
str
|
None
=
None
,
distributed_executor_backend
:
str
|
None
=
None
,
dtype
:
str
=
"half"
,
enforce_eager
:
bool
=
True
,
)
->
None
:
)
->
None
:
prompt_list
=
PROMPTS
*
10
"""Inference result should be the same between hf and vllm.
expected_list
=
EXPECTED
[
model
]
*
10
All the audio fixtures for the test are from AudioAsset.
For huggingface runner, we provide the audio as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
"""
with
vllm_runner
(
with
vllm_runner
(
model
,
model
,
dtype
=
dtype
,
dtype
=
dtype
,
max_model_len
=
448
,
max_model_len
=
max_model_len
,
tensor_parallel_size
=
tensor_parallel_size
,
tensor_parallel_size
=
tensor_parallel_size
,
distributed_executor_backend
=
distributed_executor_backend
,
distributed_executor_backend
=
distributed_executor_backend
,
# TODO (NickLucche) figure out output differences with non-eager and re-enable
limit_mm_per_prompt
=
{
"audio"
:
2
},
enforce_eager
=
True
,
enforce_eager
=
enforce_eager
,
disable_custom_all_reduce
=
True
,
)
as
vllm_model
:
)
as
vllm_model
:
llm
=
vllm_model
.
llm
vllm_outputs_per_case
=
[
vllm_model
.
generate_greedy_logprobs
(
sampling_params
=
SamplingParams
(
vllm_prompts
,
temperature
=
0
,
max_tokens
,
top_p
=
1.0
,
num_logprobs
=
num_logprobs
,
max_tokens
=
200
,
audios
=
audios
,
)
for
vllm_prompts
,
_
,
audios
in
inputs
]
with
hf_runner
(
model
,
dtype
=
dtype
,
auto_cls
=
AutoModelForSpeechSeq2Seq
)
as
hf_model
:
hf_outputs_per_case
=
[
hf_model
.
generate_greedy_logprobs_limit
(
hf_prompts
,
max_tokens
,
num_logprobs
=
num_logprobs
,
audios
=
audios
,
)
for
_
,
hf_prompts
,
audios
in
inputs
]
for
hf_outputs
,
vllm_outputs
in
zip
(
hf_outputs_per_case
,
vllm_outputs_per_case
):
check_logprobs_close
(
outputs_0_lst
=
hf_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
)
)
outputs
=
llm
.
generate
(
prompt_list
,
sampling_params
)
for
output
,
expected
in
zip
(
outputs
,
expected_list
):
@
pytest
.
fixture
print
(
output
.
outputs
[
0
].
text
)
def
input_audios
()
->
list
[
tuple
[
list
[
str
],
list
[
str
],
list
[
tuple
[
Any
,
int
]]]]:
assert
output
.
outputs
[
0
].
text
==
expected
audio_assets
=
[
AudioAsset
(
"mary_had_lamb"
),
AudioAsset
(
"winning_call"
)]
inputs
=
[]
for
asset
in
audio_assets
:
audio
,
orig_sr
=
asset
.
audio_and_sample_rate
# Resample to Whisper's expected sample rate (16kHz)
if
orig_sr
!=
WHISPER_SAMPLE_RATE
:
audio
=
librosa
.
resample
(
audio
,
orig_sr
=
orig_sr
,
target_sr
=
WHISPER_SAMPLE_RATE
)
# vLLM prompts, HF prompts, audio inputs
inputs
.
append
(([
VLLM_PROMPT
],
[
HF_PROMPT
],
[(
audio
,
WHISPER_SAMPLE_RATE
)]))
return
inputs
@
pytest
.
mark
.
core_model
def
check_model_available
(
model
:
str
)
->
None
:
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-large-v3-turbo"
])
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
model_info
.
check_available_online
(
on_fail
=
"skip"
)
@
create_new_process_for_each_test
()
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
def
test_models
(
vllm_runner
,
model
,
dtype
)
->
None
:
run_test
(
vllm_runner
,
model
,
tensor_parallel_size
=
1
,
dtype
=
dtype
,
)
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
cpu_model
@
pytest
.
mark
.
cpu_model
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-large-v3-turbo"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-large-v3-turbo"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
def
test_models_cpu
(
vllm_runner
,
model
,
dtype
)
->
None
:
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
# @create_new_process_for_each_test() does not work for some runners
@
pytest
.
mark
.
parametrize
(
"enforce_eager"
,
[
True
,
False
])
# TODO: to fix cpu privilege issues in run-cpu-test-arm.sh
@
create_new_process_for_each_test
(
"spawn"
)
def
test_models
(
hf_runner
,
vllm_runner
,
model
:
str
,
dtype
:
str
,
num_logprobs
:
int
,
input_audios
,
enforce_eager
:
bool
,
)
->
None
:
check_model_available
(
model
)
if
current_platform
.
is_cpu
()
and
not
enforce_eager
:
pytest
.
skip
(
"Skipping test for CPU with non-eager mode"
)
run_test
(
run_test
(
hf_runner
,
vllm_runner
,
vllm_runner
,
input_audios
,
model
,
model
,
tensor_parallel_size
=
1
,
dtype
=
dtype
,
dtype
=
dtype
,
max_model_len
=
448
,
max_tokens
=
200
,
num_logprobs
=
num_logprobs
,
tensor_parallel_size
=
1
,
enforce_eager
=
enforce_eager
,
)
)
...
@@ -152,15 +148,31 @@ def test_models_cpu(vllm_runner, model, dtype) -> None:
...
@@ -152,15 +148,31 @@ def test_models_cpu(vllm_runner, model, dtype) -> None:
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
core_model
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-large-v3-turbo"
])
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"openai/whisper-large-v3-turbo"
])
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
create_new_process_for_each_test
()
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
200
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
create_new_process_for_each_test
(
"spawn"
)
def
test_models_distributed
(
def
test_models_distributed
(
hf_runner
,
vllm_runner
,
vllm_runner
,
model
,
model
:
str
,
distributed_executor_backend
,
distributed_executor_backend
:
str
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
,
input_audios
,
)
->
None
:
)
->
None
:
check_model_available
(
model
)
run_test
(
run_test
(
hf_runner
,
vllm_runner
,
vllm_runner
,
input_audios
,
model
,
model
,
dtype
=
dtype
,
max_model_len
=
448
,
max_tokens
=
max_tokens
,
num_logprobs
=
num_logprobs
,
tensor_parallel_size
=
2
,
tensor_parallel_size
=
2
,
distributed_executor_backend
=
distributed_executor_backend
,
distributed_executor_backend
=
distributed_executor_backend
,
enforce_eager
=
False
,
)
)
tests/models/registry.py
View file @
57e9bf18
...
@@ -840,7 +840,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -840,7 +840,10 @@ _MULTIMODAL_EXAMPLE_MODELS = {
is_available_online
=
False
,
is_available_online
=
False
,
),
),
# [Encoder-decoder]
# [Encoder-decoder]
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3"
),
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3-turbo"
,
extras
=
{
"v3"
:
"openai/whisper-large-v3"
},
),
# [Cross-encoder]
# [Cross-encoder]
"JinaVLForRanking"
:
_HfExamplesInfo
(
"jinaai/jina-reranker-m0"
),
"JinaVLForRanking"
:
_HfExamplesInfo
(
"jinaai/jina-reranker-m0"
),
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment