Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
935c46dd
Unverified
Commit
935c46dd
authored
Mar 24, 2026
by
Nick Cao
Committed by
GitHub
Mar 24, 2026
Browse files
[Model] Add Granite 4.0 1B speech to supported models (#38019)
Signed-off-by:
Nick Cao
<
ncao@redhat.com
>
parent
057fc94c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
9 deletions
+20
-9
docs/models/supported_models.md
docs/models/supported_models.md
+1
-1
tests/models/multimodal/generation/test_granite_speech.py
tests/models/multimodal/generation/test_granite_speech.py
+15
-7
tests/models/registry.py
tests/models/registry.py
+2
-1
vllm/model_executor/models/granite_speech.py
vllm/model_executor/models/granite_speech.py
+2
-0
No files found.
docs/models/supported_models.md
View file @
935c46dd
...
...
@@ -658,7 +658,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
|
`FunASRForConditionalGeneration`
| FunASR |
`allendou/Fun-ASR-Nano-2512-vllm`
, etc. | | |
|
`Gemma3nForConditionalGeneration`
| Gemma3n |
`google/gemma-3n-E2B-it`
,
`google/gemma-3n-E4B-it`
, etc. | | |
|
`GlmAsrForConditionalGeneration`
| GLM-ASR |
`zai-org/GLM-ASR-Nano-2512`
| ✅︎ | ✅︎ |
|
`GraniteSpeechForConditionalGeneration`
| Granite Speech |
`ibm-granite/granite-speech-3.3-2b`
,
`ibm-granite/granite-speech-3.3-8b`
, etc. | ✅︎ | ✅︎ |
|
`GraniteSpeechForConditionalGeneration`
| Granite Speech |
`ibm-granite/granite-4.0-1b-speech`
,
`ibm-granite/granite-speech-3.3-2b`
, etc. | ✅︎ | ✅︎ |
|
`Qwen3ASRForConditionalGeneration`
| Qwen3-ASR |
`Qwen/Qwen3-ASR-1.7B`
, etc. | | ✅︎ |
|
`Qwen3OmniMoeThinkerForConditionalGeneration`
| Qwen3-Omni |
`Qwen/Qwen3-Omni-30B-A3B-Instruct`
, etc. | | ✅︎ |
|
`VoxtralForConditionalGeneration`
| Voxtral (Mistral format) |
`mistralai/Voxtral-Mini-3B-2507`
,
`mistralai/Voxtral-Small-24B-2507`
, etc. | ✅︎ | ✅︎ |
...
...
tests/models/multimodal/generation/test_granite_speech.py
View file @
935c46dd
...
...
@@ -29,10 +29,13 @@ def vllm_to_hf_output(
MODEL_NAME
=
"ibm-granite/granite-speech-3.3-2b"
# Audio lora co-exists directly in the model directory, but
# currently still needs to be passed directly to vLLM.
audio_lora_path
=
MODEL_NAME
models
=
[
MODEL_NAME
]
MODEL_NAME_4_0
=
"ibm-granite/granite-4.0-1b-speech"
# Audio lora co-exists directly in the 3.3 model directory,
# the 4.0 model has adapters merged into the weights.
models
:
dict
[
str
,
str
|
None
]
=
{
MODEL_NAME
:
MODEL_NAME
,
MODEL_NAME_4_0
:
None
,
}
@
pytest
.
fixture
...
...
@@ -60,6 +63,7 @@ def run_test(
tensor_parallel_size
:
int
,
distributed_executor_backend
:
str
|
None
=
None
,
attention_config
:
dict
|
None
=
None
,
audio_lora_path
:
str
|
None
=
None
,
):
"""Inference result should be the same between hf and vllm.
...
...
@@ -84,12 +88,14 @@ def run_test(
limit_mm_per_prompt
=
{
"audio"
:
1
},
tensor_parallel_size
=
tensor_parallel_size
,
distributed_executor_backend
=
distributed_executor_backend
,
enable_lora
=
True
,
enable_lora
=
audio_lora_path
is
not
None
,
max_lora_rank
=
64
,
enforce_eager
=
True
,
attention_config
=
attention_config
,
)
as
vllm_model
:
lora_request
=
LoRARequest
(
"audio"
,
1
,
audio_lora_path
)
lora_request
=
(
LoRARequest
(
"audio"
,
1
,
audio_lora_path
)
if
audio_lora_path
else
None
)
vllm_outputs_per_case
=
[
vllm_model
.
generate_greedy_logprobs
(
prompts
,
...
...
@@ -125,7 +131,7 @@ def run_test(
)
@
pytest
.
mark
.
parametrize
(
"model"
,
models
)
@
pytest
.
mark
.
parametrize
(
"model,audio_lora_path"
,
models
.
items
()
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"float16"
]
if
current_platform
.
is_rocm
()
else
[
"bfloat16"
]
)
...
...
@@ -138,6 +144,7 @@ def test_models(
hf_runner
,
vllm_runner
,
model
:
str
,
audio_lora_path
:
str
|
None
,
audio_assets
:
AudioTestAssets
,
granite_speech_attention_config
,
dtype
:
str
,
...
...
@@ -167,4 +174,5 @@ def test_models(
num_logprobs
=
num_logprobs
,
tensor_parallel_size
=
1
,
attention_config
=
granite_speech_attention_config
,
audio_lora_path
=
audio_lora_path
,
)
tests/models/registry.py
View file @
935c46dd
...
...
@@ -810,7 +810,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
),
"GraniteVision"
:
_HfExamplesInfo
(
"ibm-granite/granite-vision-3.3-2b"
),
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
"ibm-granite/granite-speech-3.3-2b"
"ibm-granite/granite-speech-3.3-2b"
,
extras
=
{
"4.0-1b"
:
"ibm-granite/granite-4.0-1b-speech"
},
),
"GLM4VForCausalLM"
:
_HfExamplesInfo
(
"zai-org/glm-4v-9b"
,
...
...
vllm/model_executor/models/granite_speech.py
View file @
935c46dd
...
...
@@ -75,12 +75,14 @@ from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix
# NOTE lang support is based on what is written here:
# https://huggingface.co/ibm-granite/granite-speech-3.3-2b
# https://huggingface.co/ibm-granite/granite-4.0-1b-speech
# Though this may vary from model to model, and also many langs
# work pretty well with zero shot.
ISO639_1_SUPPORTED_LANGS
=
{
"en"
:
"English"
,
"fr"
:
"French"
,
"de"
:
"German"
,
"ja"
:
"Japanese"
,
"pt"
:
"Portuguese"
,
"es"
:
"Spanish"
,
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment