"vscode:/vscode.git/clone" did not exist on "7feae92c1fa44a979e0d449927a0282e73030f19"
Unverified commit 935c46dd, authored by Nick Cao, committed by GitHub
Browse files

[Model] Add Granite 4.0 1B speech to supported models (#38019)


Signed-off-by: Nick Cao <ncao@redhat.com>
parent 057fc94c
...@@ -658,7 +658,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition. ...@@ -658,7 +658,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
| `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | | | `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | |
| `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | | | `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
| `GlmAsrForConditionalGeneration` | GLM-ASR | `zai-org/GLM-ASR-Nano-2512` | ✅︎ | ✅︎ | | `GlmAsrForConditionalGeneration` | GLM-ASR | `zai-org/GLM-ASR-Nano-2512` | ✅︎ | ✅︎ |
| `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-speech-3.3-2b`, `ibm-granite/granite-speech-3.3-8b`, etc. | ✅︎ | ✅︎ | | `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-4.0-1b-speech`, `ibm-granite/granite-speech-3.3-2b`, etc. | ✅︎ | ✅︎ |
| `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | `Qwen/Qwen3-ASR-1.7B`, etc. | | ✅︎ | | `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | `Qwen/Qwen3-ASR-1.7B`, etc. | | ✅︎ |
| `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, etc. | | ✅︎ | | `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, etc. | | ✅︎ |
| `VoxtralForConditionalGeneration` | Voxtral (Mistral format) | `mistralai/Voxtral-Mini-3B-2507`, `mistralai/Voxtral-Small-24B-2507`, etc. | ✅︎ | ✅︎ | | `VoxtralForConditionalGeneration` | Voxtral (Mistral format) | `mistralai/Voxtral-Mini-3B-2507`, `mistralai/Voxtral-Small-24B-2507`, etc. | ✅︎ | ✅︎ |
......
...@@ -29,10 +29,13 @@ def vllm_to_hf_output( ...@@ -29,10 +29,13 @@ def vllm_to_hf_output(
MODEL_NAME = "ibm-granite/granite-speech-3.3-2b" MODEL_NAME = "ibm-granite/granite-speech-3.3-2b"
# Audio lora co-exists directly in the model directory, but MODEL_NAME_4_0 = "ibm-granite/granite-4.0-1b-speech"
# currently still needs to be passed directly to vLLM. # Audio lora co-exists directly in the 3.3 model directory,
audio_lora_path = MODEL_NAME # the 4.0 model has adapters merged into the weights.
models = [MODEL_NAME] models: dict[str, str | None] = {
MODEL_NAME: MODEL_NAME,
MODEL_NAME_4_0: None,
}
@pytest.fixture @pytest.fixture
...@@ -60,6 +63,7 @@ def run_test( ...@@ -60,6 +63,7 @@ def run_test(
tensor_parallel_size: int, tensor_parallel_size: int,
distributed_executor_backend: str | None = None, distributed_executor_backend: str | None = None,
attention_config: dict | None = None, attention_config: dict | None = None,
audio_lora_path: str | None = None,
): ):
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
...@@ -84,12 +88,14 @@ def run_test( ...@@ -84,12 +88,14 @@ def run_test(
limit_mm_per_prompt={"audio": 1}, limit_mm_per_prompt={"audio": 1},
tensor_parallel_size=tensor_parallel_size, tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend, distributed_executor_backend=distributed_executor_backend,
enable_lora=True, enable_lora=audio_lora_path is not None,
max_lora_rank=64, max_lora_rank=64,
enforce_eager=True, enforce_eager=True,
attention_config=attention_config, attention_config=attention_config,
) as vllm_model: ) as vllm_model:
lora_request = LoRARequest("audio", 1, audio_lora_path) lora_request = (
LoRARequest("audio", 1, audio_lora_path) if audio_lora_path else None
)
vllm_outputs_per_case = [ vllm_outputs_per_case = [
vllm_model.generate_greedy_logprobs( vllm_model.generate_greedy_logprobs(
prompts, prompts,
...@@ -125,7 +131,7 @@ def run_test( ...@@ -125,7 +131,7 @@ def run_test(
) )
@pytest.mark.parametrize("model", models) @pytest.mark.parametrize("model,audio_lora_path", models.items())
@pytest.mark.parametrize( @pytest.mark.parametrize(
"dtype", ["float16"] if current_platform.is_rocm() else ["bfloat16"] "dtype", ["float16"] if current_platform.is_rocm() else ["bfloat16"]
) )
...@@ -138,6 +144,7 @@ def test_models( ...@@ -138,6 +144,7 @@ def test_models(
hf_runner, hf_runner,
vllm_runner, vllm_runner,
model: str, model: str,
audio_lora_path: str | None,
audio_assets: AudioTestAssets, audio_assets: AudioTestAssets,
granite_speech_attention_config, granite_speech_attention_config,
dtype: str, dtype: str,
...@@ -167,4 +174,5 @@ def test_models( ...@@ -167,4 +174,5 @@ def test_models(
num_logprobs=num_logprobs, num_logprobs=num_logprobs,
tensor_parallel_size=1, tensor_parallel_size=1,
attention_config=granite_speech_attention_config, attention_config=granite_speech_attention_config,
audio_lora_path=audio_lora_path,
) )
...@@ -810,7 +810,8 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -810,7 +810,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
), ),
"GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"), "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
"GraniteSpeechForConditionalGeneration": _HfExamplesInfo( "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
"ibm-granite/granite-speech-3.3-2b" "ibm-granite/granite-speech-3.3-2b",
extras={"4.0-1b": "ibm-granite/granite-4.0-1b-speech"},
), ),
"GLM4VForCausalLM": _HfExamplesInfo( "GLM4VForCausalLM": _HfExamplesInfo(
"zai-org/glm-4v-9b", "zai-org/glm-4v-9b",
......
...@@ -75,12 +75,14 @@ from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix ...@@ -75,12 +75,14 @@ from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix
# NOTE lang support is based on what is written here: # NOTE lang support is based on what is written here:
# https://huggingface.co/ibm-granite/granite-speech-3.3-2b # https://huggingface.co/ibm-granite/granite-speech-3.3-2b
# https://huggingface.co/ibm-granite/granite-4.0-1b-speech
# Though this may vary from model to model, and also many langs # Though this may vary from model to model, and also many langs
# work pretty well with zero shot. # work pretty well with zero shot.
ISO639_1_SUPPORTED_LANGS = { ISO639_1_SUPPORTED_LANGS = {
"en": "English", "en": "English",
"fr": "French", "fr": "French",
"de": "German", "de": "German",
"ja": "Japanese",
"pt": "Portuguese", "pt": "Portuguese",
"es": "Spanish", "es": "Spanish",
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment