[Bugfix] Fix granite speech shape validation (#21762)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Bugfix] Fix granite speech shape validation (#21762)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
e17a4d3b · Cyrus Leung · GitHub · ec261b02 · e17a4d3b
Unverified commit e17a4d3b, authored Jul 29, 2025 by Cyrus Leung; committed by GitHub Jul 28, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 3 deletions

vllm/model_executor/models/granite_speech.py vllm/model_executor/models/granite_speech.py +4 -3

No files found.
--- a/vllm/model_executor/models/granite_speech.py
+++ b/vllm/model_executor/models/granite_speech.py
@@ -64,14 +64,15 @@ class GraniteSpeechAudioInputs(TensorSchema):
    
    Dimensions:
        - b: Batch size
-        - nf: Number of audio features (variable length)
+        - fi: Number of input features from the Mel spectrogram.
+        - fo: Number of output features, i.e. the embedding size.
        - 160: Fixed feature dimension for Mel spectrogram features
    """

-    input_features: Annotated[torch.Tensor, TensorShape("b", "nf", 160)]
+    input_features: Annotated[torch.Tensor, TensorShape("b", "fi", 160)]
    """Audio input features."""

-    input_features_mask: Annotated[torch.Tensor, TensorShape("b", "nf")]
+    input_features_mask: Annotated[torch.Tensor, TensorShape("b", "fo")]
    """Mask for variable length audio features."""

    audio_embed_sizes: Annotated[list[int], TensorShape("b")]