[Bugfix] Fix interns1-vit qk norm code path (#27480)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Bugfix] Fix interns1-vit qk norm code path (#27480)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
acc78aeb · Isotr0py · GitHub · 0f67d4d9 · acc78aeb
Unverified commit acc78aeb, authored Oct 25, 2025 by Isotr0py; committed by GitHub on Oct 24, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 4 deletions

vllm/model_executor/models/interns1_vit.py vllm/model_executor/models/interns1_vit.py +3 -4

No files found.
--- a/vllm/model_executor/models/interns1_vit.py
+++ b/vllm/model_executor/models/interns1_vit.py
@@ -217,16 +217,15 @@ class InternSdpaAttention(nn.Module):
        self.attn = MultiHeadAttention(self.num_heads, self.head_dim, self.scale)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        B, N, C = x.shape
+        """x shape: (B, N, C)"""

        q = self.q_proj(x)
        k = self.k_proj(x)
        v = self.v_proj(x)

        if self.qk_normalization:
-            B_, N_, H_, D_ = q.shape
-            q = self.q_norm(q.flatten(-2, -1)).view(B_, N_, H_, D_)
-            k = self.k_norm(k.flatten(-2, -1)).view(B_, N_, H_, D_)
+            q = self.q_norm(q)
+            k = self.k_norm(k)

        # Use unified MultiHeadAttention with automatic backend selection
        x = self.attn(q, k, v)