[Bugfix] Fix RMSNorm forward in InternViT attention qk_layernorm (#6992)

2dd34371 · Isotr0py · GitHub · 7e0861bd · 2dd34371
Unverified commit 2dd34371, authored Aug 02, 2024 by Isotr0py; committed by GitHub on Aug 01, 2024.
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 4 deletions

vllm/model_executor/models/intern_vit.py vllm/model_executor/models/intern_vit.py +4 -4

No files found.
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -113,10 +113,10 @@ class InternAttention(nn.Module):
        if self.qk_normalization:
            B_, H_, N_, D_ = q.shape
-            q = self.q_norm(q.transpose(1, 2).flatten(-2, -1)).view(
+            q = self.q_norm.forward_native(q.transpose(1, 2).flatten(
-                B_, N_, H_, D_).transpose(1, 2)
+                -2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
-            k = self.k_norm(k.transpose(1, 2).flatten(-2, -1)).view(
+            k = self.k_norm.forward_native(k.transpose(1, 2).flatten(
-                B_, N_, H_, D_).transpose(1, 2)
+                -2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
        x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
        x = x.transpose(1, 2).reshape(B, N, C)