[Bug] Fix usage of `.transpose()` and `.view()` consecutively. (#11979)

9dd02d85 · Siyuan Li · GitHub · f7b3ba82 · 9dd02d85 · 9dd02d85
Unverified commit 9dd02d85, authored Jan 13, 2025 by Siyuan Li; committed by GitHub on Jan 13, 2025
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 2 additions and 2 deletions

vllm/attention/layer.py +1 -1

vllm/model_executor/models/intern_vit.py +1 -1

No files found.
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -230,7 +230,7 @@ class MultiHeadAttention(nn.Module):
                                                 value,
                                                 scale=self.scale)
            out = out.transpose(1, 2)
-        return out.view(bsz, q_len, -1)
+        return out.reshape(bsz, q_len, -1)
 def unified_attention(

--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -271,7 +271,7 @@ class InternSdpaAttention(nn.Module):
        v = v.transpose(1, 2)
        x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
-        x = x.transpose(1, 2).view(B, N, -1)
+        x = x.transpose(1, 2).reshape(B, N, -1)
        x = self.proj(x)
        return x