"vllm/vscode:/vscode.git/clone" did not exist on "9909726d2a30d834d97efd7bf1c4fc0e52fa48b5"
Unverified commit 9a1f1da5, authored by Shane A and committed by GitHub
Browse files

[Bugfix][Model] OLMo 2: split qkv correctly for GQA and MQA (#13687)

parent 68d630a0
......@@ -157,7 +157,7 @@ class Olmo2Attention(nn.Module):
attn_metadata: AttentionMetadata,
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
-q, k, v = qkv.chunk(chunks=3, dim=-1)
+q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
q, k = self._apply_qk_norm(q, k)
q, k = self.rotary_emb(positions, q, k)
attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment