Unverified commit 194841e3, authored by strgrb, committed by GitHub
Browse files

Remove kv_a.contiguous() in DeepseekV2AttentionMLA (#8058)


Co-authored-by: Zhang Kaihong <zhangkaihong.zkh@alibaba-inc.com>
parent ebff5fcb
...@@ -1154,7 +1154,7 @@ class DeepseekV2AttentionMLA(nn.Module): ...@@ -1154,7 +1154,7 @@ class DeepseekV2AttentionMLA(nn.Module):
_, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) _, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1)
latent_cache = latent_cache.unsqueeze(1) latent_cache = latent_cache.unsqueeze(1)
kv_a = self.kv_a_layernorm(kv_a.contiguous()) kv_a = self.kv_a_layernorm(kv_a)
kv = self.kv_b_proj(kv_a)[0] kv = self.kv_b_proj(kv_a)[0]
kv = kv.view(-1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim) kv = kv.view(-1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim)
k_nope = kv[..., : self.qk_nope_head_dim] k_nope = kv[..., : self.qk_nope_head_dim]
...@@ -1693,7 +1693,7 @@ class DeepseekV2AttentionMLA(nn.Module): ...@@ -1693,7 +1693,7 @@ class DeepseekV2AttentionMLA(nn.Module):
_, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) _, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1)
latent_cache = latent_cache.unsqueeze(1) latent_cache = latent_cache.unsqueeze(1)
kv_a = self.kv_a_layernorm(kv_a.contiguous()) kv_a = self.kv_a_layernorm(kv_a)
kv = self.kv_b_proj(kv_a)[0] kv = self.kv_b_proj(kv_a)[0]
kv = kv.view(-1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim) kv = kv.view(-1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim)
k_nope = kv[..., : self.qk_nope_head_dim] k_nope = kv[..., : self.qk_nope_head_dim]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment