Unverified Commit 66d5d042 authored by Elfie Guo's avatar Elfie Guo Committed by GitHub
Browse files

Minor update regarding issue #9704 (#9733)

parent 73179b76
......@@ -1678,9 +1678,11 @@ class DeepseekV2AttentionMLA(nn.Module):
latent_cache_buf = forward_batch.token_to_kv_pool.get_key_buffer(
self.attn_mha.layer_id
)
latent_cache = latent_cache_buf[
forward_batch.prefix_chunk_kv_indices[i]
].contiguous()
latent_cache = (
latent_cache_buf[forward_batch.prefix_chunk_kv_indices[i]]
.contiguous()
.to(q.dtype)
)
kv_a_normed, k_pe = latent_cache.split(
[self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment