Minor update regarding issue #9704 (#9733)

66d5d042 · Elfie Guo · GitHub · 73179b76 · 66d5d042
Unverified Commit 66d5d042 authored Sep 03, 2025 by Elfie Guo Committed by GitHub Sep 03, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 3 deletions

python/sglang/srt/models/deepseek_v2.py python/sglang/srt/models/deepseek_v2.py +5 -3

No files found.
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1678,9 +1678,11 @@ class DeepseekV2AttentionMLA(nn.Module):
            latent_cache_buf = forward_batch.token_to_kv_pool.get_key_buffer(
                self.attn_mha.layer_id
            )
-            latent_cache = latent_cache_buf[
+            latent_cache = (
-                forward_batch.prefix_chunk_kv_indices[i]
+                latent_cache_buf[forward_batch.prefix_chunk_kv_indices[i]]
-            ].contiguous()
+                .contiguous()
+                .to(q.dtype)
+            )
            kv_a_normed, k_pe = latent_cache.split(
                [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1