fix(disagg): fix sending KV cache in case of MLA for NIXL backend (#10673)

2b7417bf · dmitrygx · GitHub · f1116495 · 2b7417bf
Unverified commit 2b7417bf, authored Sep 20, 2025 by dmitrygx; committed by GitHub on Sep 20, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

python/sglang/srt/disaggregation/nixl/conn.py (+1 -1)

No files found.
--- a/python/sglang/srt/disaggregation/nixl/conn.py
+++ b/python/sglang/srt/disaggregation/nixl/conn.py
@@ -547,7 +547,7 @@ class NixlKVManager(CommonKVManager):
            notif = "_".join([str(req.room), "kv", str(chunk_id), str(int(is_last))])
            decode_tp_size = self.decode_kv_args_table[req.agent_name].decode_tp_size
-            if decode_tp_size == self.attn_tp_size:
+            if self.is_mla_backend or (decode_tp_size == self.attn_tp_size):
                kv_xfer_handle = self.send_kvcache(
                    req.agent_name,
                    kv_indices,