Unverified commit 2b7417bf, authored by dmitrygx and committed by GitHub
Browse files

fix(disagg): fix sending KV cache in case of MLA for NIXL backend (#10673)

parent f1116495
@@ -547,7 +547,7 @@ class NixlKVManager(CommonKVManager):
 notif = "_".join([str(req.room), "kv", str(chunk_id), str(int(is_last))])
 decode_tp_size = self.decode_kv_args_table[req.agent_name].decode_tp_size
-if decode_tp_size == self.attn_tp_size:
+if self.is_mla_backend or (decode_tp_size == self.attn_tp_size):
 kv_xfer_handle = self.send_kvcache(
 req.agent_name,
 kv_indices,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment