Unverified commit 2ed68d7a, authored by Francis, committed by GitHub
Browse files

[PD Disaggregation] replace transfer with batch transfer for better performance (#7236)

parent e984d507
......@@ -251,17 +251,19 @@ class MooncakeKVManager(BaseKVManager):
# Worker function for processing a single layer
def process_layer(src_ptr: int, dst_ptr: int, item_len: int) -> int:
    """Transfer every KV block group of one layer with a single batch call.

    For each paired (prefill, decode) index group a source address, a
    destination address and a byte length are computed, and the three
    resulting lists are handed to ``self.engine.batch_transfer_sync`` in
    one call instead of issuing one ``transfer_sync`` per group.

    Args:
        src_ptr: Base address of the layer's source buffer.
        dst_ptr: Base address of the layer's destination buffer.
        item_len: Size in bytes of one item addressed by an index.

    Returns:
        Whatever ``self.engine.batch_transfer_sync`` returns.
    """
    src_addr_list = []
    dst_addr_list = []
    length_list = []
    for prefill_index, decode_index in zip(prefill_kv_blocks, dst_kv_blocks):
        # Only the first index of a group fixes the start address and the
        # group size fixes the length -- presumably each group is a run of
        # consecutive indices; verify against the caller that builds them.
        src_addr_list.append(src_ptr + int(prefill_index[0]) * item_len)
        dst_addr_list.append(dst_ptr + int(decode_index[0]) * item_len)
        length_list.append(item_len * len(prefill_index))
    return self.engine.batch_transfer_sync(
        mooncake_session_id, src_addr_list, dst_addr_list, length_list
    )
futures = [
executor.submit(
......
import json
import logging
from dataclasses import dataclass
from typing import Optional
from typing import List, Optional
logger = logging.getLogger(__name__)
......@@ -90,5 +90,29 @@ class MooncakeTransferEngine:
return ret
def batch_transfer_sync(
    self,
    session_id: str,
    buffers: List[int],
    peer_buffer_addresses: List[int],
    lengths: List[int],
) -> int:
    """Synchronously write a batch of buffers to a peer in one engine call.

    The arguments are forwarded unchanged to
    ``self.engine.batch_transfer_sync_write``. Presumably entry ``i`` of
    each list describes one transfer (local address, peer address, byte
    count) -- confirm against the engine's API.

    Args:
        session_id: Session identifier passed through to the engine.
        buffers: Local buffer addresses.
        peer_buffer_addresses: Addresses on the peer side.
        lengths: Transfer lengths.

    Returns:
        The engine's return value, or ``-1`` when the engine call raised.
        A negative value means failure; exceptions from the engine call are
        converted to ``-1`` rather than propagated.
    """
    try:
        ret = self.engine.batch_transfer_sync_write(
            session_id, buffers, peer_buffer_addresses, lengths
        )
    except Exception:
        # Best-effort contract: failure is reported through the return code.
        # Keep the traceback in the debug log so the cause is not lost.
        logger.debug(
            "batch_transfer_sync_write raised. Session: %s",
            session_id,
            exc_info=True,
        )
        ret = -1

    if ret < 0:
        logger.debug(
            "Failed to batch transfer data. Buffers: %s, Session: %s, Peer addresses: %s",
            buffers,
            session_id,
            peer_buffer_addresses,
        )
    return ret
def get_session_id(self):
    """Return the session id stored on this engine instance."""
    return self.session_id
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment