"docs/vscode:/vscode.git/clone" did not exist on "6650e6a930dbdf1cd4def9b58e952376400ccfcf"
Unverified commit 6287e7fa, authored by dtc, committed by GitHub
Browse files

[P/D] Mooncake: Add unit tests and minor fixes for mooncake connector (#36946)


Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
parent 84e439a9
This diff is collapsed.
......@@ -100,6 +100,8 @@ def create_vllm_config(
hf_overrides: dict[str, Any] | None = None,
attention_backend: str | None = None,
kv_load_failure_policy: Literal["recompute", "fail"] = "fail",
kv_connector: str = "NixlConnector",
kv_role: str = "kv_both",
) -> VllmConfig:
"""Initialize VllmConfig For Testing."""
model_config = ModelConfig(
......@@ -124,8 +126,8 @@ def create_vllm_config(
enable_prefix_caching=True,
)
kv_transfer_config = KVTransferConfig(
kv_connector="NixlConnector",
kv_role="kv_both",
kv_connector=kv_connector,
kv_role=kv_role,
enable_permute_local_kv=enable_permute_local_kv,
kv_connector_extra_config=kv_connector_extra_config or {},
kv_load_failure_policy=kv_load_failure_policy,
......
......@@ -47,14 +47,17 @@ from vllm.v1.attention.backends.utils import get_kv_cache_layout
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.request import RequestStatus
logger = init_logger(__name__)
try:
from mooncake.engine import TransferEngine
except ImportError as e:
raise ImportError(
except ImportError:
logger.warning(
"Please install mooncake by following the instructions at "
"https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md "
"to run VLLM with MooncakeTransferEngine."
) from e
)
TransferEngine = None
if TYPE_CHECKING:
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
......@@ -64,8 +67,6 @@ if TYPE_CHECKING:
ReqId = str # Internal scheduler request ID
TransferId = str # KV transfer coordination ID (shared by P/D)
logger = init_logger(__name__)
@dataclass(frozen=True)
class TransferRegion:
......@@ -638,6 +639,9 @@ class MooncakeConnectorWorker:
"""Implementation of Worker side methods"""
def __init__(self, vllm_config: VllmConfig, engine_id: str):
if TransferEngine is None:
logger.error("Mooncake is not available")
raise RuntimeError("Mooncake is not available")
logger.info("Initializing Mooncake Transfer Engine worker %s", engine_id)
self.vllm_config = vllm_config
......@@ -721,9 +725,7 @@ class MooncakeConnectorWorker:
# Start bootstrap server on global rank 0.
if should_launch_bootstrap_server(vllm_config):
_, port = get_mooncake_bootstrap_addr(vllm_config)
self.bootstrap_server = MooncakeBootstrapServer(
vllm_config, "0.0.0.0", port
)
self.bootstrap_server = MooncakeBootstrapServer("0.0.0.0", port)
self.bootstrap_server.start()
if not self.is_kv_producer:
......@@ -778,7 +780,9 @@ class MooncakeConnectorWorker:
if self.sender_loop.is_running():
self.sender_loop.call_soon_threadsafe(self.sender_loop.stop)
self._sender_listener_t.join()
if should_launch_bootstrap_server(self.vllm_config):
if should_launch_bootstrap_server(self.vllm_config) and hasattr(
self, "bootstrap_server"
):
self.bootstrap_server.shutdown()
if not self.is_kv_producer and self.receiver_loop.is_running():
self.receiver_loop.call_soon_threadsafe(self.receiver_loop.stop)
......
......@@ -8,7 +8,6 @@ import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from vllm.config import VllmConfig
from vllm.distributed.kv_transfer.kv_connector.utils import EngineId
from vllm.logger import init_logger
......@@ -38,7 +37,7 @@ class MooncakeBootstrapServer:
Prefiller workers register their connection info (IP, port, ranks) here.
"""
def __init__(self, vllm_config: VllmConfig, host: str, port: int):
def __init__(self, host: str, port: int):
self.workers: dict[int, EngineEntry] = {}
self.host = host
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment