Unverified commit 6287e7fa, authored by dtc, committed by GitHub
Browse files

[P/D] Mooncake: Add unit tests and minor fixes for mooncake connector (#36946)


Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
parent 84e439a9
This diff is collapsed.
...@@ -100,6 +100,8 @@ def create_vllm_config( ...@@ -100,6 +100,8 @@ def create_vllm_config(
hf_overrides: dict[str, Any] | None = None, hf_overrides: dict[str, Any] | None = None,
attention_backend: str | None = None, attention_backend: str | None = None,
kv_load_failure_policy: Literal["recompute", "fail"] = "fail", kv_load_failure_policy: Literal["recompute", "fail"] = "fail",
kv_connector: str = "NixlConnector",
kv_role: str = "kv_both",
) -> VllmConfig: ) -> VllmConfig:
"""Initialize VllmConfig For Testing.""" """Initialize VllmConfig For Testing."""
model_config = ModelConfig( model_config = ModelConfig(
...@@ -124,8 +126,8 @@ def create_vllm_config( ...@@ -124,8 +126,8 @@ def create_vllm_config(
enable_prefix_caching=True, enable_prefix_caching=True,
) )
kv_transfer_config = KVTransferConfig( kv_transfer_config = KVTransferConfig(
kv_connector="NixlConnector", kv_connector=kv_connector,
kv_role="kv_both", kv_role=kv_role,
enable_permute_local_kv=enable_permute_local_kv, enable_permute_local_kv=enable_permute_local_kv,
kv_connector_extra_config=kv_connector_extra_config or {}, kv_connector_extra_config=kv_connector_extra_config or {},
kv_load_failure_policy=kv_load_failure_policy, kv_load_failure_policy=kv_load_failure_policy,
......
...@@ -47,14 +47,17 @@ from vllm.v1.attention.backends.utils import get_kv_cache_layout ...@@ -47,14 +47,17 @@ from vllm.v1.attention.backends.utils import get_kv_cache_layout
from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.request import RequestStatus from vllm.v1.request import RequestStatus
logger = init_logger(__name__)
try: try:
from mooncake.engine import TransferEngine from mooncake.engine import TransferEngine
except ImportError as e: except ImportError:
raise ImportError( logger.warning(
"Please install mooncake by following the instructions at " "Please install mooncake by following the instructions at "
"https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md " "https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md "
"to run VLLM with MooncakeTransferEngine." "to run VLLM with MooncakeTransferEngine."
) from e )
TransferEngine = None
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.core.kv_cache_manager import KVCacheBlocks
...@@ -64,8 +67,6 @@ if TYPE_CHECKING: ...@@ -64,8 +67,6 @@ if TYPE_CHECKING:
ReqId = str # Internal scheduler request ID ReqId = str # Internal scheduler request ID
TransferId = str # KV transfer coordination ID (shared by P/D) TransferId = str # KV transfer coordination ID (shared by P/D)
logger = init_logger(__name__)
@dataclass(frozen=True) @dataclass(frozen=True)
class TransferRegion: class TransferRegion:
...@@ -638,6 +639,9 @@ class MooncakeConnectorWorker: ...@@ -638,6 +639,9 @@ class MooncakeConnectorWorker:
"""Implementation of Worker side methods""" """Implementation of Worker side methods"""
def __init__(self, vllm_config: VllmConfig, engine_id: str): def __init__(self, vllm_config: VllmConfig, engine_id: str):
if TransferEngine is None:
logger.error("Mooncake is not available")
raise RuntimeError("Mooncake is not available")
logger.info("Initializing Mooncake Transfer Engine worker %s", engine_id) logger.info("Initializing Mooncake Transfer Engine worker %s", engine_id)
self.vllm_config = vllm_config self.vllm_config = vllm_config
...@@ -721,9 +725,7 @@ class MooncakeConnectorWorker: ...@@ -721,9 +725,7 @@ class MooncakeConnectorWorker:
# Start bootstrap server on global rank 0. # Start bootstrap server on global rank 0.
if should_launch_bootstrap_server(vllm_config): if should_launch_bootstrap_server(vllm_config):
_, port = get_mooncake_bootstrap_addr(vllm_config) _, port = get_mooncake_bootstrap_addr(vllm_config)
self.bootstrap_server = MooncakeBootstrapServer( self.bootstrap_server = MooncakeBootstrapServer("0.0.0.0", port)
vllm_config, "0.0.0.0", port
)
self.bootstrap_server.start() self.bootstrap_server.start()
if not self.is_kv_producer: if not self.is_kv_producer:
...@@ -778,7 +780,9 @@ class MooncakeConnectorWorker: ...@@ -778,7 +780,9 @@ class MooncakeConnectorWorker:
if self.sender_loop.is_running(): if self.sender_loop.is_running():
self.sender_loop.call_soon_threadsafe(self.sender_loop.stop) self.sender_loop.call_soon_threadsafe(self.sender_loop.stop)
self._sender_listener_t.join() self._sender_listener_t.join()
if should_launch_bootstrap_server(self.vllm_config): if should_launch_bootstrap_server(self.vllm_config) and hasattr(
self, "bootstrap_server"
):
self.bootstrap_server.shutdown() self.bootstrap_server.shutdown()
if not self.is_kv_producer and self.receiver_loop.is_running(): if not self.is_kv_producer and self.receiver_loop.is_running():
self.receiver_loop.call_soon_threadsafe(self.receiver_loop.stop) self.receiver_loop.call_soon_threadsafe(self.receiver_loop.stop)
......
...@@ -8,7 +8,6 @@ import uvicorn ...@@ -8,7 +8,6 @@ import uvicorn
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from vllm.config import VllmConfig
from vllm.distributed.kv_transfer.kv_connector.utils import EngineId from vllm.distributed.kv_transfer.kv_connector.utils import EngineId
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -38,7 +37,7 @@ class MooncakeBootstrapServer: ...@@ -38,7 +37,7 @@ class MooncakeBootstrapServer:
Prefiller workers register their connection info (IP, port, ranks) here. Prefiller workers register their connection info (IP, port, ranks) here.
""" """
def __init__(self, vllm_config: VllmConfig, host: str, port: int): def __init__(self, host: str, port: int):
self.workers: dict[int, EngineEntry] = {} self.workers: dict[int, EngineEntry] = {}
self.host = host self.host = host
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment