Unverified Commit 8471e5e6 authored by Teng Ma's avatar Teng Ma Committed by GitHub
Browse files

[HiCache] feat: add mooncake backend extra config (#10213)

parent 4582931a
......@@ -103,6 +103,10 @@ Note: To get started quickly, if `MOONCAKE_GLOBAL_SEGMENT_SIZE` is set to a non-
**Start the `SGLang server` with Mooncake enabled:**
Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations).
There are two ways to configure Mooncake: (1) using environment variables, or (2) using the `--hicache-storage-backend-extra-config` argument of the SGLang server.
**Using environment variables to configure Mooncake**
```bash
MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \
MOONCAKE_MASTER=127.0.0.1:50051 \
......@@ -123,6 +127,16 @@ Parameter Explanation:
* `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command.
* `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value can be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors.
**Using the `--hicache-storage-backend-extra-config` argument to configure Mooncake**
```bash
python -m sglang.launch_server \
--enable-hierarchical-cache \
--hicache-storage-backend mooncake \
--model-path [model_path] \
--hicache-storage-backend-extra-config '{"master_server_address": "127.0.0.1:50051", "local_hostname": "localhost", "metadata_server": "http://127.0.0.1:8080/metadata", "global_segment_size": 4294967296, "local_buffer_size": 16777216, "protocol": "rdma", "device_name": "mlx5_0,mlx5_1"}'
```
**Important: Understanding Global Segment Size**
`global_segment_size` for `store service` and `MOONCAKE_GLOBAL_SEGMENT_SIZE` for `SGLang service`: This parameter specifies the amount of memory each instance contributes to the distributed memory pool. The total memory available for KV cache storage across the cluster is the sum of the memory contributed by all instances.
......
......@@ -72,6 +72,26 @@ class MooncakeStoreConfig:
master_server_address=os.getenv("MOONCAKE_MASTER"),
)
@staticmethod
def load_from_extra_config(extra_config: dict) -> "MooncakeStoreConfig":
    """Build a MooncakeStoreConfig from an ``extra_config`` dictionary.

    Only ``master_server_address`` is mandatory. Every other key falls
    back to a default when it is absent from ``extra_config``:

    * ``local_hostname``      -> ``"localhost"``
    * ``metadata_server``     -> ``"P2PHANDSHAKE"``
    * ``global_segment_size`` -> ``DEFAULT_GLOBAL_SEGMENT_SIZE``
    * ``local_buffer_size``   -> ``DEFAULT_LOCAL_BUFFER_SIZE``
    * ``protocol``            -> ``"tcp"``
    * ``device_name``         -> ``"auto"``

    Args:
        extra_config: Mapping of Mooncake option names to values.

    Returns:
        A MooncakeStoreConfig populated from ``extra_config``.

    Raises:
        ValueError: If ``master_server_address`` is missing or is ``None``.
    """
    # A key that is present but None (e.g. JSON ``null``) is as unusable as
    # a missing key, so reject both up front instead of building a config
    # that has no master address.
    master_server_address = extra_config.get("master_server_address")
    if master_server_address is None:
        raise ValueError("master_server_address is required in extra_config")

    return MooncakeStoreConfig(
        local_hostname=extra_config.get("local_hostname", "localhost"),
        metadata_server=extra_config.get("metadata_server", "P2PHANDSHAKE"),
        global_segment_size=extra_config.get(
            "global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE
        ),
        local_buffer_size=extra_config.get(
            "local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE
        ),
        protocol=extra_config.get("protocol", "tcp"),
        device_name=extra_config.get("device_name", "auto"),
        master_server_address=master_server_address,
    )
def __post_init__(self):
if self.device_name == "auto":
os.environ["MC_MS_AUTO_DISC"] = "1"
......@@ -93,8 +113,26 @@ class MooncakeStore(HiCacheStorage):
try:
self.store = MooncakeDistributedStore()
self.config = MooncakeStoreConfig.load_from_env()
logger.info("Mooncake Configuration loaded from env successfully.")
extra_config = (
getattr(storage_config, "extra_config", None)
if storage_config
else None
)
# Load configuration with master_server_address prioritized from extra_config if available
if (
extra_config is not None
and extra_config.get("master_server_address") is not None
):
# Load from extra_config
self.config = MooncakeStoreConfig.load_from_extra_config(extra_config)
logger.info(
"Mooncake Configuration loaded from extra_config successfully."
)
else:
# Load from environment variables
self.config = MooncakeStoreConfig.load_from_env()
logger.info("Mooncake Configuration loaded from env successfully.")
ret_code = self.store.setup(
self.config.local_hostname,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment