Commit 9aa4502d (unverified), authored by JinYan Su, committed by GitHub
Browse files

feat(mooncake): support GB suffix for global_segment_size (#10745)


Signed-off-by: Jinyang Su <751080330@qq.com>
Co-authored-by: huangtingwei <141888744+huangtingwei9988@users.noreply.github.com>
parent a0835c3a
...@@ -91,8 +91,8 @@ First, create and save a configuration file in JSON format. For example: ...@@ -91,8 +91,8 @@ First, create and save a configuration file in JSON format. For example:
"metadata_server": "http://localhost:8080/metadata", "metadata_server": "http://localhost:8080/metadata",
"master_server_address": "localhost:50051", "master_server_address": "localhost:50051",
"protocol": "rdma", "protocol": "rdma",
"device_name": "mlx5_0,mlx5_1", "device_name": "",
"global_segment_size": 2684354560, "global_segment_size": "4gb",
"local_buffer_size": 0 "local_buffer_size": 0
} }
``` ```
...@@ -102,9 +102,9 @@ Parameter Explanation: ...@@ -102,9 +102,9 @@ Parameter Explanation:
* `local_hostname`: The hostname of the `store service`. * `local_hostname`: The hostname of the `store service`.
* `metadata_server`: The network address of the `metadata service`. The default port is 8080. * `metadata_server`: The network address of the `metadata service`. The default port is 8080.
* `master_server_address`: The network address of the `master service`. The default port is 50051. * `master_server_address`: The network address of the `master service`. The default port is 50051.
* `protocol`: The protocol used by the Mooncake. Supported values are `"rdma"` or `"tcp"`. For optimal performance, `"rdma"` is recommended. * `protocol`: The protocol used by Mooncake. Supported values are `"rdma"` or `"tcp"`. For optimal performance, `"rdma"` is recommended.
* `device_name`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command. * `device_name`: For `"rdma"`, you can leave this empty in most cases. Mooncake auto-discovers RDMA NICs by default. If you want to pin specific NICs (e.g., `mlx5_0,mlx5_1`), just set `device_name` accordingly. To list available devices, use `ibv_devices`.
* `global_segment_size`: The amount of memory (in bytes) contributed to the global memory pool. A larger value allows Mooncake to cache more KV tensors. * `global_segment_size`: The amount of memory contributed to the global memory pool. Accepts either bytes (integer) or a string with the `gb` suffix, e.g., `"16gb"`. A larger value allows Mooncake to cache more KV tensors.
* `local_buffer_size`: Local buffer is used to do request operations such as `Get` or `Put`. In this case, it is set to 0 because the instance functions solely as a storage server, contributing memory to the global pool without issuing any request operations. * `local_buffer_size`: Local buffer is used to do request operations such as `Get` or `Put`. In this case, it is set to 0 because the instance functions solely as a storage server, contributing memory to the global pool without issuing any request operations.
Then start the `store service`: Then start the `store service`:
...@@ -130,8 +130,11 @@ There are three ways to prepare mooncakes: ...@@ -130,8 +130,11 @@ There are three ways to prepare mooncakes:
MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \ MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \
MOONCAKE_MASTER=127.0.0.1:50051 \ MOONCAKE_MASTER=127.0.0.1:50051 \
MOONCAKE_PROTOCOL="rdma" \ MOONCAKE_PROTOCOL="rdma" \
MOONCAKE_DEVICE="mlx5_0,mlx5_1" \ # Leave MOONCAKE_DEVICE empty for auto-discovery (default)
MOONCAKE_GLOBAL_SEGMENT_SIZE=4294967296 \ # To pin NICs, disable auto-discovery then set MOONCAKE_DEVICE, e.g.:
# export MC_MS_AUTO_DISC=0
# export MOONCAKE_DEVICE="mlx5_0,mlx5_1"
MOONCAKE_GLOBAL_SEGMENT_SIZE=4gb \
python -m sglang.launch_server \ python -m sglang.launch_server \
--enable-hierarchical-cache \ --enable-hierarchical-cache \
--hicache-storage-backend mooncake\ --hicache-storage-backend mooncake\
...@@ -143,8 +146,8 @@ Parameter Explanation: ...@@ -143,8 +146,8 @@ Parameter Explanation:
* `MOONCAKE_TE_META_DATA_SERVER`: The network address of the `metadata service`. The default port is 8080. * `MOONCAKE_TE_META_DATA_SERVER`: The network address of the `metadata service`. The default port is 8080.
* `MOONCAKE_MASTER`: The network address of the `master service`. The default port is 50051. * `MOONCAKE_MASTER`: The network address of the `master service`. The default port is 50051.
* `MOONCAKE_PROTOCOL`: The protocol used by Mooncake. Supported values are `"rdma"` or `"tcp"`. For optimal performance, `"rdma"` is recommended. * `MOONCAKE_PROTOCOL`: The protocol used by Mooncake. Supported values are `"rdma"` or `"tcp"`. For optimal performance, `"rdma"` is recommended.
* `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command. * `MOONCAKE_DEVICE`: Optional for `"rdma"`. By default, Mooncake auto-discovers RDMA NICs. If you need to pin specific NICs, set `MOONCAKE_DEVICE` (comma-separated list, e.g., `mlx5_0,mlx5_1`).
* `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value could be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors. * `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory contributed to the global memory pool. Accepts either bytes (integer) or a value with the `gb` suffix, e.g., `16gb`. If at least one `store service` is launched, this value can be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors.
**Using JSON file to configure Mooncake** **Using JSON file to configure Mooncake**
...@@ -155,8 +158,8 @@ echo '{ ...@@ -155,8 +158,8 @@ echo '{
"metadata_server": "http://localhost:8080/metadata", "metadata_server": "http://localhost:8080/metadata",
"master_server_address": "localhost:50051", "master_server_address": "localhost:50051",
"protocol": "rdma", "protocol": "rdma",
"device_name": "mlx5_0,mlx5_1", "device_name": "",
"global_segment_size": 2684354560, "global_segment_size": "4gb",
"local_buffer_size": 0 "local_buffer_size": 0
}' > ${SGLANG_HICACHE_MOONCAKE_CONFIG_PATH} }' > ${SGLANG_HICACHE_MOONCAKE_CONFIG_PATH}
``` ```
...@@ -168,7 +171,7 @@ python -m sglang.launch_server \ ...@@ -168,7 +171,7 @@ python -m sglang.launch_server \
--enable-hierarchical-cache \ --enable-hierarchical-cache \
--hicache-storage-backend mooncake \ --hicache-storage-backend mooncake \
--model-path [model_path] \ --model-path [model_path] \
--hicache-storage-backend-extra-config '{"master_server_address": "127.0.0.1:50051", "local_hostname": "localhost", "metadata_server": "http://127.0.0.1:8080/metadata", "global_segment_size": 4294967296, "local_buffer_size": 16777216, "protocol": "rdma", "device_name": "mlx5_0,mlx5_1"}' --hicache-storage-backend-extra-config '{"master_server_address": "127.0.0.1:50051", "local_hostname": "localhost", "metadata_server": "http://127.0.0.1:8080/metadata", "global_segment_size": "4gb", "local_buffer_size": 16777216, "protocol": "rdma", "device_name": ""}'
``` ```
**Important: Understanding Global Segment Size** **Important: Understanding Global Segment Size**
...@@ -193,7 +196,8 @@ First, start the `metadata service` and `master service`. Then run the `test_moo ...@@ -193,7 +196,8 @@ First, start the `metadata service` and `master service`. Then run the `test_moo
MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \ MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \
MOONCAKE_MASTER=127.0.0.1:50051 \ MOONCAKE_MASTER=127.0.0.1:50051 \
MOONCAKE_PROTOCOL="rdma" \ MOONCAKE_PROTOCOL="rdma" \
MOONCAKE_DEVICE="mlx5_0,mlx5_1" \ # Auto-discovery by default. To pin NICs:
# export MOONCAKE_DEVICE="mlx5_0,mlx5_1"
MOONCAKE_GLOBAL_SEGMENT_SIZE=16777216 \ MOONCAKE_GLOBAL_SEGMENT_SIZE=16777216 \
python3 [path of test_mooncake_store.py] python3 [path of test_mooncake_store.py]
``` ```
......
...@@ -20,6 +20,22 @@ DEFAULT_MOONCAKE_CONFIG_PATH_ENV = "SGLANG_HICACHE_MOONCAKE_CONFIG_PATH" ...@@ -20,6 +20,22 @@ DEFAULT_MOONCAKE_CONFIG_PATH_ENV = "SGLANG_HICACHE_MOONCAKE_CONFIG_PATH"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _parse_global_segment_size(value) -> int:
if isinstance(value, int):
return value
if isinstance(value, str):
s = value.strip().lower()
if s.endswith("gb"):
num = s[:-2].strip()
if not num:
raise ValueError(
"Invalid global_segment_size: missing number before 'gb'"
)
return int(num) * 1024 * 1024 * 1024
return int(s)
return int(value)
@dataclass @dataclass
class MooncakeStoreConfig: class MooncakeStoreConfig:
local_hostname: str local_hostname: str
...@@ -43,13 +59,13 @@ class MooncakeStoreConfig: ...@@ -43,13 +59,13 @@ class MooncakeStoreConfig:
return MooncakeStoreConfig( return MooncakeStoreConfig(
local_hostname=config.get("local_hostname"), local_hostname=config.get("local_hostname"),
metadata_server=config.get("metadata_server"), metadata_server=config.get("metadata_server"),
global_segment_size=config.get( global_segment_size=_parse_global_segment_size(
"global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE config.get("global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE)
), ),
# Zero copy interface does not need local buffer # Zero copy interface does not need local buffer
local_buffer_size=DEFAULT_LOCAL_BUFFER_SIZE, local_buffer_size=DEFAULT_LOCAL_BUFFER_SIZE,
protocol=config.get("protocol", "tcp"), protocol=config.get("protocol", "tcp"),
device_name=config.get("device_name", "auto"), device_name=config.get("device_name", ""),
master_server_address=config.get("master_server_address"), master_server_address=config.get("master_server_address"),
) )
...@@ -58,7 +74,7 @@ class MooncakeStoreConfig: ...@@ -58,7 +74,7 @@ class MooncakeStoreConfig:
"""Load config from a file specified in the environment variable. """Load config from a file specified in the environment variable.
export MOONCAKE_MASTER=10.13.3.232:50051 export MOONCAKE_MASTER=10.13.3.232:50051
export MOONCAKE_PROTOCOL="rdma" export MOONCAKE_PROTOCOL="rdma"
export MOONCAKE_DEVICE="auto" export MOONCAKE_DEVICE=""
export MOONCAKE_TE_META_DATA_SERVER="P2PHANDSHAKE" export MOONCAKE_TE_META_DATA_SERVER="P2PHANDSHAKE"
""" """
# other required environment variables... # other required environment variables...
...@@ -67,13 +83,13 @@ class MooncakeStoreConfig: ...@@ -67,13 +83,13 @@ class MooncakeStoreConfig:
return MooncakeStoreConfig( return MooncakeStoreConfig(
local_hostname=os.getenv("LOCAL_HOSTNAME", "localhost"), local_hostname=os.getenv("LOCAL_HOSTNAME", "localhost"),
metadata_server=os.getenv("MOONCAKE_TE_META_DATA_SERVER", "P2PHANDSHAKE"), metadata_server=os.getenv("MOONCAKE_TE_META_DATA_SERVER", "P2PHANDSHAKE"),
global_segment_size=int( global_segment_size=_parse_global_segment_size(
os.getenv("MOONCAKE_GLOBAL_SEGMENT_SIZE", DEFAULT_GLOBAL_SEGMENT_SIZE) os.getenv("MOONCAKE_GLOBAL_SEGMENT_SIZE", DEFAULT_GLOBAL_SEGMENT_SIZE)
), ),
# Zero copy interface does not need local buffer # Zero copy interface does not need local buffer
local_buffer_size=DEFAULT_LOCAL_BUFFER_SIZE, local_buffer_size=DEFAULT_LOCAL_BUFFER_SIZE,
protocol=os.getenv("MOONCAKE_PROTOCOL", "tcp"), protocol=os.getenv("MOONCAKE_PROTOCOL", "tcp"),
device_name=os.getenv("MOONCAKE_DEVICE", "auto"), device_name=os.getenv("MOONCAKE_DEVICE", ""),
master_server_address=os.getenv("MOONCAKE_MASTER"), master_server_address=os.getenv("MOONCAKE_MASTER"),
) )
...@@ -86,24 +102,17 @@ class MooncakeStoreConfig: ...@@ -86,24 +102,17 @@ class MooncakeStoreConfig:
return MooncakeStoreConfig( return MooncakeStoreConfig(
local_hostname=extra_config.get("local_hostname", "localhost"), local_hostname=extra_config.get("local_hostname", "localhost"),
metadata_server=extra_config.get("metadata_server", "P2PHANDSHAKE"), metadata_server=extra_config.get("metadata_server", "P2PHANDSHAKE"),
global_segment_size=extra_config.get( global_segment_size=_parse_global_segment_size(
"global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE extra_config.get("global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE)
), ),
local_buffer_size=extra_config.get( local_buffer_size=extra_config.get(
"local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE "local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE
), ),
protocol=extra_config.get("protocol", "tcp"), protocol=extra_config.get("protocol", "tcp"),
device_name=extra_config.get("device_name", "auto"), device_name=extra_config.get("device_name", ""),
master_server_address=extra_config["master_server_address"], master_server_address=extra_config["master_server_address"],
) )
def __post_init__(self):
if self.device_name == "auto":
os.environ["MC_MS_AUTO_DISC"] = "1"
os.environ["MC_MS_FILTERS"] = (
"mlx5_bond_0, mlx5_bond_1, mlx5_bond_2, mlx5_bond_3"
)
class MooncakeStore(HiCacheStorage): class MooncakeStore(HiCacheStorage):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment