"src/diffusers/pipelines/marigold/__init__.py" did not exist on "b978334d71ebc07e92aad2e5463da3b3a6c8c0e2"
Unverified Commit 8b713c72 authored by FlyPanda's avatar FlyPanda Committed by GitHub
Browse files

Hicache L3 backend mooncake optimization configuration reading method (#10319)


Co-authored-by: default avatarTeng Ma <sima.mt@alibaba-inc.com>
Co-authored-by: default avatarhuangtingwei <141888744+huangtingwei9988@users.noreply.github.com>
Co-authored-by: default avatarshicang <shicang@shicang>
Co-authored-by: default avatarShangming Cai <csmthu@gmail.com>
parent 5bfafdfc
......@@ -119,7 +119,10 @@ Note: If `MOONCAKE_GLOBAL_SEGMENT_SIZE` is set to a non-zero value when starting
Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations).
There are two ways to configure Mooncake: 1. Using environment variables; 2. Using extra-config of sglang arguments.
There are three ways to configure Mooncake:
1. Using environment variables;
2. Using a JSON configuration file;
3. Using extra-config of sglang arguments.
**Using env variables to configure Mooncake**
......@@ -143,6 +146,21 @@ Parameter Explanation:
* `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command.
* `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value could be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors.
**Using JSON file to configure Mooncake**
```bash
export SGLANG_HICACHE_MOONCAKE_CONFIG_PATH=/sgl-workspace/sglang/benchmark/hicache/mooncake_config.json
echo '{
"local_hostname": "localhost",
"metadata_server": "http://localhost:8080/metadata",
"master_server_address": "localhost:50051",
"protocol": "rdma",
"device_name": "mlx5_0,mlx5_1",
"global_segment_size": 2684354560,
"local_buffer_size": 0
}' > ${SGLANG_HICACHE_MOONCAKE_CONFIG_PATH}
```
**Using extra-config of sglang arguments to configure Mooncake**
```bash
......
......@@ -11,7 +11,7 @@ from sglang.srt.mem_cache.hicache_storage import HiCacheStorage, HiCacheStorageC
# Default global segment size, expressed in bytes.
DEFAULT_GLOBAL_SEGMENT_SIZE = 4 * 1024 * 1024 * 1024 # 4 GiB
# Default local buffer size, expressed in bytes.
DEFAULT_LOCAL_BUFFER_SIZE = 16 * 1024 * 1024 # 16 MiB
# Name of the environment variable that holds the path to the Mooncake JSON
# configuration file read by MooncakeStoreConfig.from_file().
DEFAULT_MOONCAKE_CONFIG_PATH_ENV = "SGLANG_HICACHE_MOONCAKE_CONFIG_PATH"
logger = logging.getLogger(__name__)
......@@ -28,13 +28,13 @@ class MooncakeStoreConfig:
@staticmethod
def from_file() -> "MooncakeStoreConfig":
"""Load the config from a JSON file."""
file_path = os.getenv("MOONCAKE_CONFIG_PATH")
if file_path is None:
raise ValueError(
"The environment variable 'MOONCAKE_CONFIG_PATH' is not set."
)
with open(file_path) as fin:
config = json.load(fin)
file_path = os.getenv(DEFAULT_MOONCAKE_CONFIG_PATH_ENV)
try:
with open(file_path) as fin:
config = json.load(fin)
except Exception as e:
raise RuntimeError(f"Failed to load config from {file_path}: {str(e)}")
return MooncakeStoreConfig(
local_hostname=config.get("local_hostname"),
metadata_server=config.get("metadata_server"),
......@@ -101,6 +101,7 @@ class MooncakeStoreConfig:
class MooncakeStore(HiCacheStorage):
def __init__(self, storage_config: HiCacheStorageConfig = None):
try:
from mooncake.store import MooncakeDistributedStore
......@@ -129,6 +130,10 @@ class MooncakeStore(HiCacheStorage):
logger.info(
"Mooncake Configuration loaded from extra_config successfully."
)
elif os.getenv(DEFAULT_MOONCAKE_CONFIG_PATH_ENV):
# Load from config file
self.config = MooncakeStoreConfig.from_file()
logger.info("Mooncake Configuration loaded from file successfully.")
else:
# Load from environment variables
self.config = MooncakeStoreConfig.load_from_env()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment