Unverified Commit 86cb4db0 authored by Shisong Ma, committed by GitHub

[Feature] Add EIC as sglang HiCache Storage backend (#10271)


Co-authored-by: mashisong <mashisong@bytedance.com>
parent 2e130b76
......@@ -310,7 +310,7 @@ class HiCacheController:
self.page_get_func = self._generic_page_get
self.page_set_func = self._generic_page_set
if self.storage_backend_type in ["hf3fs", "mooncake"]:
if self.storage_backend_type in ["hf3fs", "mooncake", "eic"]:
self.page_get_func = self._page_get_zero_copy
self.page_set_func = self._page_set_zero_copy
......
......@@ -181,6 +181,8 @@ class StorageBackendFactory:
dtype = mem_pool_host.dtype
return backend_class.from_env_config(bytes_per_page, dtype, storage_config)
elif backend_name == "eic":
return backend_class(storage_config, mem_pool_host)
else:
raise ValueError(f"Unknown built-in backend: {backend_name}")
......@@ -213,3 +215,9 @@ StorageBackendFactory.register_backend(
"sglang.srt.mem_cache.storage.aibrix_kvcache.aibrix_kvcache_storage",
"AibrixKVCacheStorage",
)
StorageBackendFactory.register_backend(
"eic",
"sglang.srt.mem_cache.storage.eic.eic_storage",
"EICStorage",
)
# EIC as sglang HiCache Storage
EIC (Elastic Instant Cache) is a distributed database designed for LLM KV cache. It supports RDMA and GDR, and provides distributed disaster recovery and elastic scaling.
You can learn about the principles and architecture of EIC from these articles: https://mp.weixin.qq.com/s/tasDqXf0Gxr3o_WCJ2IJUQ and https://mp.weixin.qq.com/s/b_4YhTa96Zeklh23lv8qBw
## Deploy EIC
You can visit the official console at https://console.volcengine.com/eic and deploy EIC KVCache on your compute cluster through the web UI. In addition, we provide a dedicated image on Volcano Engine that integrates various optimizations on top of the official image.
You can use test_unit.py to verify connectivity to EIC.
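test_unit.py reads its connection settings from a YAML file (by default /sgl-workspace/config/remote-eic.yaml). The sketch below is a hypothetical remote-eic.yaml: the field names are the ones test_unit.py reads, while every value is a placeholder that you should replace with your own deployment's settings.

```yaml
# Hypothetical example config; replace all placeholder values with your deployment's settings.
remote_url: "eic://<endpoint-host>:<endpoint-port>"  # EIC service endpoint, with the eic:// prefix
eic_instance_id: "<your-eic-instance-id>"            # instance ID from the EIC console
eic_log_dir: "/tmp/eic_logs"                         # client log directory (created if missing)
eic_log_level: 2                                     # passed to eic.LogLevel
eic_trans_type: 3                                    # passed to eic.TransportType
eic_flag_file: "<path-to-flag-file>"                 # optional client flag file
```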
## Deploy Model With EIC
You can enable EIC KV cache offloading through the standard sglang launch interface, for example:
```bash
python -m sglang.launch_server \
    --model-path [model_path] \
    --enable-hierarchical-cache \
    --hicache-storage-backend eic \
    --hicache-write-policy 'write_through' \
    --hicache-mem-layout 'page_first'
```
For more details, see https://www.volcengine.com/docs/85848/1749188.
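Once the server is up, you can verify the deployment end to end by sending generation requests. The snippet below is a minimal sketch that assumes the server listens on sglang's default port 30000 on the local host; repeated requests that share a prompt prefix should be served partly from the hierarchical cache backed by EIC.

```python
import requests

# Assumes sglang is serving on the default port 30000 on this host.
url = "http://127.0.0.1:30000/generate"
payload = {
    "text": "The capital of France is",
    "sampling_params": {"max_new_tokens": 16, "temperature": 0},
}

# Send the same prompt twice; the second request can reuse KV cache
# pages that HiCache has offloaded to EIC.
for _ in range(2):
    response = requests.post(url, json=payload)
    response.raise_for_status()
    print(response.json()["text"])
```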
"""Connectivity test for the EIC HiCache storage backend.

Usage: python test_unit.py --config /path/to/remote-eic.yaml
"""

import argparse
import os

import eic
import torch
import yaml


def parse_args():
parser = argparse.ArgumentParser(description="EIC Storage Unit Test")
parser.add_argument(
"--config",
"-c",
type=str,
default="/sgl-workspace/config/remote-eic.yaml",
help="EIC yaml config",
)
args, _ = parser.parse_known_args()
return args


def init_eic_client():
    # Load the YAML config and initialize an EIC client from it.
    args = parse_args()
config_path = os.path.abspath(args.config)
if not os.path.exists(config_path):
raise FileNotFoundError(f"Config file not found: {config_path}")
with open(config_path, "r") as fin:
config = yaml.safe_load(fin)
remote_url = config.get("remote_url", None)
    if remote_url is None:
        raise ValueError("remote_url is missing from the EIC config")
    # Strip the eic:// scheme to get the raw endpoint address.
    endpoint = remote_url[len("eic://") :]
eic_instance_id = config.get("eic_instance_id", None)
eic_log_dir = config.get("eic_log_dir", None)
eic_log_level = config.get("eic_log_level", 2)
eic_trans_type = config.get("eic_trans_type", 3)
eic_flag_file = config.get("eic_flag_file", None)
if not os.path.exists(eic_log_dir):
os.makedirs(eic_log_dir, exist_ok=True)
eic_client = eic.Client()
init_option = eic.InitOption()
init_option.log_dir = eic_log_dir
init_option.log_level = eic.LogLevel(eic_log_level)
init_option.transport_type = eic.TransportType(eic_trans_type)
init_option.flag_file = eic_flag_file
ret = eic_client.init(eic_instance_id, endpoint, init_option)
if ret != 0:
raise RuntimeError(f"EIC Client init failed with error code: {ret}")
return eic_client


def test_set(eic_client):
    # Write 16 bfloat16 tensors to EIC with a short TTL via mset.
test_key = ["test_key_" + str(i) for i in range(16)]
tensors = [
torch.ones([12, 6, 1, 512], dtype=torch.bfloat16, device="cpu")
for _ in range(16)
]
data_keys = eic.StringVector()
data_vals = eic.IOBuffers()
for i in range(16):
data_keys.append(test_key[i])
data_vals.append(
tensors[i].data_ptr(), tensors[i].numel() * tensors[i].element_size(), False
)
set_opt = eic.SetOption()
set_opt.ttl_second = 3
status_code, set_outcome = eic_client.mset(data_keys, data_vals, set_opt)
assert (
status_code == eic.StatusCode.SUCCESS
), f"Set failed with status code: {status_code}"


def test_get(eic_client):
    # Read the test keys back from EIC into zero-filled CPU buffers via mget.
test_key = ["test_key_" + str(i) for i in range(16)]
tensors = [
torch.zeros([12, 6, 1, 512], dtype=torch.bfloat16, device="cpu")
for _ in range(16)
]
data_keys = eic.StringVector()
data_vals = eic.IOBuffers()
for i in range(16):
data_keys.append(test_key[i])
data_vals.append(
tensors[i].data_ptr(), tensors[i].numel() * tensors[i].element_size(), False
)
get_opt = eic.GetOption()
status_code, data_vals, get_outcome = eic_client.mget(data_keys, get_opt, data_vals)
assert (
status_code == eic.StatusCode.SUCCESS
), f"Get failed with status code: {status_code}"


def test_exists(eic_client):
    # Check that the test keys are present in EIC via mexist.
test_key = ["test_key_" + str(i) for i in range(16)]
data_keys = eic.StringVector()
for key in test_key:
data_keys.append(key)
exists_opt = eic.ExistOption()
status_code, exists_outcome = eic_client.mexist(data_keys, exists_opt)
assert (
status_code == eic.StatusCode.SUCCESS
), f"Exists failed with status code: {status_code}"


def main():
    # Run set, exists, and get against the configured EIC deployment.
eic_client = init_eic_client()
test_set(eic_client)
test_exists(eic_client)
test_get(eic_client)


if __name__ == "__main__":
main()
......@@ -2260,7 +2260,7 @@ class ServerArgs:
parser.add_argument(
"--hicache-storage-backend",
type=str,
choices=["file", "mooncake", "hf3fs", "nixl", "aibrix", "dynamic"],
choices=["file", "mooncake", "hf3fs", "nixl", "aibrix", "dynamic", "eic"],
default=ServerArgs.hicache_storage_backend,
help="The storage backend for hierarchical KV cache. "
"Built-in backends: file, mooncake, hf3fs, nixl, aibrix. "
......