Unverified commit 318424e2, authored by hzh0425 and committed by GitHub
Browse files

[HICache]: Support 3FS-Store with page_first_direct layout (#11460)

parent 6806c4e6
...@@ -67,6 +67,7 @@ python3 -m sglang.launch_server \ ...@@ -67,6 +67,7 @@ python3 -m sglang.launch_server \
--enable-hierarchical-cache \ --enable-hierarchical-cache \
--hicache-ratio 2 \ --hicache-ratio 2 \
--hicache-size 0 \ --hicache-size 0 \
--hicache-mem-layout page_first_direct \
--hicache-io-backend direct \ --hicache-io-backend direct \
--hicache-write-policy write_through \ --hicache-write-policy write_through \
--hicache-storage-backend hf3fs \ --hicache-storage-backend hf3fs \
...@@ -86,6 +87,7 @@ python3 -m sglang.launch_server \ ...@@ -86,6 +87,7 @@ python3 -m sglang.launch_server \
--page-size 64 \ --page-size 64 \
--hicache-ratio 2 \ --hicache-ratio 2 \
--hicache-size 0 \ --hicache-size 0 \
--hicache-mem-layout page_first_direct \
--hicache-io-backend direct \ --hicache-io-backend direct \
--hicache-write-policy write_through \ --hicache-write-policy write_through \
--hicache-storage-backend hf3fs \ --hicache-storage-backend hf3fs \
...@@ -115,7 +117,8 @@ python3 -m sglang.launch_server \ ...@@ -115,7 +117,8 @@ python3 -m sglang.launch_server \
--enable-hierarchical-cache \ --enable-hierarchical-cache \
--hicache-ratio 2 \ --hicache-ratio 2 \
--hicache-size 0 \ --hicache-size 0 \
--hicache-mem-layout page_first \ --hicache-mem-layout page_first_direct \
--hicache-io-backend direct \
--hicache-write-policy write_through \ --hicache-write-policy write_through \
--hicache-storage-backend hf3fs \ --hicache-storage-backend hf3fs \
--hicache-storage-prefetch-policy wait_complete \ --hicache-storage-prefetch-policy wait_complete \
...@@ -140,8 +143,8 @@ python3 -m sglang.launch_server \ ...@@ -140,8 +143,8 @@ python3 -m sglang.launch_server \
--page-size 64 \ --page-size 64 \
--enable-hierarchical-cache \ --enable-hierarchical-cache \
--hicache-ratio 2 \ --hicache-ratio 2 \
--hicache-mem-layout page_first \ --hicache-mem-layout page_first_direct \
--hicache-io-backend kernel \ --hicache-io-backend direct \
--hicache-storage-backend mooncake \ --hicache-storage-backend mooncake \
--hicache-write-policy write_through \ --hicache-write-policy write_through \
--hicache-storage-prefetch-policy timeout --hicache-storage-prefetch-policy timeout
......
...@@ -170,7 +170,7 @@ class StorageBackendFactory: ...@@ -170,7 +170,7 @@ class StorageBackendFactory:
return backend return backend
elif backend_name == "hf3fs": elif backend_name == "hf3fs":
# Calculate bytes_per_page based on memory pool layout # Calculate bytes_per_page based on memory pool layout
if mem_pool_host.layout == "page_first": if mem_pool_host.layout in ["page_first", "page_first_direct"]:
bytes_per_page = ( bytes_per_page = (
mem_pool_host.get_ksize_per_token() * mem_pool_host.page_size mem_pool_host.get_ksize_per_token() * mem_pool_host.page_size
) )
......
...@@ -501,8 +501,12 @@ class HiCacheHF3FS(HiCacheStorage): ...@@ -501,8 +501,12 @@ class HiCacheHF3FS(HiCacheStorage):
def register_mem_pool_host(self, mem_pool_host: HostKVCache): def register_mem_pool_host(self, mem_pool_host: HostKVCache):
super().register_mem_pool_host(mem_pool_host) super().register_mem_pool_host(mem_pool_host)
self.is_zero_copy = self.mem_pool_host.layout == "page_first" self.is_zero_copy = self.mem_pool_host.layout in [
logger.info(f"{self.is_zero_copy=}") "page_first",
"page_first_direct",
]
logger.info(f"{self.is_zero_copy=}, layout={self.mem_pool_host.layout}")
def _get_mha_zero_copy_keys(self, keys: List[str]) -> List[str]: def _get_mha_zero_copy_keys(self, keys: List[str]) -> List[str]:
_keys = [] _keys = []
......
...@@ -76,6 +76,8 @@ class TestHf3fsBackendAccuracy(HiCacheStorage3FSBackendBaseMixin, CustomTestCase ...@@ -76,6 +76,8 @@ class TestHf3fsBackendAccuracy(HiCacheStorage3FSBackendBaseMixin, CustomTestCase
server_args, env_vars = super()._get_additional_server_args_and_env() server_args, env_vars = super()._get_additional_server_args_and_env()
server_args["--hicache-ratio"] = 1.5 server_args["--hicache-ratio"] = 1.5
server_args["--tp-size"] = 2 server_args["--tp-size"] = 2
server_args["--hicache-mem-layout"] = "page_first_direct"
server_args["--hicache-io-backend"] = "direct"
return server_args, env_vars return server_args, env_vars
def test_eval_accuracy(self): def test_eval_accuracy(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment