Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
cae39565
Unverified
Commit
cae39565
authored
Oct 20, 2025
by
huangtingwei
Committed by
GitHub
Oct 20, 2025
Browse files
check master server for mooncake store (#10510)
parent
27a223ab
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
57 additions
and
0 deletions
+57
-0
python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py
...ng/srt/mem_cache/storage/mooncake_store/mooncake_store.py
+57
-0
No files found.
python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py
View file @
cae39565
import
json
import
logging
import
os
import
time
import
uuid
from
dataclasses
import
dataclass
from
typing
import
Any
,
List
,
Optional
import
requests
import
torch
from
sglang.srt.mem_cache.hicache_storage
import
(
...
...
@@ -17,6 +19,10 @@ from sglang.srt.mem_cache.memory_pool_host import HostKVCache
DEFAULT_GLOBAL_SEGMENT_SIZE
=
4
*
1024
*
1024
*
1024
# 4 GiB
DEFAULT_LOCAL_BUFFER_SIZE
=
16
*
1024
*
1024
# 16 MB
DEFAULT_MOONCAKE_CONFIG_PATH_ENV
=
"SGLANG_HICACHE_MOONCAKE_CONFIG_PATH"
SETUP_TIMEOUT
=
600
# 10min
DEFAULT_MASTER_METRICS_PORT
=
9003
DEFAULT_CHECK_SERVER
=
False
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -45,6 +51,8 @@ class MooncakeStoreConfig:
protocol
:
str
device_name
:
str
master_server_address
:
str
master_metrics_port
:
int
check_server
:
bool
@
staticmethod
def
from_file
()
->
"MooncakeStoreConfig"
:
...
...
@@ -67,6 +75,10 @@ class MooncakeStoreConfig:
protocol
=
config
.
get
(
"protocol"
,
"tcp"
),
device_name
=
config
.
get
(
"device_name"
,
""
),
master_server_address
=
config
.
get
(
"master_server_address"
),
master_metrics_port
=
config
.
get
(
"master_metrics_port"
,
DEFAULT_MASTER_METRICS_PORT
),
check_server
=
config
.
get
(
"check_server"
,
DEFAULT_CHECK_SERVER
),
)
@
staticmethod
...
...
@@ -91,6 +103,10 @@ class MooncakeStoreConfig:
protocol
=
os
.
getenv
(
"MOONCAKE_PROTOCOL"
,
"tcp"
),
device_name
=
os
.
getenv
(
"MOONCAKE_DEVICE"
,
""
),
master_server_address
=
os
.
getenv
(
"MOONCAKE_MASTER"
),
master_metrics_port
=
int
(
os
.
getenv
(
"MOONCAKE_MASTER_METRICS_PORT"
,
DEFAULT_GLOBAL_SEGMENT_SIZE
)
),
check_server
=
bool
(
os
.
getenv
(
"MOONCAKE_CHECK_SERVER"
,
DEFAULT_CHECK_SERVER
)),
)
@
staticmethod
...
...
@@ -111,6 +127,10 @@ class MooncakeStoreConfig:
protocol
=
extra_config
.
get
(
"protocol"
,
"tcp"
),
device_name
=
extra_config
.
get
(
"device_name"
,
""
),
master_server_address
=
extra_config
[
"master_server_address"
],
master_metrics_port
=
extra_config
.
get
(
"master_metrics_port"
,
DEFAULT_MASTER_METRICS_PORT
),
check_server
=
extra_config
.
get
(
"check_server"
,
DEFAULT_CHECK_SERVER
),
)
...
...
@@ -166,6 +186,10 @@ class MooncakeStore(HiCacheStorage):
self
.
extra_backend_tag
=
extra_config
[
"extra_backend_tag"
]
logger
.
info
(
f
"Using extra_backend_tag:
{
self
.
extra_backend_tag
}
"
)
# Check server status
if
self
.
config
.
check_server
:
self
.
check_server
()
ret_code
=
self
.
store
.
setup
(
self
.
config
.
local_hostname
,
self
.
config
.
metadata_server
,
...
...
@@ -196,6 +220,39 @@ class MooncakeStore(HiCacheStorage):
logger
.
error
(
"An error occurred while loading the configuration: %s"
,
exc
)
raise
def
check_server
(
self
):
master_server_ip
=
self
.
config
.
master_server_address
.
split
(
":"
)[
0
]
segments_url
=
f
"http://
{
master_server_ip
}
:
{
self
.
config
.
master_metrics_port
}
/get_all_segments"
start_time
=
time
.
perf_counter
()
check_result
=
False
while
time
.
perf_counter
()
-
start_time
<
SETUP_TIMEOUT
:
try
:
check_segments_resp
=
requests
.
get
(
segments_url
,
timeout
=
3
)
except
Exception
:
logger
.
info
(
"waiting mooncake store server started, cost_time: %.2f seconds."
,
time
.
perf_counter
()
-
start_time
,
)
time
.
sleep
(
3
)
continue
if
check_segments_resp
.
text
==
""
:
logger
.
info
(
"waiting mooncake store server started, cost_time: %.2f seconds."
,
time
.
perf_counter
()
-
start_time
,
)
time
.
sleep
(
3
)
continue
logger
.
info
(
"Mooncake store server started successfully."
)
check_result
=
True
break
if
not
check_result
:
logger
.
error
(
"Launch mooncake store server timeout"
)
raise
ValueError
(
"Launch mooncake store server timeout"
)
def
warmup
(
self
):
warmup_key
=
"sglang_mooncake_store_warmup_key"
+
uuid
.
uuid4
().
hex
warmup_value
=
bytes
(
4
*
1024
)
# 4 KB
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment