Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
43de1d73
"vscode:/vscode.git/clone" did not exist on "7e0107c395b24fa55e080dee3903552430008b48"
Unverified
Commit
43de1d73
authored
Aug 26, 2025
by
Zhiqiang Xie
Committed by
GitHub
Aug 26, 2025
Browse files
HiCache Storage fix host memory leak (#9648)
parent
79ce3688
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
4 deletions
+8
-4
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+5
-4
python/sglang/srt/mem_cache/hiradix_cache.py
python/sglang/srt/mem_cache/hiradix_cache.py
+2
-0
python/sglang/srt/mem_cache/radix_cache.py
python/sglang/srt/mem_cache/radix_cache.py
+1
-0
No files found.
python/sglang/srt/managers/scheduler.py
View file @
43de1d73
...
...
@@ -1296,10 +1296,11 @@ class Scheduler(
def
_prefetch_kvcache
(
self
,
req
:
Req
):
if
self
.
enable_hicache_storage
:
req
.
init_next_round_input
(
self
.
tree_cache
)
last_hash
=
req
.
last_host_node
.
get_last_hash_value
()
matched_len
=
len
(
req
.
prefix_indices
)
+
req
.
host_hit_length
# TODO: support free-form fetching by calculating hash keys on the fly
if
(
matched_len
>
0
and
last_hash
is
not
None
)
or
matched_len
==
0
:
if
req
.
last_node
.
backuped
:
# only initiate the prefetch if the last node is backed up;
# otherwise, the allocated GPU memory must stay locked for integrity
last_hash
=
req
.
last_host_node
.
get_last_hash_value
()
matched_len
=
len
(
req
.
prefix_indices
)
+
req
.
host_hit_length
new_input_tokens
=
req
.
fill_ids
[
matched_len
:]
self
.
tree_cache
.
prefetch_from_storage
(
req
.
rid
,
req
.
last_host_node
,
new_input_tokens
,
last_hash
...
...
python/sglang/srt/mem_cache/hiradix_cache.py
View file @
43de1d73
...
...
@@ -536,6 +536,8 @@ class HiRadixCache(RadixCache):
while
last_node
.
evicted
:
host_hit_length
+=
len
(
last_node
.
host_value
)
last_node
=
last_node
.
parent
while
not
last_host_node
.
backuped
:
last_host_node
=
last_host_node
.
parent
return
MatchResult
(
device_indices
=
value
,
...
...
python/sglang/srt/mem_cache/radix_cache.py
View file @
43de1d73
...
...
@@ -152,6 +152,7 @@ class RadixCache(BasePrefixCache):
self
.
root_node
=
TreeNode
()
self
.
root_node
.
key
=
[]
self
.
root_node
.
value
=
[]
self
.
root_node
.
host_value
=
[]
self
.
root_node
.
lock_ref
=
1
self
.
evictable_size_
=
0
self
.
protected_size_
=
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment