Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
6e0b6468
"examples/pytorch/gin/parser.py" did not exist on "fb6af16f830231d0a185a727e551b5d9732bf26a"
Unverified
Commit
6e0b6468
authored
Aug 09, 2025
by
Zhiqiang Xie
Committed by
GitHub
Aug 09, 2025
Browse files
HiCache Storage tp fix (#8878)
parent
4a9f3eef
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
10 deletions
+17
-10
python/sglang/srt/managers/cache_controller.py
python/sglang/srt/managers/cache_controller.py
+13
-6
python/sglang/srt/mem_cache/hiradix_cache.py
python/sglang/srt/mem_cache/hiradix_cache.py
+4
-4
No files found.
python/sglang/srt/managers/cache_controller.py
View file @
6e0b6468
...
...
@@ -570,10 +570,6 @@ class HiCacheController:
)
completed_tokens
+=
self
.
page_size
else
:
# operation terminated by controller, release pre-allocated memory
self
.
mem_pool_host
.
free
(
operation
.
host_indices
[
operation
.
completed_tokens
:]
)
break
def
mooncake_page_transfer
(
self
,
operation
):
...
...
@@ -599,6 +595,14 @@ class HiCacheController:
self
.
generic_page_transfer
(
operation
,
batch_size
=
128
)
else
:
self
.
generic_page_transfer
(
operation
)
if
self
.
tp_world_size
>
1
:
# to ensure all TP workers release the host memory at the same time
torch
.
distributed
.
barrier
(
group
=
self
.
prefetch_tp_group
)
# operation terminated by controller, release pre-allocated memory
self
.
mem_pool_host
.
free
(
operation
.
host_indices
[
operation
.
completed_tokens
:]
)
except
Empty
:
continue
...
...
@@ -626,7 +630,9 @@ class HiCacheController:
continue
storage_hit_count
=
0
if
self
.
prefetch_rate_limit_check
():
if
(
operation
.
host_indices
is
not
None
)
and
self
.
prefetch_rate_limit_check
():
last_hash
=
operation
.
last_hash
tokens_to_fetch
=
operation
.
token_ids
...
...
@@ -670,7 +676,8 @@ class HiCacheController:
if
storage_hit_count
<
self
.
prefetch_threshold
:
# not to prefetch if not enough benefits
self
.
prefetch_revoke_queue
.
put
(
operation
.
request_id
)
self
.
mem_pool_host
.
free
(
operation
.
host_indices
)
if
operation
.
host_indices
is
not
None
:
self
.
mem_pool_host
.
free
(
operation
.
host_indices
)
logger
.
debug
(
f
"Revoking prefetch for request
{
operation
.
request_id
}
due to insufficient hits (
{
storage_hit_count
}
)."
)
...
...
python/sglang/srt/mem_cache/hiradix_cache.py
View file @
6e0b6468
...
...
@@ -471,6 +471,10 @@ class HiRadixCache(RadixCache):
req_id
]
if
operation
.
host_indices
is
None
:
# prefetch has not been issued due to insufficient host memory
return
True
if
not
self
.
can_terminate_prefetch
(
operation
):
return
False
...
...
@@ -565,10 +569,6 @@ class HiRadixCache(RadixCache):
if
host_indices
is
None
:
self
.
evict_host
(
prefetch_length
)
host_indices
=
self
.
cache_controller
.
mem_pool_host
.
alloc
(
prefetch_length
)
if
host_indices
is
None
:
last_host_node
.
release_host
()
# no sufficient host memory to prefetch
return
operation
=
self
.
cache_controller
.
prefetch
(
req_id
,
host_indices
,
new_input_tokens
,
last_hash
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment