Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
528bd1ed
Unverified
Commit
528bd1ed
authored
Jul 26, 2025
by
Zhiqiang Xie
Committed by
GitHub
Jul 26, 2025
Browse files
HiCache, check before terminate prefetching (#8372)
parent
62a6b7c7
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
9 deletions
+14
-9
python/sglang/srt/managers/cache_controller.py
python/sglang/srt/managers/cache_controller.py
+9
-7
python/sglang/srt/mem_cache/hiradix_cache.py
python/sglang/srt/mem_cache/hiradix_cache.py
+5
-2
No files found.
python/sglang/srt/managers/cache_controller.py
View file @
528bd1ed
...
@@ -201,8 +201,9 @@ class PrefetchOperation(StorageOperation):
...
@@ -201,8 +201,9 @@ class PrefetchOperation(StorageOperation):
def
increment
(
self
,
num_tokens
:
int
):
def
increment
(
self
,
num_tokens
:
int
):
with
self
.
_lock
:
with
self
.
_lock
:
if
self
.
_done_flag
:
if
self
.
_done_flag
:
return
return
False
self
.
completed_tokens
+=
num_tokens
self
.
completed_tokens
+=
num_tokens
return
True
def
mark_done
(
self
):
def
mark_done
(
self
):
with
self
.
_lock
:
with
self
.
_lock
:
...
@@ -528,12 +529,12 @@ class HiCacheController:
...
@@ -528,12 +529,12 @@ class HiCacheController:
f
"Prefetch operation
{
operation
.
request_id
}
failed to retrieve page
{
h
}
."
f
"Prefetch operation
{
operation
.
request_id
}
failed to retrieve page
{
h
}
."
)
)
break
break
self
.
mem_pool_host
.
set_from_flat_data_page
(
if
operation
.
increment
(
self
.
page_size
):
operation
.
host_indices
[
operation
.
completed_tokens
],
self
.
mem_pool_host
.
set_from_flat_data_page
(
page_data
,
operation
.
host_indices
[
operation
.
completed_tokens
]
,
)
page_data
,
operation
.
increment
(
self
.
page_size
)
)
if
operation
.
is_done
()
:
else
:
# operation terminated by controller, release pre-allocated memory
# operation terminated by controller, release pre-allocated memory
self
.
mem_pool_host
.
free
(
self
.
mem_pool_host
.
free
(
operation
.
host_indices
[
operation
.
completed_tokens
:]
operation
.
host_indices
[
operation
.
completed_tokens
:]
...
@@ -589,6 +590,7 @@ class HiCacheController:
...
@@ -589,6 +590,7 @@ class HiCacheController:
if
storage_hit_count
<
self
.
prefetch_threshold
:
if
storage_hit_count
<
self
.
prefetch_threshold
:
# not to prefetch if not enough benefits
# not to prefetch if not enough benefits
self
.
prefetch_revoke_queue
.
put
(
operation
.
request_id
)
self
.
prefetch_revoke_queue
.
put
(
operation
.
request_id
)
self
.
mem_pool_host
.
free
(
operation
.
host_indices
)
logger
.
debug
(
logger
.
debug
(
f
"Revoking prefetch for request
{
operation
.
request_id
}
due to insufficient hits (
{
storage_hit_count
}
)."
f
"Revoking prefetch for request
{
operation
.
request_id
}
due to insufficient hits (
{
storage_hit_count
}
)."
)
)
...
...
python/sglang/srt/mem_cache/hiradix_cache.py
View file @
528bd1ed
...
@@ -365,10 +365,12 @@ class HiRadixCache(RadixCache):
...
@@ -365,10 +365,12 @@ class HiRadixCache(RadixCache):
for
_
in
range
(
queue_size
.
item
()):
for
_
in
range
(
queue_size
.
item
()):
req_id
=
self
.
cache_controller
.
prefetch_revoke_queue
.
get
()
req_id
=
self
.
cache_controller
.
prefetch_revoke_queue
.
get
()
if
req_id
in
self
.
ongoing_prefetch
:
if
req_id
in
self
.
ongoing_prefetch
:
last_host_node
,
_
,
host_indices
,
_
=
self
.
ongoing_prefetch
[
req_id
]
last_host_node
,
_
,
_
,
_
=
self
.
ongoing_prefetch
[
req_id
]
last_host_node
.
release_host
()
last_host_node
.
release_host
()
self
.
cache_controller
.
mem_pool_host
.
free
(
host_indices
)
del
self
.
ongoing_prefetch
[
req_id
]
del
self
.
ongoing_prefetch
[
req_id
]
else
:
# the revoked operation already got terminated
pass
def
check_backup_progress
(
self
):
def
check_backup_progress
(
self
):
queue_size
=
torch
.
tensor
(
queue_size
=
torch
.
tensor
(
...
@@ -403,6 +405,7 @@ class HiRadixCache(RadixCache):
...
@@ -403,6 +405,7 @@ class HiRadixCache(RadixCache):
last_host_node
,
token_ids
,
host_indices
,
operation
=
self
.
ongoing_prefetch
[
last_host_node
,
token_ids
,
host_indices
,
operation
=
self
.
ongoing_prefetch
[
req_id
req_id
]
]
completed_tokens
,
hash_value
=
self
.
cache_controller
.
terminate_prefetch
(
completed_tokens
,
hash_value
=
self
.
cache_controller
.
terminate_prefetch
(
operation
operation
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment