Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
e7ebecf8
"docs/source/vscode:/vscode.git/clone" did not exist on "f9805ef11c892f3615bb56219e817f5320b870e5"
Unverified
Commit
e7ebecf8
authored
Dec 26, 2024
by
Liangsheng Yin
Committed by
GitHub
Dec 26, 2024
Browse files
Fix cache hit rate when chunked prefill (#2555)
parent
9a23c484
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
11 deletions
+8
-11
python/sglang/srt/managers/schedule_policy.py
python/sglang/srt/managers/schedule_policy.py
+1
-1
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+7
-10
No files found.
python/sglang/srt/managers/schedule_policy.py
View file @
e7ebecf8
...
@@ -248,7 +248,7 @@ class PrefillAdder:
...
@@ -248,7 +248,7 @@ class PrefillAdder:
self
.
can_run_list
.
append
(
req
)
self
.
can_run_list
.
append
(
req
)
self
.
_prefill_one_req
(
self
.
_prefill_one_req
(
len
(
req
.
prefix_indices
)
,
0
,
req
.
extend_input_len
,
req
.
extend_input_len
,
(
(
min
(
req
.
sampling_params
.
max_new_tokens
,
CLIP_MAX_NEW_TOKENS_ESTIMATION
)
min
(
req
.
sampling_params
.
max_new_tokens
,
CLIP_MAX_NEW_TOKENS_ESTIMATION
)
...
...
python/sglang/srt/managers/scheduler.py
View file @
e7ebecf8
...
@@ -629,7 +629,6 @@ class Scheduler:
...
@@ -629,7 +629,6 @@ class Scheduler:
self
.
waiting_queue
.
append
(
req
)
self
.
waiting_queue
.
append
(
req
)
def
log_prefill_stats
(
self
,
adder
,
can_run_list
,
running_bs
,
has_being_chunked
):
def
log_prefill_stats
(
self
,
adder
,
can_run_list
,
running_bs
,
has_being_chunked
):
if
isinstance
(
self
.
tree_cache
,
RadixCache
):
self
.
tree_cache_metrics
[
"total"
]
+=
(
self
.
tree_cache_metrics
[
"total"
]
+=
(
adder
.
log_input_tokens
+
adder
.
log_hit_tokens
adder
.
log_input_tokens
+
adder
.
log_hit_tokens
)
/
10
**
9
)
/
10
**
9
...
@@ -637,8 +636,6 @@ class Scheduler:
...
@@ -637,8 +636,6 @@ class Scheduler:
tree_cache_hit_rate
=
(
tree_cache_hit_rate
=
(
self
.
tree_cache_metrics
[
"hit"
]
/
self
.
tree_cache_metrics
[
"total"
]
self
.
tree_cache_metrics
[
"hit"
]
/
self
.
tree_cache_metrics
[
"total"
]
)
)
else
:
tree_cache_hit_rate
=
0.0
num_used
=
self
.
max_total_num_tokens
-
(
num_used
=
self
.
max_total_num_tokens
-
(
self
.
token_to_kv_pool
.
available_size
()
+
self
.
tree_cache
.
evictable_size
()
self
.
token_to_kv_pool
.
available_size
()
+
self
.
tree_cache
.
evictable_size
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment