Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8bf8f458
Unverified
Commit
8bf8f458
authored
Sep 26, 2025
by
Zhuohan Li
Committed by
GitHub
Sep 27, 2025
Browse files
[Core] Don't count preempted tokens in prefix cache hit rate (#25787)
Signed-off-by:
Zhuohan Li
<
zhuohan123@gmail.com
>
parent
6f5c0931
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
60 additions
and
41 deletions
+60
-41
vllm/v1/core/kv_cache_manager.py
vllm/v1/core/kv_cache_manager.py
+16
-8
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+34
-32
vllm/v1/metrics/stats.py
vllm/v1/metrics/stats.py
+7
-1
vllm/v1/request.py
vllm/v1/request.py
+3
-0
No files found.
vllm/v1/core/kv_cache_manager.py
View file @
8bf8f458
...
...
@@ -184,6 +184,14 @@ class KVCacheManager:
if
self
.
log_stats
:
assert
self
.
prefix_cache_stats
is
not
None
if
request
.
num_preemptions
>
0
:
# Previously preempted request
self
.
prefix_cache_stats
.
preempted_requests
+=
1
self
.
prefix_cache_stats
.
preempted_queries
+=
request
.
num_tokens
self
.
prefix_cache_stats
.
preempted_hits
+=
(
num_new_computed_tokens
)
else
:
# New request
self
.
prefix_cache_stats
.
requests
+=
1
self
.
prefix_cache_stats
.
queries
+=
request
.
num_tokens
self
.
prefix_cache_stats
.
hits
+=
num_new_computed_tokens
...
...
vllm/v1/core/sched/scheduler.py
View file @
8bf8f458
...
...
@@ -251,12 +251,17 @@ class Scheduler(SchedulerInterface):
req_index
+=
1
continue
# Schedule newly needed KV blocks for the request.
while
True
:
new_blocks
=
self
.
kv_cache_manager
.
allocate_slots
(
request
,
num_new_tokens
,
num_lookahead_tokens
=
self
.
num_lookahead_tokens
)
if
new_blocks
is
None
:
if
new_blocks
is
not
None
:
# The request can be scheduled.
break
# The request cannot be scheduled.
# Preempt the lowest-priority request.
if
self
.
policy
==
SchedulingPolicy
.
PRIORITY
:
...
...
@@ -274,23 +279,20 @@ class Scheduler(SchedulerInterface):
self
.
encoder_cache_manager
.
free
(
preempted_req
)
preempted_req
.
status
=
RequestStatus
.
PREEMPTED
preempted_req
.
num_computed_tokens
=
0
preempted_req
.
num_preemptions
+=
1
if
self
.
log_stats
:
preempted_req
.
record_event
(
EngineCoreEventType
.
PREEMPTED
,
scheduled_timestamp
)
preempted_req
.
record_event
(
EngineCoreEventType
.
PREEMPTED
,
scheduled_timestamp
)
self
.
waiting
.
prepend_request
(
preempted_req
)
preempted_reqs
.
append
(
preempted_req
)
if
preempted_req
==
request
:
# No more request to preempt.
can_schedule
=
False
# No more request to preempt. Cannot schedule this request.
break
else
:
# The request can be scheduled.
can_schedule
=
True
break
if
not
can_schedule
:
if
new_blocks
is
None
:
# Cannot schedule this request.
break
assert
new_blocks
is
not
None
# Schedule the request.
scheduled_running_reqs
.
append
(
request
)
...
...
vllm/v1/metrics/stats.py
View file @
8bf8f458
...
...
@@ -17,13 +17,19 @@ class PrefixCacheStats:
"""Stores prefix cache hit statistics."""
# Whether reset_prefix_cache was invoked.
reset
:
bool
=
False
# The number of requests in this update.
# The number of
new
requests in this update.
requests
:
int
=
0
# The number of queries in these requests. Note that "queries" here
# means the number of tokens that were queried from the cache.
queries
:
int
=
0
# The number of hits in these requests.
hits
:
int
=
0
# The number of previously preempted requests in this update.
preempted_requests
:
int
=
0
# The `queries` number for preempted requests.
preempted_queries
:
int
=
0
# The `hits` number for preempted requests.
preempted_hits
:
int
=
0
@
dataclass
...
...
vllm/v1/request.py
View file @
8bf8f458
...
...
@@ -115,6 +115,9 @@ class Request:
# indicates that the output is corrupted
self
.
num_nans_in_logits
=
0
# The number of times this request has been preempted by the scheduler
self
.
num_preemptions
=
0
self
.
block_hashes
:
list
[
BlockHash
]
=
[]
self
.
get_hash_new_full_blocks
:
Optional
[
Callable
[
[],
list
[
BlockHash
]]]
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment