Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d9737ca1
Unverified
Commit
d9737ca1
authored
Apr 19, 2025
by
vie-serendipity
Committed by
GitHub
Apr 19, 2025
Browse files
[V1][Misc] stop update prefix cache stats when logs_stats is disabled (#16460)
Signed-off-by:
vie-serendipity
<
2733147505@qq.com
>
parent
9d4ca19d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
12 deletions
+44
-12
tests/v1/core/test_prefix_caching.py
tests/v1/core/test_prefix_caching.py
+22
-0
vllm/v1/core/kv_cache_manager.py
vllm/v1/core/kv_cache_manager.py
+19
-11
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+3
-1
No files found.
tests/v1/core/test_prefix_caching.py
View file @
d9737ca1
...
@@ -751,3 +751,25 @@ def test_reset_prefix_cache():
...
@@ -751,3 +751,25 @@ def test_reset_prefix_cache():
assert
manager
.
reset_prefix_cache
()
assert
manager
.
reset_prefix_cache
()
assert
not
manager
.
block_pool
.
cached_block_hash_to_block
assert
not
manager
.
block_pool
.
cached_block_hash_to_block
assert
all
([
blk
.
block_hash
is
None
for
blk
in
manager
.
block_pool
.
blocks
])
assert
all
([
blk
.
block_hash
is
None
for
blk
in
manager
.
block_pool
.
blocks
])
def
test_prefix_cache_stats_disabled
():
"""Test that prefix_cache_stats is None when log_stats is False."""
manager
=
KVCacheManager
(
make_kv_cache_config
(
16
,
11
),
max_model_len
=
8192
,
enable_caching
=
True
,
log_stats
=
False
,
# Disable logging stats
)
assert
manager
.
prefix_cache_stats
is
None
# Call all functions that check whether log_stats is disabled.
req
=
make_request
(
"0"
,
list
(
range
(
16
)))
computed_blocks
,
num_computed_tokens
=
manager
.
get_computed_blocks
(
req
)
assert
not
computed_blocks
assert
num_computed_tokens
==
0
manager
.
allocate_slots
(
req
,
16
,
computed_blocks
)
manager
.
reset_prefix_cache
()
# Ensure prefix_cache_stats remains None
assert
manager
.
prefix_cache_stats
is
None
vllm/v1/core/kv_cache_manager.py
View file @
d9737ca1
...
@@ -39,8 +39,9 @@ class KVCacheManager:
...
@@ -39,8 +39,9 @@ class KVCacheManager:
self
.
enable_caching
=
enable_caching
self
.
enable_caching
=
enable_caching
self
.
caching_hash_fn
=
sha256
if
caching_hash_algo
==
"sha256"
else
hash
self
.
caching_hash_fn
=
sha256
if
caching_hash_algo
==
"sha256"
else
hash
# FIXME: make prefix cache stats conditional on log_stats
self
.
log_stats
=
log_stats
self
.
log_stats
=
log_stats
# FIXME: make prefix cache stats conditional on log_stats
self
.
prefix_cache_stats
=
PrefixCacheStats
()
if
log_stats
else
None
# NOTE(woosuk): To avoid frequent block allocation, we preallocate some
# NOTE(woosuk): To avoid frequent block allocation, we preallocate some
# blocks for each request. For example, when a request reaches the end
# blocks for each request. For example, when a request reaches the end
# of its block table, we preallocate N blocks in advance. This way, we
# of its block table, we preallocate N blocks in advance. This way, we
...
@@ -79,7 +80,6 @@ class KVCacheManager:
...
@@ -79,7 +80,6 @@ class KVCacheManager:
# This is only used to track the RUNNING requests, we do not track the
# This is only used to track the RUNNING requests, we do not track the
# data for preempted ones.
# data for preempted ones.
self
.
num_cached_block
:
dict
[
str
,
int
]
=
{}
self
.
num_cached_block
:
dict
[
str
,
int
]
=
{}
self
.
prefix_cache_stats
=
PrefixCacheStats
()
@
property
@
property
def
usage
(
self
)
->
float
:
def
usage
(
self
)
->
float
:
...
@@ -90,12 +90,14 @@ class KVCacheManager:
...
@@ -90,12 +90,14 @@ class KVCacheManager:
"""
"""
return
self
.
block_pool
.
get_usage
()
return
self
.
block_pool
.
get_usage
()
def
make_prefix_cache_stats
(
self
)
->
PrefixCacheStats
:
def
make_prefix_cache_stats
(
self
)
->
Optional
[
PrefixCacheStats
]
:
"""Get (and reset) the prefix cache stats.
"""Get (and reset) the prefix cache stats.
Returns:
Returns:
The current prefix caching stats.
The current prefix caching stats
, or None if logging is disabled
.
"""
"""
if
not
self
.
log_stats
:
return
None
stats
=
self
.
prefix_cache_stats
stats
=
self
.
prefix_cache_stats
self
.
prefix_cache_stats
=
PrefixCacheStats
()
self
.
prefix_cache_stats
=
PrefixCacheStats
()
return
stats
return
stats
...
@@ -125,6 +127,8 @@ class KVCacheManager:
...
@@ -125,6 +127,8 @@ class KVCacheManager:
self
.
block_size
,
request
)
self
.
block_size
,
request
)
self
.
req_to_block_hashes
[
request
.
request_id
]
=
block_hashes
self
.
req_to_block_hashes
[
request
.
request_id
]
=
block_hashes
if
self
.
log_stats
:
assert
self
.
prefix_cache_stats
is
not
None
self
.
prefix_cache_stats
.
requests
+=
1
self
.
prefix_cache_stats
.
requests
+=
1
# When the request requires prompt logprobs, we skip prefix caching.
# When the request requires prompt logprobs, we skip prefix caching.
if
request
.
sampling_params
.
prompt_logprobs
is
not
None
:
if
request
.
sampling_params
.
prompt_logprobs
is
not
None
:
...
@@ -145,6 +149,8 @@ class KVCacheManager:
...
@@ -145,6 +149,8 @@ class KVCacheManager:
computed_blocks
=
(
computed_blocks
=
(
self
.
specialized_manager
.
find_longest_cache_hit
(
block_hashes
))
self
.
specialized_manager
.
find_longest_cache_hit
(
block_hashes
))
if
self
.
log_stats
:
assert
self
.
prefix_cache_stats
is
not
None
self
.
prefix_cache_stats
.
queries
+=
len
(
block_hashes
)
self
.
prefix_cache_stats
.
queries
+=
len
(
block_hashes
)
self
.
prefix_cache_stats
.
hits
+=
len
(
computed_blocks
)
self
.
prefix_cache_stats
.
hits
+=
len
(
computed_blocks
)
...
@@ -317,17 +323,19 @@ class KVCacheManager:
...
@@ -317,17 +323,19 @@ class KVCacheManager:
def
reset_prefix_cache
(
self
)
->
bool
:
def
reset_prefix_cache
(
self
)
->
bool
:
"""Reset prefix cache. This function may be used in RLHF
"""Reset prefix cache. This function may be used in RLHF
flows to invalid prefix caching after the weights are updated,
flows to invalid
ate
prefix caching after the weights are updated,
or used for resetting prefix caching status for benchmarking.
or used for resetting prefix caching status for benchmarking.
Returns:
Returns:
bool: True if the prefix cache is successfully reset,
bool: True if the prefix cache is successfully reset,
False otherwise.
False otherwise.
"""
"""
if
self
.
block_pool
.
reset_prefix_cache
():
if
not
self
.
block_pool
.
reset_prefix_cache
():
return
False
if
self
.
log_stats
:
assert
self
.
prefix_cache_stats
is
not
None
self
.
prefix_cache_stats
.
reset
=
True
self
.
prefix_cache_stats
.
reset
=
True
return
True
return
True
return
False
def
get_num_common_prefix_blocks
(
def
get_num_common_prefix_blocks
(
self
,
self
,
...
...
vllm/v1/core/sched/scheduler.py
View file @
d9737ca1
...
@@ -798,11 +798,13 @@ class Scheduler(SchedulerInterface):
...
@@ -798,11 +798,13 @@ class Scheduler(SchedulerInterface):
)
->
Optional
[
SchedulerStats
]:
)
->
Optional
[
SchedulerStats
]:
if
not
self
.
log_stats
:
if
not
self
.
log_stats
:
return
None
return
None
prefix_cache_stats
=
self
.
kv_cache_manager
.
make_prefix_cache_stats
()
assert
prefix_cache_stats
is
not
None
return
SchedulerStats
(
return
SchedulerStats
(
num_running_reqs
=
len
(
self
.
running
),
num_running_reqs
=
len
(
self
.
running
),
num_waiting_reqs
=
len
(
self
.
waiting
),
num_waiting_reqs
=
len
(
self
.
waiting
),
gpu_cache_usage
=
self
.
kv_cache_manager
.
usage
,
gpu_cache_usage
=
self
.
kv_cache_manager
.
usage
,
prefix_cache_stats
=
self
.
kv_cache_manager
.
make_
prefix_cache_stats
()
,
prefix_cache_stats
=
prefix_cache_stats
,
spec_decoding_stats
=
spec_decoding_stats
,
spec_decoding_stats
=
spec_decoding_stats
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment