Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fd11a325
Unverified
Commit
fd11a325
authored
Apr 27, 2025
by
Ning Xie
Committed by
GitHub
Apr 26, 2025
Browse files
[MISC] rename interval to max_recent_requests (#14285)
parent
4d17e203
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
7 deletions
+7
-7
tests/v1/core/test_kv_cache_utils.py
tests/v1/core/test_kv_cache_utils.py
+1
-1
vllm/v1/core/kv_cache_utils.py
vllm/v1/core/kv_cache_utils.py
+6
-6
No files found.
tests/v1/core/test_kv_cache_utils.py
View file @
fd11a325
...
...
@@ -310,7 +310,7 @@ def test_metrics():
def
stats
(
requests
,
queries
,
hits
):
return
PrefixCacheStats
(
requests
=
requests
,
queries
=
queries
,
hits
=
hits
)
metrics
=
PrefixCachingMetrics
(
interval
=
5
)
metrics
=
PrefixCachingMetrics
(
max_recent_requests
=
5
)
assert
metrics
.
hit_rate
==
0.0
metrics
.
observe
(
stats
(
1
,
20
,
9
))
...
...
vllm/v1/core/kv_cache_utils.py
View file @
fd11a325
...
...
@@ -47,15 +47,15 @@ NONE_HASH = int.from_bytes(os.urandom(32), byteorder="big") if os.getenv(
class
PrefixCachingMetrics
:
"""Metrics for prefix caching with a hit rate of the m
ost
recent N requests.
"""Metrics for prefix caching with a hit rate of the m
ax
recent N requests.
Args:
interval
: The number of the m
ost
recent requests to aggregate.
max_recent_requests
: The number of the m
ax
recent requests to aggregate.
Defaults to 1000.
"""
def
__init__
(
self
,
interval
:
int
=
1000
):
self
.
interval
=
interval
def
__init__
(
self
,
max_recent_requests
:
int
=
1000
):
self
.
max_recent_requests
=
max_recent_requests
# The current aggregated values.
self
.
aggregated_requests
=
0
self
.
aggregated_query_total
=
0
...
...
@@ -70,7 +70,7 @@ class PrefixCachingMetrics:
are being scheduled and are looking for computed blocks.
When there are more than `max_recent_requests` requests, the oldest set of
requestsare removed from the metrics.
requests
are removed from the metrics.
Args:
stats: The prefix cache stats.
...
...
@@ -87,7 +87,7 @@ class PrefixCachingMetrics:
self
.
aggregated_query_hit
+=
stats
.
hits
# Remove the oldest stats if the number of requests exceeds the limit.
if
self
.
aggregated_requests
>
self
.
interval
:
if
self
.
aggregated_requests
>
self
.
max_recent_requests
:
old_requests
,
old_queries
,
old_hits
=
self
.
query_queue
.
popleft
()
self
.
aggregated_requests
-=
old_requests
self
.
aggregated_query_total
-=
old_queries
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment