Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e5192819
Unverified
Commit
e5192819
authored
Oct 10, 2025
by
Mark McLoughlin
Committed by
GitHub
Oct 10, 2025
Browse files
[Metrics] Add test for multi-modal cache stats logging (#26588)
Signed-off-by:
Mark McLoughlin
<
markmc@redhat.com
>
parent
7b03584d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
30 additions
and
4 deletions
+30
-4
tests/entrypoints/llm/test_mm_cache_stats.py
tests/entrypoints/llm/test_mm_cache_stats.py
+24
-0
vllm/v1/metrics/loggers.py
vllm/v1/metrics/loggers.py
+1
-4
vllm/v1/metrics/stats.py
vllm/v1/metrics/stats.py
+5
-0
No files found.
tests/entrypoints/llm/test_mm_cache_stats.py
View file @
e5192819
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
logging
import
pytest
import
pytest
import
regex
as
re
from
vllm
import
LLM
from
vllm
import
LLM
from
vllm.entrypoints.chat_utils
import
ChatCompletionMessageParam
from
vllm.entrypoints.chat_utils
import
ChatCompletionMessageParam
from
vllm.v1.metrics
import
loggers
as
stat_loggers
from
vllm.v1.metrics.reader
import
Counter
,
Metric
from
vllm.v1.metrics.reader
import
Counter
,
Metric
from
..openai.test_vision
import
TEST_IMAGE_ASSETS
from
..openai.test_vision
import
TEST_IMAGE_ASSETS
...
@@ -37,12 +41,27 @@ def _get_mm_cache_stats(metrics: list[Metric]):
...
@@ -37,12 +41,27 @@ def _get_mm_cache_stats(metrics: list[Metric]):
return
mm_cache_queries
,
mm_cache_hits
return
mm_cache_queries
,
mm_cache_hits
def
_get_mm_cache_log
(
llm
:
LLM
,
caplog_vllm
:
pytest
.
LogCaptureFixture
)
->
float
:
caplog_vllm
.
clear
()
with
caplog_vllm
.
at_level
(
logging
.
INFO
,
logger
=
stat_loggers
.
__name__
):
llm
.
llm_engine
.
do_log_stats
()
assert
len
(
caplog_vllm
.
records
)
==
1
msg
=
caplog_vllm
.
records
[
0
].
getMessage
()
assert
"MM cache hit rate"
in
msg
match
=
re
.
search
(
r
"MM cache hit rate: ([0-9.]+)%"
,
msg
)
assert
match
is
not
None
return
float
(
match
.
group
(
1
))
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[
TEST_IMAGE_ASSETS
[:
2
]],
indirect
=
True
)
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[
TEST_IMAGE_ASSETS
[:
2
]],
indirect
=
True
)
@
pytest
.
mark
.
parametrize
(
"mm_processor_cache_type"
,
[
"lru"
,
"shm"
])
@
pytest
.
mark
.
parametrize
(
"mm_processor_cache_type"
,
[
"lru"
,
"shm"
])
def
test_mm_cache_stats
(
def
test_mm_cache_stats
(
num_gpus_available
,
num_gpus_available
,
image_urls
,
image_urls
,
mm_processor_cache_type
,
mm_processor_cache_type
,
caplog_vllm
,
):
):
llm
=
LLM
(
llm
=
LLM
(
model
=
"llava-hf/llava-1.5-7b-hf"
,
model
=
"llava-hf/llava-1.5-7b-hf"
,
...
@@ -56,12 +75,15 @@ def test_mm_cache_stats(
...
@@ -56,12 +75,15 @@ def test_mm_cache_stats(
llm
.
chat
(
_make_messages
(
image_urls
[
0
]))
llm
.
chat
(
_make_messages
(
image_urls
[
0
]))
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
1
,
0
)
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
1
,
0
)
assert
_get_mm_cache_log
(
llm
,
caplog_vllm
)
==
pytest
.
approx
(
0.0
)
llm
.
chat
(
_make_messages
(
image_urls
[
1
]))
llm
.
chat
(
_make_messages
(
image_urls
[
1
]))
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
2
,
0
)
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
2
,
0
)
assert
_get_mm_cache_log
(
llm
,
caplog_vllm
)
==
pytest
.
approx
(
0.0
)
llm
.
chat
(
_make_messages
(
image_urls
[
0
]))
llm
.
chat
(
_make_messages
(
image_urls
[
0
]))
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
3
,
1
)
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
3
,
1
)
assert
_get_mm_cache_log
(
llm
,
caplog_vllm
)
==
pytest
.
approx
(
33.3
)
# NOTE: This only resets hit rate stats in CachingMetrics
# NOTE: This only resets hit rate stats in CachingMetrics
# The raw queries and hits counts remain unaffected
# The raw queries and hits counts remain unaffected
...
@@ -69,6 +91,8 @@ def test_mm_cache_stats(
...
@@ -69,6 +91,8 @@ def test_mm_cache_stats(
llm
.
chat
(
_make_messages
(
image_urls
[
0
]))
llm
.
chat
(
_make_messages
(
image_urls
[
0
]))
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
4
,
1
)
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
4
,
1
)
assert
_get_mm_cache_log
(
llm
,
caplog_vllm
)
==
pytest
.
approx
(
0.0
)
llm
.
chat
(
_make_messages
(
image_urls
[
1
]))
llm
.
chat
(
_make_messages
(
image_urls
[
1
]))
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
5
,
1
)
assert
_get_mm_cache_stats
(
llm
.
get_metrics
())
==
(
5
,
1
)
assert
_get_mm_cache_log
(
llm
,
caplog_vllm
)
==
pytest
.
approx
(
0.0
)
vllm/v1/metrics/loggers.py
View file @
e5192819
...
@@ -60,7 +60,6 @@ class LoggingStatLogger(StatLoggerBase):
...
@@ -60,7 +60,6 @@ class LoggingStatLogger(StatLoggerBase):
self
.
_reset
(
time
.
monotonic
())
self
.
_reset
(
time
.
monotonic
())
self
.
last_scheduler_stats
=
SchedulerStats
()
self
.
last_scheduler_stats
=
SchedulerStats
()
self
.
last_mm_cache_stats
:
Optional
[
MultiModalCacheStats
]
=
None
# Caching metrics. This cannot be reset.
# Caching metrics. This cannot be reset.
# TODO: Make the interval configurable.
# TODO: Make the interval configurable.
...
@@ -115,8 +114,6 @@ class LoggingStatLogger(StatLoggerBase):
...
@@ -115,8 +114,6 @@ class LoggingStatLogger(StatLoggerBase):
if
mm_cache_stats
:
if
mm_cache_stats
:
self
.
mm_caching_metrics
.
observe
(
mm_cache_stats
)
self
.
mm_caching_metrics
.
observe
(
mm_cache_stats
)
self
.
last_mm_cache_stats
=
mm_cache_stats
def
log
(
self
):
def
log
(
self
):
now
=
time
.
monotonic
()
now
=
time
.
monotonic
()
prompt_throughput
=
self
.
_get_throughput
(
self
.
num_prompt_tokens
,
now
)
prompt_throughput
=
self
.
_get_throughput
(
self
.
num_prompt_tokens
,
now
)
...
@@ -157,7 +154,7 @@ class LoggingStatLogger(StatLoggerBase):
...
@@ -157,7 +154,7 @@ class LoggingStatLogger(StatLoggerBase):
scheduler_stats
.
kv_cache_usage
*
100
,
scheduler_stats
.
kv_cache_usage
*
100
,
self
.
prefix_caching_metrics
.
hit_rate
*
100
,
self
.
prefix_caching_metrics
.
hit_rate
*
100
,
]
]
if
self
.
last_
mm_cach
e_stats
:
if
not
self
.
mm_cach
ing_metrics
.
empty
:
log_parts
.
append
(
"MM cache hit rate: %.1f%%"
)
log_parts
.
append
(
"MM cache hit rate: %.1f%%"
)
log_args
.
append
(
self
.
mm_caching_metrics
.
hit_rate
*
100
)
log_args
.
append
(
self
.
mm_caching_metrics
.
hit_rate
*
100
)
...
...
vllm/v1/metrics/stats.py
View file @
e5192819
...
@@ -96,6 +96,11 @@ class CachingMetrics:
...
@@ -96,6 +96,11 @@ class CachingMetrics:
self
.
aggregated_query_hit
=
0
self
.
aggregated_query_hit
=
0
self
.
query_queue
.
clear
()
self
.
query_queue
.
clear
()
@
property
def
empty
(
self
)
->
bool
:
"""Return true if no requests have been observed."""
return
self
.
aggregated_requests
==
0
@
property
@
property
def
hit_rate
(
self
)
->
float
:
def
hit_rate
(
self
)
->
float
:
"""Calculate the hit rate for the past N requests."""
"""Calculate the hit rate for the past N requests."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment