Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
72ff142c
Unverified
Commit
72ff142c
authored
Apr 12, 2026
by
Mark McLoughlin
Committed by
GitHub
Apr 12, 2026
Browse files
[Core][Metrics] Remove `vllm:prompt_tokens_recomputed` metric (#38709)
Signed-off-by:
Mark McLoughlin
<
markmc@redhat.com
>
parent
ee3c0c83
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
27 deletions
+6
-27
tests/v1/metrics/test_stats.py
tests/v1/metrics/test_stats.py
+3
-5
vllm/v1/metrics/loggers.py
vllm/v1/metrics/loggers.py
+0
-11
vllm/v1/metrics/stats.py
vllm/v1/metrics/stats.py
+3
-11
No files found.
tests/v1/metrics/test_stats.py
View file @
72ff142c
...
...
@@ -190,8 +190,7 @@ def test_prompt_token_stats_full_local_cache_recompute():
)
assert
stats
.
computed
==
1
assert
stats
.
local_cache_hit
==
1000
assert
stats
.
recomputed_tokens
==
1
assert
stats
.
local_cache_hit
==
999
def
test_prompt_token_stats_full_external_transfer_recompute
():
...
...
@@ -201,11 +200,10 @@ def test_prompt_token_stats_full_external_transfer_recompute():
# Case 6: Full external transfer (999 cached after reduction, 1 recomputed)
stats
.
update_from_output
(
num_cached_tokens
=
999
,
num_external_computed_tokens
=
1000
,
num_external_computed_tokens
=
999
,
prompt_len
=
1000
,
)
assert
stats
.
computed
==
1
assert
stats
.
local_cache_hit
==
0
assert
stats
.
external_kv_transfer
==
1000
assert
stats
.
recomputed_tokens
==
1
assert
stats
.
external_kv_transfer
==
999
vllm/v1/metrics/loggers.py
View file @
72ff142c
...
...
@@ -622,16 +622,6 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
counter_prompt_tokens_cached
,
per_engine_labelvalues
)
# Recomputed tokens (last token recomputed when entire prompt is cached)
counter_prompt_tokens_recomputed
=
self
.
_counter_cls
(
name
=
"vllm:prompt_tokens_recomputed"
,
documentation
=
"Number of cached tokens recomputed for forward pass."
,
labelnames
=
labelnames
,
)
self
.
counter_prompt_tokens_recomputed
=
create_metric_per_engine
(
counter_prompt_tokens_recomputed
,
per_engine_labelvalues
)
counter_generation_tokens
=
self
.
_counter_cls
(
name
=
"vllm:generation_tokens"
,
documentation
=
"Number of generation tokens processed."
,
...
...
@@ -1122,7 +1112,6 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
pts
.
get_by_source
(
source
)
)
self
.
counter_prompt_tokens_cached
[
engine_idx
].
inc
(
pts
.
cached_tokens
)
self
.
counter_prompt_tokens_recomputed
[
engine_idx
].
inc
(
pts
.
recomputed_tokens
)
self
.
counter_generation_tokens
[
engine_idx
].
inc
(
iteration_stats
.
num_generation_tokens
)
...
...
vllm/v1/metrics/stats.py
View file @
72ff142c
...
...
@@ -246,12 +246,11 @@ class PromptTokenStats:
local_cache_hit: Tokens from local prefix cache.
external_kv_transfer: Tokens from external KV transfer.
cached_tokens: Tokens skipped during prefill (from scheduler).
recomputed_tokens: Cached tokens that were recomputed (see below).
total: Total prompt tokens.
Invariants:
computed + local_cache_hit + external_kv_transfer
- recomputed_tokens
= total
local_cache_hit + external_kv_transfer
- recomputed_tokens
= cached_tokens
computed + local_cache_hit + external_kv_transfer = total
local_cache_hit + external_kv_transfer = cached_tokens
"""
ALL_SOURCES
:
tuple
[
str
,
...]
=
(
...
...
@@ -264,7 +263,6 @@ class PromptTokenStats:
local_cache_hit
:
int
=
0
external_kv_transfer
:
int
=
0
cached_tokens
:
int
=
0
recomputed_tokens
:
int
=
0
total
:
int
=
0
def
update_from_output
(
...
...
@@ -274,11 +272,6 @@ class PromptTokenStats:
prompt_len
:
int
,
)
->
None
:
"""Update stats from a prefill output."""
# When all tokens are cached, the scheduler reduces num_cached_tokens
# by 1 to force the model to recompute the last token, since the model
# needs at least one input token to run a forward pass.
recomputed
=
1
if
(
num_cached_tokens
+
1
==
prompt_len
)
else
0
self
.
computed
+=
prompt_len
-
num_cached_tokens
self
.
external_kv_transfer
+=
num_external_computed_tokens
# FIXME(yifan): local_cache_hit can go negative after preemption.
...
...
@@ -290,10 +283,9 @@ class PromptTokenStats:
# as a separate metric rather than reusing num_external_computed_tokens
# for metric directly.
self
.
local_cache_hit
+=
max
(
0
,
(
num_cached_tokens
+
recomputed
-
num_external_computed_tokens
)
0
,
(
num_cached_tokens
-
num_external_computed_tokens
)
)
self
.
cached_tokens
+=
num_cached_tokens
self
.
recomputed_tokens
+=
recomputed
self
.
total
+=
prompt_len
def
get_by_source
(
self
,
source
:
str
)
->
int
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment