Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c8525f06
Unverified
Commit
c8525f06
authored
Mar 04, 2025
by
Mark McLoughlin
Committed by
GitHub
Mar 04, 2025
Browse files
[V0][Metrics] Deprecate some questionable request time metrics (#14135)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
parent
5db6b2c9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 17 additions and 6 deletions
+17
-6
vllm/engine/metrics.py
vllm/engine/metrics.py
+17
-6
No files found.
vllm/engine/metrics.py
View file @
c8525f06
...
@@ -197,24 +197,35 @@ class Metrics:
...
@@ -197,24 +197,35 @@ class Metrics:
"Histogram of time spent in DECODE phase for request."
,
"Histogram of time spent in DECODE phase for request."
,
labelnames
=
labelnames
,
labelnames
=
labelnames
,
buckets
=
request_latency_buckets
)
buckets
=
request_latency_buckets
)
# Deprecated in 0.8 - duplicates vllm:request_queue_time_seconds:
# TODO: in 0.9, only enable if show_hidden_metrics=True
self
.
histogram_time_in_queue_request
=
self
.
_histogram_cls
(
self
.
histogram_time_in_queue_request
=
self
.
_histogram_cls
(
name
=
"vllm:time_in_queue_requests"
,
name
=
"vllm:time_in_queue_requests"
,
documentation
=
documentation
=
(
"Histogram of time the request spent in the queue in seconds."
,
"Histogram of time the request spent in the queue in seconds. "
"DEPRECATED: use vllm:request_queue_time_seconds instead."
),
labelnames
=
labelnames
,
labelnames
=
labelnames
,
buckets
=
request_latency_buckets
)
buckets
=
request_latency_buckets
)
# Deprecated in 0.8 - use prefill/decode/inference time metrics
# TODO: in 0.9, only enable if show_hidden_metrics=True
self
.
histogram_model_forward_time_request
=
self
.
_histogram_cls
(
self
.
histogram_model_forward_time_request
=
self
.
_histogram_cls
(
name
=
"vllm:model_forward_time_milliseconds"
,
name
=
"vllm:model_forward_time_milliseconds"
,
documentation
=
documentation
=
(
"Histogram of time spent in the model forward pass in ms."
,
"Histogram of time spent in the model forward pass in ms. "
"DEPRECATED: use prefill/decode/inference time metrics instead."
),
labelnames
=
labelnames
,
labelnames
=
labelnames
,
buckets
=
build_1_2_3_5_8_buckets
(
3000
))
buckets
=
build_1_2_3_5_8_buckets
(
3000
))
self
.
histogram_model_execute_time_request
=
self
.
_histogram_cls
(
self
.
histogram_model_execute_time_request
=
self
.
_histogram_cls
(
name
=
"vllm:model_execute_time_milliseconds"
,
name
=
"vllm:model_execute_time_milliseconds"
,
documentation
=
documentation
=
(
"Histogram of time spent in the model execute function in ms."
,
"Histogram of time spent in the model execute function in ms. "
"DEPRECATED: use prefill/decode/inference time metrics instead."
),
labelnames
=
labelnames
,
labelnames
=
labelnames
,
buckets
=
build_1_2_3_5_8_buckets
(
3000
))
buckets
=
build_1_2_3_5_8_buckets
(
3000
))
# Metadata
# Metadata
self
.
histogram_num_prompt_tokens_request
=
self
.
_histogram_cls
(
self
.
histogram_num_prompt_tokens_request
=
self
.
_histogram_cls
(
name
=
"vllm:request_prompt_tokens"
,
name
=
"vllm:request_prompt_tokens"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment