[V0][Metrics] Deprecate some questionable request time metrics (#14135)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>

[V0][Metrics] Deprecate some questionable request time metrics (#14135)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
c8525f06 · Mark McLoughlin · GitHub · 5db6b2c9 · c8525f06
Unverified Commit c8525f06 authored Mar 04, 2025 by Mark McLoughlin Committed by GitHub Mar 04, 2025
Show whitespace changes
Inline Side-by-side

Showing with 17 additions and 6 deletions

vllm/engine/metrics.py vllm/engine/metrics.py +17 -6

No files found.
--- a/vllm/engine/metrics.py
+++ b/vllm/engine/metrics.py
@@ -197,24 +197,35 @@ class Metrics:
            "Histogram of time spent in DECODE phase for request.",
            labelnames=labelnames,
            buckets=request_latency_buckets)
+        # Deprecated in 0.8 - duplicates vllm:request_queue_time_seconds:
+        # TODO: in 0.9, only enable if show_hidden_metrics=True
        self.histogram_time_in_queue_request = self._histogram_cls(
            name="vllm:time_in_queue_requests",
-            documentation=
+            documentation=(
-            "Histogram of time the request spent in the queue in seconds.",
+                "Histogram of time the request spent in the queue in seconds. "
+                "DEPRECATED: use vllm:request_queue_time_seconds instead."),
            labelnames=labelnames,
            buckets=request_latency_buckets)
+        # Deprecated in 0.8 - use prefill/decode/inference time metrics
+        # TODO: in 0.9, only enable if show_hidden_metrics=True
        self.histogram_model_forward_time_request = self._histogram_cls(
            name="vllm:model_forward_time_milliseconds",
-            documentation=
+            documentation=(
-            "Histogram of time spent in the model forward pass in ms.",
+                "Histogram of time spent in the model forward pass in ms. "
+                "DEPRECATED: use prefill/decode/inference time metrics instead."
+            ),
            labelnames=labelnames,
            buckets=build_1_2_3_5_8_buckets(3000))
        self.histogram_model_execute_time_request = self._histogram_cls(
            name="vllm:model_execute_time_milliseconds",
-            documentation=
+            documentation=(
-            "Histogram of time spent in the model execute function in ms.",
+                "Histogram of time spent in the model execute function in ms. "
+                "DEPRECATED: use prefill/decode/inference time metrics instead."
+            ),
            labelnames=labelnames,
            buckets=build_1_2_3_5_8_buckets(3000))
        #   Metadata
        self.histogram_num_prompt_tokens_request = self._histogram_cls(
            name="vllm:request_prompt_tokens",