Unverified commit 86fc2321, authored by Kay Yan, committed by GitHub
Browse files

[Metrics] Add bucket for `request_latency`, `time_to_first_token` and...


[Metrics] Add bucket for `request_latency`, `time_to_first_token` and `time_per_output_token` (#15202)
Signed-off-by: Kay Yan <kay.yan@daocloud.io>
parent 2549c0df
...@@ -156,7 +156,8 @@ class Metrics: ...@@ -156,7 +156,8 @@ class Metrics:
labelnames=labelnames, labelnames=labelnames,
buckets=[ buckets=[
0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5, 0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
0.75, 1.0, 2.5, 5.0, 7.5, 10.0 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0, 160.0, 640.0,
2560.0
]) ])
self.histogram_time_per_output_token = self._histogram_cls( self.histogram_time_per_output_token = self._histogram_cls(
name="vllm:time_per_output_token_seconds", name="vllm:time_per_output_token_seconds",
...@@ -164,14 +165,14 @@ class Metrics: ...@@ -164,14 +165,14 @@ class Metrics:
labelnames=labelnames, labelnames=labelnames,
buckets=[ buckets=[
0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75,
1.0, 2.5 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0
]) ])
# Request stats # Request stats
# Latency # Latency
request_latency_buckets = [ request_latency_buckets = [
0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0, 0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0,
40.0, 50.0, 60.0 40.0, 50.0, 60.0, 120.0, 240.0, 480.0, 960.0, 1920.0, 7680.0
] ]
self.histogram_e2e_time_request = self._histogram_cls( self.histogram_e2e_time_request = self._histogram_cls(
name="vllm:e2e_request_latency_seconds", name="vllm:e2e_request_latency_seconds",
......
...@@ -239,7 +239,8 @@ class PrometheusStatLogger(StatLoggerBase): ...@@ -239,7 +239,8 @@ class PrometheusStatLogger(StatLoggerBase):
documentation="Histogram of time to first token in seconds.", documentation="Histogram of time to first token in seconds.",
buckets=[ buckets=[
0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5, 0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
0.75, 1.0, 2.5, 5.0, 7.5, 10.0 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0, 160.0,
640.0, 2560.0
], ],
labelnames=labelnames).labels(*labelvalues) labelnames=labelnames).labels(*labelvalues)
...@@ -249,13 +250,13 @@ class PrometheusStatLogger(StatLoggerBase): ...@@ -249,13 +250,13 @@ class PrometheusStatLogger(StatLoggerBase):
documentation="Histogram of time per output token in seconds.", documentation="Histogram of time per output token in seconds.",
buckets=[ buckets=[
0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5,
0.75, 1.0, 2.5 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0
], ],
labelnames=labelnames).labels(*labelvalues) labelnames=labelnames).labels(*labelvalues)
request_latency_buckets = [ request_latency_buckets = [
0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0, 0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0,
40.0, 50.0, 60.0 40.0, 50.0, 60.0, 120.0, 240.0, 480.0, 960.0, 1920.0, 7680.0
] ]
self.histogram_e2e_time_request = \ self.histogram_e2e_time_request = \
prometheus_client.Histogram( prometheus_client.Histogram(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment