Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ac04a97a
Unverified
Commit
ac04a97a
authored
Nov 05, 2024
by
tomeras91
Committed by
GitHub
Nov 04, 2024
Browse files
[Frontend] Add max_tokens prometheus metric (#9881)
Signed-off-by:
Tomer Asida
<
tomera@ai21.com
>
parent
9a5664d4
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
23 additions
and
2 deletions
+23
-2
tests/entrypoints/openai/test_metrics.py
tests/entrypoints/openai/test_metrics.py
+9
-2
tests/metrics/test_metrics.py
tests/metrics/test_metrics.py
+1
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+4
-0
vllm/engine/metrics.py
vllm/engine/metrics.py
+8
-0
vllm/engine/metrics_types.py
vllm/engine/metrics_types.py
+1
-0
No files found.
tests/entrypoints/openai/test_metrics.py
View file @
ac04a97a
...
...
@@ -70,10 +70,14 @@ EXPECTED_VALUES = {
[(
"_sum"
,
_NUM_REQUESTS
*
_NUM_GENERATION_TOKENS_PER_REQUEST
),
(
"_count"
,
_NUM_REQUESTS
)],
"vllm:request_params_n"
:
[(
"_count"
,
_NUM_REQUESTS
)],
"vllm:request_params_max_tokens"
:
[(
"_sum"
,
_NUM_REQUESTS
*
_NUM_GENERATION_TOKENS_PER_REQUEST
),
(
"_count"
,
_NUM_REQUESTS
)],
"vllm:prompt_tokens"
:
[(
"_total"
,
_NUM_REQUESTS
*
_NUM_PROMPT_TOKENS_PER_REQUEST
)],
"vllm:generation_tokens"
:
[(
"_total"
,
_NUM_REQUESTS
*
_NUM_PROMPT_TOKENS_PER_REQUEST
)],
"vllm:generation_tokens"
:
[
(
"_total"
,
_NUM_REQUESTS
*
_NUM_PROMPT_TOKENS_PER_REQUEST
)
],
"vllm:request_success"
:
[(
"_total"
,
_NUM_REQUESTS
)],
}
...
...
@@ -149,6 +153,9 @@ EXPECTED_METRICS = [
"vllm:request_params_n_sum"
,
"vllm:request_params_n_bucket"
,
"vllm:request_params_n_count"
,
"vllm:request_params_max_tokens_sum"
,
"vllm:request_params_max_tokens_bucket"
,
"vllm:request_params_max_tokens_count"
,
"vllm:num_preemptions_total"
,
"vllm:prompt_tokens_total"
,
"vllm:generation_tokens_total"
,
...
...
tests/metrics/test_metrics.py
View file @
ac04a97a
...
...
@@ -365,6 +365,7 @@ def assert_metrics(engine: LLMEngine, disable_log_stats: bool,
"vllm:request_prompt_tokens"
,
"vllm:request_generation_tokens"
,
"vllm:request_params_n"
,
"vllm:request_params_max_tokens"
,
]
for
metric_name
in
request_histogram_metrics
:
metric_value
=
REGISTRY
.
get_sample_value
(
f
"
{
metric_name
}
_count"
,
...
...
vllm/engine/llm_engine.py
View file @
ac04a97a
...
...
@@ -1685,6 +1685,7 @@ class LLMEngine:
num_prompt_tokens_requests
:
List
[
int
]
=
[]
num_generation_tokens_requests
:
List
[
int
]
=
[]
n_requests
:
List
[
int
]
=
[]
max_tokens_requests
:
List
[
int
]
=
[]
finished_reason_requests
:
List
[
str
]
=
[]
# Lora requests
...
...
@@ -1792,6 +1793,8 @@ class LLMEngine:
])
if
seq_group
.
sampling_params
is
not
None
:
n_requests
.
append
(
seq_group
.
sampling_params
.
n
)
max_tokens_requests
.
append
(
seq_group
.
sampling_params
.
max_tokens
)
finished_reason_requests
.
extend
([
SequenceStatus
.
get_finished_reason
(
seq
.
status
)
for
seq
in
seq_group
.
get_finished_seqs
()
...
...
@@ -1847,6 +1850,7 @@ class LLMEngine:
num_prompt_tokens_requests
=
num_prompt_tokens_requests
,
num_generation_tokens_requests
=
num_generation_tokens_requests
,
n_requests
=
n_requests
,
max_tokens_requests
=
max_tokens_requests
,
finished_reason_requests
=
finished_reason_requests
,
max_lora
=
str
(
max_lora_stat
),
waiting_lora_adapters
=
list
(
waiting_lora_adapters
.
keys
()),
...
...
vllm/engine/metrics.py
View file @
ac04a97a
...
...
@@ -179,6 +179,12 @@ class Metrics:
labelnames
=
labelnames
,
buckets
=
[
1
,
2
,
5
,
10
,
20
],
)
self
.
histogram_max_tokens_request
=
self
.
_histogram_cls
(
name
=
"vllm:request_params_max_tokens"
,
documentation
=
"Histogram of the max_tokens request parameter."
,
labelnames
=
labelnames
,
buckets
=
build_1_2_5_buckets
(
max_model_len
),
)
self
.
counter_request_success
=
self
.
_counter_cls
(
name
=
"vllm:request_success_total"
,
documentation
=
"Count of successfully processed requests."
,
...
...
@@ -547,6 +553,8 @@ class PrometheusStatLogger(StatLoggerBase):
self
.
metrics
.
histogram_num_generation_tokens_request
,
stats
.
num_generation_tokens_requests
)
self
.
_log_histogram
(
self
.
metrics
.
histogram_n_request
,
stats
.
n_requests
)
self
.
_log_histogram
(
self
.
metrics
.
histogram_max_tokens_request
,
stats
.
max_tokens_requests
)
def
_log_prometheus_interval
(
self
,
prompt_throughput
:
float
,
generation_throughput
:
float
)
->
None
:
...
...
vllm/engine/metrics_types.py
View file @
ac04a97a
...
...
@@ -53,6 +53,7 @@ class Stats:
num_prompt_tokens_requests
:
List
[
int
]
num_generation_tokens_requests
:
List
[
int
]
n_requests
:
List
[
int
]
max_tokens_requests
:
List
[
int
]
finished_reason_requests
:
List
[
str
]
waiting_lora_adapters
:
List
[
str
]
running_lora_adapters
:
List
[
str
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment