Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1cd981da
Unverified
Commit
1cd981da
authored
Feb 22, 2025
by
Mark McLoughlin
Committed by
GitHub
Feb 22, 2025
Browse files
[V1][Metrics] Support `vllm:cache_config_info` (#13299)
parent
fca20841
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
32 additions
and
12 deletions
+32
-12
tests/entrypoints/openai/test_metrics.py
tests/entrypoints/openai/test_metrics.py
+1
-0
vllm/config.py
vllm/config.py
+6
-0
vllm/engine/metrics.py
vllm/engine/metrics.py
+2
-3
vllm/engine/metrics_types.py
vllm/engine/metrics_types.py
+2
-8
vllm/v1/metrics/loggers.py
vllm/v1/metrics/loggers.py
+21
-1
No files found.
tests/entrypoints/openai/test_metrics.py
View file @
1cd981da
...
@@ -230,6 +230,7 @@ EXPECTED_METRICS_V1 = [
...
@@ -230,6 +230,7 @@ EXPECTED_METRICS_V1 = [
"vllm:prompt_tokens_total"
,
"vllm:prompt_tokens_total"
,
"vllm:generation_tokens_total"
,
"vllm:generation_tokens_total"
,
"vllm:iteration_tokens_total"
,
"vllm:iteration_tokens_total"
,
"vllm:cache_config_info"
,
"vllm:request_success_total"
,
"vllm:request_success_total"
,
"vllm:request_prompt_tokens_sum"
,
"vllm:request_prompt_tokens_sum"
,
"vllm:request_prompt_tokens_bucket"
,
"vllm:request_prompt_tokens_bucket"
,
...
...
vllm/config.py
View file @
1cd981da
...
@@ -88,6 +88,12 @@ class SupportsHash(Protocol):
...
@@ -88,6 +88,12 @@ class SupportsHash(Protocol):
...
...
class SupportsMetricsInfo(Protocol):
    """Structural interface for objects that can describe themselves as
    a flat mapping of strings suitable for use as metric labels."""

    def metrics_info(self) -> Dict[str, str]:
        """Return a mapping of label name to label value for this object."""
        ...
class
ModelImpl
(
str
,
enum
.
Enum
):
class
ModelImpl
(
str
,
enum
.
Enum
):
AUTO
=
"auto"
AUTO
=
"auto"
VLLM
=
"vllm"
VLLM
=
"vllm"
...
...
vllm/engine/metrics.py
View file @
1cd981da
...
@@ -8,9 +8,8 @@ from typing import Dict, List, Optional, Type, Union, cast
...
@@ -8,9 +8,8 @@ from typing import Dict, List, Optional, Type, Union, cast
import
numpy
as
np
import
numpy
as
np
import
prometheus_client
import
prometheus_client
from
vllm.config
import
VllmConfig
from
vllm.config
import
SupportsMetricsInfo
,
VllmConfig
from
vllm.engine.metrics_types
import
(
StatLoggerBase
,
Stats
,
from
vllm.engine.metrics_types
import
StatLoggerBase
,
Stats
SupportsMetricsInfo
)
from
vllm.executor.ray_utils
import
ray
from
vllm.executor.ray_utils
import
ray
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
...
vllm/engine/metrics_types.py
View file @
1cd981da
...
@@ -15,9 +15,9 @@ do this in Python code and lazily import prometheus_client.
...
@@ -15,9 +15,9 @@ do this in Python code and lazily import prometheus_client.
import
time
import
time
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Protocol
from
typing
import
List
,
Optional
from
vllm.config
import
VllmConfig
from
vllm.config
import
SupportsMetricsInfo
,
VllmConfig
from
vllm.spec_decode.metrics
import
SpecDecodeWorkerMetrics
from
vllm.spec_decode.metrics
import
SpecDecodeWorkerMetrics
...
@@ -70,12 +70,6 @@ class Stats:
...
@@ -70,12 +70,6 @@ class Stats:
spec_decode_metrics
:
Optional
[
"SpecDecodeWorkerMetrics"
]
=
None
spec_decode_metrics
:
Optional
[
"SpecDecodeWorkerMetrics"
]
=
None
class SupportsMetricsInfo(Protocol):
    """Structural interface for objects that can describe themselves as
    a flat mapping of strings suitable for use as metric labels."""

    def metrics_info(self) -> Dict[str, str]:
        """Return a mapping of label name to label value for this object."""
        ...
class
StatLoggerBase
(
ABC
):
class
StatLoggerBase
(
ABC
):
"""Base class for StatLogger."""
"""Base class for StatLogger."""
...
...
vllm/v1/metrics/loggers.py
View file @
1cd981da
...
@@ -7,7 +7,7 @@ from typing import Dict, List
...
@@ -7,7 +7,7 @@ from typing import Dict, List
import
numpy
as
np
import
numpy
as
np
import
prometheus_client
import
prometheus_client
from
vllm.config
import
VllmConfig
from
vllm.config
import
SupportsMetricsInfo
,
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.v1.core.kv_cache_utils
import
PrefixCachingMetrics
from
vllm.v1.core.kv_cache_utils
import
PrefixCachingMetrics
from
vllm.v1.engine
import
FinishReason
from
vllm.v1.engine
import
FinishReason
...
@@ -228,6 +228,26 @@ class PrometheusStatLogger(StatLoggerBase):
...
@@ -228,6 +228,26 @@ class PrometheusStatLogger(StatLoggerBase):
buckets
=
request_latency_buckets
,
buckets
=
request_latency_buckets
,
labelnames
=
labelnames
).
labels
(
*
labelvalues
)
labelnames
=
labelnames
).
labels
(
*
labelvalues
)
self
.
log_metrics_info
(
"cache_config"
,
vllm_config
.
cache_config
)
def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
    """Publish a config object's metadata as an info-style Prometheus gauge.

    Args:
        type: Identifier of the info metric to emit. Only
            ``"cache_config"`` is currently recognised.
        config_obj: Object whose ``metrics_info()`` result supplies the
            label names (keys) and label values (values) of the gauge.

    Raises:
        AssertionError: If ``type`` is not a recognised info type.
    """
    # Recognised info types -> (metric name, metric documentation).
    known_info_metrics = {
        "cache_config": ("vllm:cache_config_info",
                         "Information of the LLMEngine CacheConfig"),
    }

    # Raise explicitly rather than via an ``assert`` statement: asserts are
    # removed under ``python -O``, which would let an unknown type fall
    # through and build a gauge with ``name=None``. AssertionError is kept
    # so the failure type seen by callers does not change.
    if type not in known_info_metrics:
        raise AssertionError(f"Unknown metrics info type {type}")
    name, documentation = known_info_metrics[type]

    metrics_info = config_obj.metrics_info()

    # Info type metrics are syntactic sugar for a gauge permanently set to 1.
    # Since prometheus multiprocessing mode does not support Info, emulate
    # info here with a gauge.
    info_gauge = prometheus_client.Gauge(
        name=name,
        documentation=documentation,
        labelnames=list(metrics_info)).labels(**metrics_info)
    info_gauge.set(1)
def
log
(
self
,
scheduler_stats
:
SchedulerStats
,
def
log
(
self
,
scheduler_stats
:
SchedulerStats
,
iteration_stats
:
IterationStats
):
iteration_stats
:
IterationStats
):
"""Log to prometheus."""
"""Log to prometheus."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment