Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a03cf9bc
Unverified
Commit
a03cf9bc
authored
Oct 20, 2025
by
Nick Hill
Committed by
GitHub
Oct 21, 2025
Browse files
[V0 Deprecation] Remove V0 metrics code (#27215)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
352c0c8a
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
772 deletions
+0
-772
vllm/engine/metrics.py
vllm/engine/metrics.py
+0
-688
vllm/engine/metrics_types.py
vllm/engine/metrics_types.py
+0
-84
No files found.
vllm/engine/metrics.py
deleted
100644 → 0
View file @
352c0c8a
This diff is collapsed.
Click to expand it.
vllm/engine/metrics_types.py
deleted
100644 → 0
View file @
352c0c8a
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
These types are defined in this file to avoid importing vllm.engine.metrics
and therefore importing prometheus_client.
This is required due to usage of Prometheus multiprocess mode to enable
metrics after splitting out the uvicorn process from the engine process.
Prometheus multiprocess mode requires setting PROMETHEUS_MULTIPROC_DIR
before prometheus_client is imported. Typically, this is done by setting
the env variable before launch, but since we are a library, we need to
do this in Python code and lazily import prometheus_client.
"""
import
time
from
abc
import
ABC
,
abstractmethod
from
dataclasses
import
dataclass
from
vllm.config
import
SupportsMetricsInfo
,
VllmConfig
@
dataclass
class
Stats
:
"""Created by LLMEngine for use by StatLogger."""
now
:
float
# System stats (should have _sys suffix)
# Scheduler State
num_running_sys
:
int
num_waiting_sys
:
int
num_swapped_sys
:
int
# KV Cache Usage in %
gpu_cache_usage_sys
:
float
cpu_cache_usage_sys
:
float
# Prefix caching block hit rate
cpu_prefix_cache_hit_rate
:
float
gpu_prefix_cache_hit_rate
:
float
# Iteration stats (should have _iter suffix)
num_prompt_tokens_iter
:
int
num_generation_tokens_iter
:
int
num_tokens_iter
:
int
time_to_first_tokens_iter
:
list
[
float
]
inter_token_latencies_iter
:
list
[
float
]
num_preemption_iter
:
int
# Request stats (should have _requests suffix)
# Latency
time_e2e_requests
:
list
[
float
]
time_queue_requests
:
list
[
float
]
time_inference_requests
:
list
[
float
]
time_prefill_requests
:
list
[
float
]
time_decode_requests
:
list
[
float
]
# Metadata
num_prompt_tokens_requests
:
list
[
int
]
num_generation_tokens_requests
:
list
[
int
]
n_requests
:
list
[
int
]
max_num_generation_tokens_requests
:
list
[
int
]
max_tokens_requests
:
list
[
int
]
finished_reason_requests
:
list
[
str
]
waiting_lora_adapters
:
list
[
str
]
running_lora_adapters
:
list
[
str
]
max_lora
:
str
class
StatLoggerBase
(
ABC
):
"""Base class for StatLogger."""
def
__init__
(
self
,
local_interval
:
float
,
vllm_config
:
VllmConfig
)
->
None
:
# Tracked stats over current local logging interval.
self
.
num_prompt_tokens
:
list
[
int
]
=
[]
self
.
num_generation_tokens
:
list
[
int
]
=
[]
self
.
last_local_log
=
time
.
time
()
self
.
local_interval
=
local_interval
@
abstractmethod
def
log
(
self
,
stats
:
Stats
)
->
None
:
raise
NotImplementedError
@
abstractmethod
def
info
(
self
,
type
:
str
,
obj
:
SupportsMetricsInfo
)
->
None
:
raise
NotImplementedError
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment