Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
39c8d125
Unverified
Commit
39c8d125
authored
Jul 09, 2025
by
jain-ria
Committed by
GitHub
Jul 09, 2025
Browse files
fix: metrics publishing in vllm v0 worker (#1845)
parent
0f2fa928
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
9 deletions
+26
-9
examples/vllm_v0/components/worker.py
examples/vllm_v0/components/worker.py
+26
-9
No files found.
examples/vllm_v0/components/worker.py
View file @
39c8d125
...
@@ -34,7 +34,14 @@ from vllm.inputs import TokensPrompt
...
@@ -34,7 +34,14 @@ from vllm.inputs import TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.sampling_params
import
RequestOutputKind
from
dynamo.llm
import
ModelType
,
WorkerMetricsPublisher
,
register_llm
from
dynamo.llm
import
(
ForwardPassMetrics
,
KvStats
,
ModelType
,
WorkerMetricsPublisher
,
WorkerStats
,
register_llm
,
)
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -122,15 +129,25 @@ class VllmWorker:
...
@@ -122,15 +129,25 @@ class VllmWorker:
self
.
engine_client
.
set_metrics_publisher
(
self
.
metrics_publisher
)
self
.
engine_client
.
set_metrics_publisher
(
self
.
metrics_publisher
)
# Initially send dummy metrics to kick start,
# Initially send dummy metrics to kick start,
# vLLM will not update stat until forward pass is triggered
# vLLM will not update stat until forward pass is triggered
self
.
metrics_publisher
.
publish
(
worker_stats
=
WorkerStats
(
0
,
# request_active_slots
request_active_slots
=
0
,
1024
,
# request_total_slots
request_total_slots
=
1024
,
0
,
# kv_active_blocks
num_requests_waiting
=
0
,
1024
,
# kv_total_blocks
data_parallel_rank
=
None
,
0
,
# num_requests_waiting
)
0.0
,
# gpu_cache_usage_perc
kv_stats
=
KvStats
(
0.0
,
# gpu_prefix_cache_hit_rate
kv_active_blocks
=
0
,
kv_total_blocks
=
1024
,
gpu_cache_usage_perc
=
0.0
,
gpu_prefix_cache_hit_rate
=
0.0
,
)
spec_dec_stats
=
None
metrics
=
ForwardPassMetrics
(
worker_stats
=
worker_stats
,
kv_stats
=
kv_stats
,
spec_decode_stats
=
spec_dec_stats
,
)
)
self
.
metrics_publisher
.
publish
(
metrics
)
task
=
asyncio
.
create_task
(
self
.
create_metrics_publisher_endpoint
())
task
=
asyncio
.
create_task
(
self
.
create_metrics_publisher_endpoint
())
task
.
add_done_callback
(
task
.
add_done_callback
(
lambda
_
:
logger
.
info
(
"metrics publisher endpoint created"
)
lambda
_
:
logger
.
info
(
"metrics publisher endpoint created"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment