Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
17abc9de
Unverified
Commit
17abc9de
authored
Mar 18, 2026
by
Hongkuan Zhou
Committed by
GitHub
Mar 18, 2026
Browse files
chore: expose inc id and add version to forwardpassmetric (#7501)
Signed-off-by:
hongkuanz
<
hongkuanz@nvidia.com
>
parent
da2c5e76
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
67 additions
and
16 deletions
+67
-16
components/src/dynamo/common/forward_pass_metrics.py
components/src/dynamo/common/forward_pass_metrics.py
+39
-2
components/src/dynamo/common/recv_forward_pass_metrics.py
components/src/dynamo/common/recv_forward_pass_metrics.py
+22
-13
components/src/dynamo/vllm/instrumented_scheduler.py
components/src/dynamo/vllm/instrumented_scheduler.py
+6
-1
No files found.
components/src/dynamo/common/forward_pass_metrics.py
View file @
17abc9de
...
@@ -36,8 +36,14 @@ TODO: planner consuming these metrics instead of frontend/router metrics
...
@@ -36,8 +36,14 @@ TODO: planner consuming these metrics instead of frontend/router metrics
from
__future__
import
annotations
from
__future__
import
annotations
import
logging
import
msgspec
import
msgspec
logger
=
logging
.
getLogger
(
__name__
)
FPM_VERSION
:
int
=
1
class
WelfordAccumulator
:
class
WelfordAccumulator
:
"""Welford's online algorithm for count / sum / population-variance.
"""Welford's online algorithm for count / sum / population-variance.
...
@@ -156,12 +162,21 @@ class ForwardPassMetrics(
...
@@ -156,12 +162,21 @@ class ForwardPassMetrics(
engine transitions from active to idle.
engine transitions from active to idle.
"""
"""
# Schema version. Consumers must check this before interpreting
# the remaining fields. Bump when the schema changes incompatibly.
version
:
int
=
FPM_VERSION
# Unique worker identifier (Dynamo runtime connection_id).
# Unique worker identifier (Dynamo runtime connection_id).
worker_id
:
str
=
""
worker_id
:
str
=
""
# Data parallel rank. Each DP rank has its own scheduler and ZMQ port.
# Data parallel rank. Each DP rank has its own scheduler and ZMQ port.
dp_rank
:
int
=
0
dp_rank
:
int
=
0
# Monotonically increasing sequence number per (worker_id, dp_rank).
# Set by _FpmPublisherThread before encoding; 0 for messages that
# have not been stamped (e.g. unit-test fixtures).
counter_id
:
int
=
0
# Wall-clock time of this iteration: from schedule() to update_from_output().
# Wall-clock time of this iteration: from schedule() to update_from_output().
# Covers scheduling + model forward pass + output processing.
# Covers scheduling + model forward pass + output processing.
# 0.0 for idle heartbeat messages.
# 0.0 for idle heartbeat messages.
...
@@ -182,5 +197,27 @@ def encode(metrics: ForwardPassMetrics) -> bytes:
...
@@ -182,5 +197,27 @@ def encode(metrics: ForwardPassMetrics) -> bytes:
return
_encoder
.
encode
(
metrics
)
return
_encoder
.
encode
(
metrics
)
def
decode
(
data
:
bytes
)
->
ForwardPassMetrics
:
class
UnsupportedFpmVersionError
(
Exception
):
return
_decoder
.
decode
(
data
)
"""Raised when a ForwardPassMetrics message has an unrecognised version."""
def
decode
(
data
:
bytes
)
->
ForwardPassMetrics
|
None
:
"""Decode a ForwardPassMetrics message, returning None for unknown versions.
Returns None (and logs a warning) if the message cannot be decoded or
carries a version this code does not understand, so callers can simply
skip unsupported messages without crashing.
"""
try
:
metrics
=
_decoder
.
decode
(
data
)
except
Exception
:
logger
.
warning
(
"Failed to decode ForwardPassMetrics message, skipping"
)
return
None
if
metrics
.
version
!=
FPM_VERSION
:
logger
.
warning
(
"Unsupported ForwardPassMetrics version %d (expected %d), skipping"
,
metrics
.
version
,
FPM_VERSION
,
)
return
None
return
metrics
components/src/dynamo/common/recv_forward_pass_metrics.py
View file @
17abc9de
...
@@ -16,13 +16,18 @@ Usage:
...
@@ -16,13 +16,18 @@ Usage:
import
argparse
import
argparse
import
asyncio
import
asyncio
import
json
import
json
import
logging
import
os
import
os
import
sys
import
msgspec
import
msgspec
from
dynamo.common.forward_pass_metrics
import
decode
from
dynamo.common.forward_pass_metrics
import
decode
from
dynamo.llm
import
FpmEventSubscriber
from
dynamo.runtime
import
DistributedRuntime
from
dynamo.runtime
import
DistributedRuntime
from
dynamo.runtime.logging
import
configure_dynamo_logging
configure_dynamo_logging
()
logger
=
logging
.
getLogger
(
__name__
)
def
main
()
->
None
:
def
main
()
->
None
:
...
@@ -54,8 +59,6 @@ def main() -> None:
...
@@ -54,8 +59,6 @@ def main() -> None:
async
def
run
(
args
:
argparse
.
Namespace
)
->
None
:
async
def
run
(
args
:
argparse
.
Namespace
)
->
None
:
from
dynamo.llm
import
FpmEventSubscriber
loop
=
asyncio
.
get_running_loop
()
loop
=
asyncio
.
get_running_loop
()
event_plane
=
os
.
environ
.
get
(
"DYN_EVENT_PLANE"
,
"nats"
)
event_plane
=
os
.
environ
.
get
(
"DYN_EVENT_PLANE"
,
"nats"
)
enable_nats
=
args
.
request_plane
==
"nats"
or
event_plane
==
"nats"
enable_nats
=
args
.
request_plane
==
"nats"
or
event_plane
==
"nats"
...
@@ -67,26 +70,32 @@ async def run(args: argparse.Namespace) -> None:
...
@@ -67,26 +70,32 @@ async def run(args: argparse.Namespace) -> None:
subscriber
=
FpmEventSubscriber
(
endpoint
)
subscriber
=
FpmEventSubscriber
(
endpoint
)
json_encoder
=
msgspec
.
json
.
Encoder
()
json_encoder
=
msgspec
.
json
.
Encoder
()
print
(
logger
.
info
(
f
"Subscribed to forward-pass-metrics via event plane "
"Subscribed to forward-pass-metrics via event plane "
f
"(namespace=
{
args
.
namespace
}
, component=
{
args
.
component
}
) "
"(namespace=
%s, component=%s) Ctrl+C to stop"
,
f
"Ctrl+C to stop"
,
args
.
namespace
,
file
=
sys
.
stderr
,
args
.
component
,
)
)
seq
=
0
try
:
try
:
while
True
:
while
True
:
data
=
await
asyncio
.
to_thread
(
subscriber
.
recv
)
data
=
await
asyncio
.
to_thread
(
subscriber
.
recv
)
if
data
is
None
:
if
data
is
None
:
print
(
"Stream closed."
,
file
=
sys
.
stderr
)
logger
.
info
(
"Stream closed."
)
break
break
metrics
=
decode
(
data
)
metrics
=
decode
(
data
)
if
metrics
is
None
:
continue
pretty
=
json
.
loads
(
json_encoder
.
encode
(
metrics
))
pretty
=
json
.
loads
(
json_encoder
.
encode
(
metrics
))
print
(
f
"[seq=
{
seq
}
]
{
json
.
dumps
(
pretty
,
indent
=
2
)
}
"
,
flush
=
True
)
logger
.
info
(
seq
+=
1
"[worker=%s dp=%d counter=%d] %s"
,
metrics
.
worker_id
,
metrics
.
dp_rank
,
metrics
.
counter_id
,
json
.
dumps
(
pretty
,
indent
=
2
),
)
except
KeyboardInterrupt
:
except
KeyboardInterrupt
:
print
(
"
\n
Stopped."
,
file
=
sys
.
stderr
)
logger
.
info
(
"Stopped."
)
finally
:
finally
:
subscriber
.
shutdown
()
subscriber
.
shutdown
()
...
...
components/src/dynamo/vllm/instrumented_scheduler.py
View file @
17abc9de
...
@@ -25,6 +25,7 @@ import time
...
@@ -25,6 +25,7 @@ import time
from
itertools
import
count
from
itertools
import
count
from
typing
import
TYPE_CHECKING
from
typing
import
TYPE_CHECKING
import
msgspec.structs
import
zmq
import
zmq
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.core.sched.scheduler
import
Scheduler
from
vllm.v1.core.sched.scheduler
import
Scheduler
...
@@ -37,6 +38,7 @@ from dynamo.common.forward_pass_metrics import (
...
@@ -37,6 +38,7 @@ from dynamo.common.forward_pass_metrics import (
WelfordAccumulator
,
WelfordAccumulator
,
encode
,
encode
,
)
)
from
dynamo.runtime.logging
import
configure_dynamo_logging
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
...
@@ -45,6 +47,7 @@ if TYPE_CHECKING:
...
@@ -45,6 +47,7 @@ if TYPE_CHECKING:
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.v1.structured_output
import
StructuredOutputManager
from
vllm.v1.structured_output
import
StructuredOutputManager
configure_dynamo_logging
()
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
DEFAULT_FPM_PORT
=
20380
DEFAULT_FPM_PORT
=
20380
...
@@ -128,8 +131,10 @@ class _FpmPublisherThread:
...
@@ -128,8 +131,10 @@ class _FpmPublisherThread:
continue
continue
try
:
try
:
seq
=
next
(
self
.
_seq
)
metrics
=
msgspec
.
structs
.
replace
(
metrics
,
counter_id
=
seq
)
payload
=
encode
(
metrics
)
payload
=
encode
(
metrics
)
seq_bytes
=
next
(
self
.
_
seq
)
.
to_bytes
(
8
,
"big"
)
seq_bytes
=
seq
.
to_bytes
(
8
,
"big"
)
self
.
_pub
.
send_multipart
((
topic
,
seq_bytes
,
payload
),
flags
=
zmq
.
NOBLOCK
)
self
.
_pub
.
send_multipart
((
topic
,
seq_bytes
,
payload
),
flags
=
zmq
.
NOBLOCK
)
last_publish
=
time
.
monotonic
()
last_publish
=
time
.
monotonic
()
except
zmq
.
Again
:
except
zmq
.
Again
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment