Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
8c129ed4
Unverified
Commit
8c129ed4
authored
Jan 13, 2026
by
jh-nv
Committed by
GitHub
Jan 13, 2026
Browse files
feat: Propagate OTEL tracing context for trtllm (#5377)
parent
869562da
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
145 additions
and
26 deletions
+145
-26
components/src/dynamo/common/utils/__init__.py
components/src/dynamo/common/utils/__init__.py
+3
-2
components/src/dynamo/common/utils/otel_tracing.py
components/src/dynamo/common/utils/otel_tracing.py
+24
-0
components/src/dynamo/trtllm/request_handlers/handler_base.py
...onents/src/dynamo/trtllm/request_handlers/handler_base.py
+5
-0
components/src/dynamo/vllm/handlers.py
components/src/dynamo/vllm/handlers.py
+4
-18
examples/backends/trtllm/launch/agg.sh
examples/backends/trtllm/launch/agg.sh
+34
-0
examples/backends/trtllm/launch/disagg.sh
examples/backends/trtllm/launch/disagg.sh
+38
-4
examples/backends/trtllm/launch/disagg_same_gpu.sh
examples/backends/trtllm/launch/disagg_same_gpu.sh
+37
-2
No files found.
components/src/dynamo/common/utils/__init__.py
View file @
8c129ed4
...
...
@@ -9,10 +9,11 @@ Dynamo backends and components.
Submodules:
- endpoint_types: Endpoint type parsing utilities
- otel_tracing: OpenTelemetry tracing header utilities
- paths: Workspace directory detection and path utilities
- prometheus: Prometheus metrics collection and logging utilities
"""
from
dynamo.common.utils
import
endpoint_types
,
paths
,
prometheus
from
dynamo.common.utils
import
endpoint_types
,
otel_tracing
,
paths
,
prometheus
__all__
=
[
"endpoint_types"
,
"paths"
,
"prometheus"
]
__all__
=
[
"endpoint_types"
,
"otel_tracing"
,
"paths"
,
"prometheus"
]
components/src/dynamo/common/utils/otel_tracing.py
0 → 100644
View file @
8c129ed4
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
OpenTelemetry tracing header utilities for Dynamo components.
"""
from
dynamo._core
import
Context
def build_trace_headers(
    context: Context, *, trace_flags: str = "01"
) -> dict[str, str] | None:
    """Build W3C Trace Context headers from a request context.

    Args:
        context: Request context; its ``trace_id`` and ``span_id``
            attributes are read.
        trace_flags: Value written into the trace-flags field of the
            ``traceparent`` header. Defaults to ``"01"`` (sampled), which
            is the value this function previously hard-coded.

    Returns:
        A single-entry dict ``{"traceparent": ...}``, or ``None`` when
        either ``trace_id`` or ``span_id`` is missing or empty.
    """
    trace_id = context.trace_id
    span_id = context.span_id
    if not trace_id or not span_id:
        # Both ids are required to form a traceparent value.
        return None

    # W3C Trace Context format: {version}-{trace_id}-{parent_id}-{trace_flags}
    # version: 00
    # TODO: callers should pass the current span's actual trace-flags
    # instead of relying on the "01" (sampled) default.
    return {"traceparent": f"00-{trace_id}-{span_id}-{trace_flags}"}
components/src/dynamo/trtllm/request_handlers/handler_base.py
View file @
8c129ed4
...
...
@@ -29,6 +29,7 @@ from tensorrt_llm.llmapi import DisaggregatedParams as LlmDisaggregatedParams
from
tensorrt_llm.llmapi.llm
import
SamplingParams
from
dynamo._core
import
Context
from
dynamo.common.utils.otel_tracing
import
build_trace_headers
from
dynamo.logits_processing.examples
import
HelloWorldLogitsProcessor
from
dynamo.nixl_connect
import
Connector
from
dynamo.runtime
import
DistributedRuntime
...
...
@@ -366,6 +367,9 @@ class HandlerBase:
prefill_result
.
get
(
"prompt_tokens_details"
)
if
prefill_result
else
None
)
# Build trace headers for distributed tracing
trace_headers
=
build_trace_headers
(
context
)
try
:
# NEW: Updated engine call to include multimodal data
generation_result
=
self
.
engine
.
llm
.
generate_async
(
...
...
@@ -373,6 +377,7 @@ class HandlerBase:
sampling_params
=
sampling_params
,
disaggregated_params
=
disaggregated_params
,
streaming
=
streaming
,
trace_headers
=
trace_headers
,
)
# Use the context manager to handle cancellation monitoring
...
...
components/src/dynamo/vllm/handlers.py
View file @
8c129ed4
...
...
@@ -21,8 +21,8 @@ from vllm.outputs import RequestOutput
from
vllm.sampling_params
import
SamplingParams
,
StructuredOutputsParams
from
vllm.v1.engine.exceptions
import
EngineDeadError
from
dynamo._core
import
Context
from
dynamo.common.utils.input_params
import
InputParamManager
from
dynamo.common.utils.otel_tracing
import
build_trace_headers
from
dynamo.llm
import
(
ModelInput
,
ModelType
,
...
...
@@ -965,20 +965,6 @@ class BaseWorkerHandler(ABC):
return
log_probs
if
log_probs
else
None
,
top_logprobs
if
top_logprobs
else
None
def _build_trace_headers(self, context: Context) -> dict[str, str] | None:
    """Return trace headers derived from ``context`` for the vLLM engine.

    Reads ``context.trace_id`` and ``context.span_id``. If either one is
    missing or empty, ``None`` is returned; otherwise a dict holding a
    single ``traceparent`` entry is returned.
    """
    trace_id, span_id = context.trace_id, context.span_id
    if trace_id and span_id:
        # W3C Trace Context format:
        #   {version}-{trace_id}-{parent_id}-{trace_flags}
        # version: 00, trace_flags: 01 (sampled)
        # TODO: properly propagate the trace-flags from current span.
        return {"traceparent": f"00-{trace_id}-{span_id}-01"}
    return None
@
staticmethod
def
_log_with_lora_context
(
message
:
str
,
...
...
@@ -1203,7 +1189,7 @@ class DecodeWorkerHandler(BaseWorkerHandler):
dp_rank
=
request
.
get
(
"dp_rank"
,
None
)
trace_headers
=
self
.
_
build_trace_headers
(
context
)
trace_headers
=
build_trace_headers
(
context
)
async
with
self
.
_abort_monitor
(
context
,
request_id
):
try
:
...
...
@@ -1249,7 +1235,7 @@ class DecodeWorkerHandler(BaseWorkerHandler):
openai_request_id
=
request
.
get
(
"id"
)
or
request
.
get
(
"request_id"
,
request_id
)
previous_text
=
""
trace_headers
=
self
.
_
build_trace_headers
(
context
)
trace_headers
=
build_trace_headers
(
context
)
async
with
self
.
_abort_monitor
(
context
,
request_id
):
try
:
...
...
@@ -1411,7 +1397,7 @@ class PrefillWorkerHandler(BaseWorkerHandler):
dp_rank
=
request
.
get
(
"dp_rank"
,
None
)
trace_headers
=
self
.
_
build_trace_headers
(
context
)
trace_headers
=
build_trace_headers
(
context
)
async
with
self
.
_abort_monitor
(
context
,
request_id
,
is_prefill
=
True
):
try
:
...
...
examples/backends/trtllm/launch/agg.sh
View file @
8c129ed4
...
...
@@ -20,17 +20,51 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
ENABLE_OTEL
=
false
while
[[
$#
-gt
0
]]
;
do
case
$1
in
--enable-otel
)
ENABLE_OTEL
=
true
shift
;;
-h
|
--help
)
echo
"Usage:
$0
[OPTIONS]"
echo
"Options:"
echo
" --enable-otel Enable OpenTelemetry tracing"
echo
" -h, --help Show this help message"
echo
""
exit
0
;;
*
)
echo
"Unknown option:
$1
"
echo
"Use --help for usage information"
exit
1
;;
esac
done
TRACE_ARGS
=()
if
[
"
$ENABLE_OTEL
"
=
true
]
;
then
export
DYN_LOGGING_JSONL
=
true
export
OTEL_EXPORT_ENABLED
=
1
export
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
=
${
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
:-
http
://localhost:4317
}
TRACE_ARGS+
=(
--override-engine-args
"{
\"
return_perf_metrics
\"
: true,
\"
otlp_traces_endpoint
\"
:
\"
${
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
}
\"
}"
)
fi
# run frontend
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend &
DYNAMO_PID
=
$!
# run worker
# Additional command line args can be passed
OTEL_SERVICE_NAME
=
dynamo-worker
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--modality
"
$MODALITY
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
"
${
TRACE_ARGS
[@]
}
"
\
"
$@
"
examples/backends/trtllm/launch/disagg.sh
View file @
8c129ed4
...
...
@@ -23,25 +23,59 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
ENABLE_OTEL
=
false
while
[[
$#
-gt
0
]]
;
do
case
$1
in
--enable-otel
)
ENABLE_OTEL
=
true
shift
;;
-h
|
--help
)
echo
"Usage:
$0
[OPTIONS]"
echo
"Options:"
echo
" --enable-otel Enable OpenTelemetry tracing"
echo
" -h, --help Show this help message"
echo
""
exit
0
;;
*
)
echo
"Unknown option:
$1
"
echo
"Use --help for usage information"
exit
1
;;
esac
done
# Enable tracing if requested
TRACE_ARGS
=()
if
[
"
$ENABLE_OTEL
"
=
true
]
;
then
export
DYN_LOGGING_JSONL
=
true
export
OTEL_EXPORT_ENABLED
=
1
export
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
=
${
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
:-
http
://localhost:4317
}
TRACE_ARGS+
=(
--override-engine-args
"{
\"
return_perf_metrics
\"
: true,
\"
otlp_traces_endpoint
\"
:
\"
${
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
}
\"
}"
)
fi
# run frontend
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend &
DYNAMO_PID
=
$!
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
OTEL_SERVICE_NAME
=
dynamo-worker-prefill
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--disaggregation-mode
prefill &
--disaggregation-mode
prefill
\
"
${
TRACE_ARGS
[@]
}
"
&
PREFILL_PID
=
$!
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
OTEL_SERVICE_NAME
=
dynamo-worker-decode
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--disaggregation-mode
decode
--disaggregation-mode
decode
\
"
${
TRACE_ARGS
[@]
}
"
examples/backends/trtllm/launch/disagg_same_gpu.sh
View file @
8c129ed4
...
...
@@ -46,13 +46,45 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
ENABLE_OTEL
=
false
while
[[
$#
-gt
0
]]
;
do
case
$1
in
--enable-otel
)
ENABLE_OTEL
=
true
shift
;;
-h
|
--help
)
echo
"Usage:
$0
[OPTIONS]"
echo
"Options:"
echo
" --enable-otel Enable OpenTelemetry tracing"
echo
" -h, --help Show this help message"
echo
""
exit
0
;;
*
)
echo
"Unknown option:
$1
"
echo
"Use --help for usage information"
exit
1
;;
esac
done
# Enable tracing if requested
TRACE_ARGS
=()
if
[
"
$ENABLE_OTEL
"
=
true
]
;
then
export
DYN_LOGGING_JSONL
=
true
export
OTEL_EXPORT_ENABLED
=
1
export
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
=
${
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
:-
http
://localhost:4317
}
TRACE_ARGS+
=(
--override-engine-args
"{
\"
return_perf_metrics
\"
: true,
\"
otlp_traces_endpoint
\"
:
\"
${
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
}
\"
}"
)
fi
# run frontend
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend &
DYNAMO_PID
=
$!
# run prefill worker (shares GPU with decode)
OTEL_SERVICE_NAME
=
dynamo-worker-prefill
\
CUDA_VISIBLE_DEVICES
=
$CUDA_VISIBLE_DEVICES
\
DYN_SYSTEM_PORT
=
${
DYN_SYSTEM_PORT1
:-
8081
}
\
python3
-m
dynamo.trtllm
\
...
...
@@ -61,10 +93,12 @@ python3 -m dynamo.trtllm \
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--publish-events-and-metrics
\
--disaggregation-mode
prefill &
--disaggregation-mode
prefill
\
"
${
TRACE_ARGS
[@]
}
"
&
PREFILL_PID
=
$!
# run decode worker (shares GPU with prefill)
OTEL_SERVICE_NAME
=
dynamo-worker-decode
\
CUDA_VISIBLE_DEVICES
=
$CUDA_VISIBLE_DEVICES
\
DYN_SYSTEM_PORT
=
${
DYN_SYSTEM_PORT2
:-
8082
}
\
python3
-m
dynamo.trtllm
\
...
...
@@ -73,5 +107,6 @@ python3 -m dynamo.trtllm \
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--publish-events-and-metrics
\
--disaggregation-mode
decode
--disaggregation-mode
decode
\
"
${
TRACE_ARGS
[@]
}
"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment