Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
34ce777d
"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "b19de4ed77c11a550e037582b92f91ca19eebde1"
Unverified
Commit
34ce777d
authored
Apr 14, 2026
by
Jie Hao
Committed by
GitHub
Apr 14, 2026
Browse files
feat: propagate otel tracing to trtllm E/P/D workers (#7592)
parent
b78ec99a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
7 deletions
+22
-7
components/src/dynamo/trtllm/multimodal/embedding_fetcher.py
components/src/dynamo/trtllm/multimodal/embedding_fetcher.py
+13
-3
components/src/dynamo/trtllm/request_handlers/aggregated_handler.py
.../src/dynamo/trtllm/request_handlers/aggregated_handler.py
+1
-0
components/src/dynamo/trtllm/request_handlers/handlers.py
components/src/dynamo/trtllm/request_handlers/handlers.py
+7
-3
components/src/dynamo/trtllm/tests/multimodal/test_trtllm_embedding_fetcher.py
.../trtllm/tests/multimodal/test_trtllm_embedding_fetcher.py
+1
-1
No files found.
components/src/dynamo/trtllm/multimodal/embedding_fetcher.py
View file @
34ce777d
...
@@ -29,6 +29,7 @@ async def fetch_embeddings_from_encoder(
...
@@ -29,6 +29,7 @@ async def fetch_embeddings_from_encoder(
request
:
Dict
[
str
,
Any
],
request
:
Dict
[
str
,
Any
],
encode_client
:
Any
,
encode_client
:
Any
,
encoder_cache
:
Optional
[
MultimodalEmbeddingCacheManager
]
=
None
,
encoder_cache
:
Optional
[
MultimodalEmbeddingCacheManager
]
=
None
,
trace_context
=
None
,
)
->
Union
[
List
[
torch
.
Tensor
],
DisaggregatedParams
]:
)
->
Union
[
List
[
torch
.
Tensor
],
DisaggregatedParams
]:
"""
"""
Fetch embeddings from remote encode worker.
Fetch embeddings from remote encode worker.
...
@@ -38,6 +39,7 @@ async def fetch_embeddings_from_encoder(
...
@@ -38,6 +39,7 @@ async def fetch_embeddings_from_encoder(
request: Request dict (used for creating modified requests for caching)
request: Request dict (used for creating modified requests for caching)
encode_client: Client to call remote encode worker
encode_client: Client to call remote encode worker
encoder_cache: Optional cache for embeddings
encoder_cache: Optional cache for embeddings
trace_context: Optional Dynamo context for OTel trace propagation
Returns:
Returns:
- List[torch.Tensor]: When using cache (CPU tensors from cache)
- List[torch.Tensor]: When using cache (CPU tensors from cache)
...
@@ -56,13 +58,19 @@ async def fetch_embeddings_from_encoder(
...
@@ -56,13 +58,19 @@ async def fetch_embeddings_from_encoder(
request
,
request
,
encoder_cache
,
encoder_cache
,
lambda
req
:
_remote_encode_full_epd
(
lambda
req
:
_remote_encode_full_epd
(
req
,
encode_client
,
update_request_for_decode
=
False
req
,
encode_client
,
update_request_for_decode
=
False
,
trace_context
=
trace_context
,
),
),
)
)
else
:
else
:
# No cache: return DisaggregatedParams directly (no GPU→CPU extraction)
# No cache: return DisaggregatedParams directly (no GPU→CPU extraction)
return
await
_remote_encode_full_epd
(
return
await
_remote_encode_full_epd
(
request
,
encode_client
,
update_request_for_decode
=
True
request
,
encode_client
,
update_request_for_decode
=
True
,
trace_context
=
trace_context
,
)
)
...
@@ -70,6 +78,7 @@ async def _remote_encode_full_epd(
...
@@ -70,6 +78,7 @@ async def _remote_encode_full_epd(
request
:
Dict
[
str
,
Any
],
request
:
Dict
[
str
,
Any
],
encode_client
:
Any
,
encode_client
:
Any
,
update_request_for_decode
:
bool
=
True
,
update_request_for_decode
:
bool
=
True
,
trace_context
=
None
,
)
->
DisaggregatedParams
:
)
->
DisaggregatedParams
:
"""
"""
Call encode worker for full EPD flow.
Call encode worker for full EPD flow.
...
@@ -78,6 +87,7 @@ async def _remote_encode_full_epd(
...
@@ -78,6 +87,7 @@ async def _remote_encode_full_epd(
request: Request dict
request: Request dict
encode_client: Client to call remote encode worker
encode_client: Client to call remote encode worker
update_request_for_decode: If True, store EPD metadata in request
update_request_for_decode: If True, store EPD metadata in request
trace_context: Optional Dynamo context for OTel trace propagation
Returns:
Returns:
DisaggregatedParams with multimodal_embedding_handles
DisaggregatedParams with multimodal_embedding_handles
...
@@ -86,7 +96,7 @@ async def _remote_encode_full_epd(
...
@@ -86,7 +96,7 @@ async def _remote_encode_full_epd(
RuntimeError: If encode worker returns invalid response
RuntimeError: If encode worker returns invalid response
"""
"""
encode_response
=
None
encode_response
=
None
async
for
res
in
await
encode_client
.
round_robin
(
request
):
async
for
res
in
await
encode_client
.
round_robin
(
request
,
context
=
trace_context
):
encode_response
=
res
.
data
()
encode_response
=
res
.
data
()
break
break
...
...
components/src/dynamo/trtllm/request_handlers/aggregated_handler.py
View file @
34ce777d
...
@@ -57,6 +57,7 @@ class AggregatedHandler(HandlerBase):
...
@@ -57,6 +57,7 @@ class AggregatedHandler(HandlerBase):
request
,
request
,
self
.
encode_client
,
self
.
encode_client
,
self
.
_encoder_cache
,
self
.
_encoder_cache
,
trace_context
=
context
,
)
)
if
isinstance
(
result
,
list
):
if
isinstance
(
result
,
list
):
embeddings
=
result
# type: ignore[assignment]
embeddings
=
result
# type: ignore[assignment]
...
...
components/src/dynamo/trtllm/request_handlers/handlers.py
View file @
34ce777d
...
@@ -100,12 +100,13 @@ class PrefillHandler(HandlerBase):
...
@@ -100,12 +100,13 @@ class PrefillHandler(HandlerBase):
super
().
__init__
(
config
)
super
().
__init__
(
config
)
self
.
_encoder_cache
=
encoder_cache
self
.
_encoder_cache
=
encoder_cache
async
def
remote_encode_with_nixl
(
self
,
request
:
dict
):
async
def
remote_encode_with_nixl
(
self
,
request
:
dict
,
context
=
None
):
"""
"""
Call encode worker for NIXL flow to load embeddings and unpack the response.
Call encode worker for NIXL flow to load embeddings and unpack the response.
Args:
Args:
request: Request dict
request: Request dict
context: Optional Dynamo context for trace propagation
Returns:
Returns:
Encoder's embeddings tensor to be used by the prefill worker
Encoder's embeddings tensor to be used by the prefill worker
...
@@ -114,7 +115,7 @@ class PrefillHandler(HandlerBase):
...
@@ -114,7 +115,7 @@ class PrefillHandler(HandlerBase):
if
self
.
encode_client
is
None
:
if
self
.
encode_client
is
None
:
raise
RuntimeError
(
"Encode client is not configured."
)
raise
RuntimeError
(
"Encode client is not configured."
)
encode_response
=
None
encode_response
=
None
async
for
res
in
await
self
.
encode_client
.
round_robin
(
request
):
async
for
res
in
await
self
.
encode_client
.
round_robin
(
request
,
context
=
context
):
encode_response
=
res
.
data
()
encode_response
=
res
.
data
()
break
break
...
@@ -154,7 +155,9 @@ class PrefillHandler(HandlerBase):
...
@@ -154,7 +155,9 @@ class PrefillHandler(HandlerBase):
if
embedding_paths
:
if
embedding_paths
:
if
self
.
encode_client
and
self
.
connector
:
if
self
.
encode_client
and
self
.
connector
:
logging
.
info
(
f
"PrefillHandler: embedding_paths=
{
embedding_paths
}
"
)
logging
.
info
(
f
"PrefillHandler: embedding_paths=
{
embedding_paths
}
"
)
embeddings_tensor
=
await
self
.
remote_encode_with_nixl
(
request
)
embeddings_tensor
=
await
self
.
remote_encode_with_nixl
(
request
,
context
=
context
)
else
:
else
:
# We can still handle embedding_paths without NIXL:
# We can still handle embedding_paths without NIXL:
# `MultimodalRequestProcessor.process_openai_request` will load the embeddings
# `MultimodalRequestProcessor.process_openai_request` will load the embeddings
...
@@ -172,6 +175,7 @@ class PrefillHandler(HandlerBase):
...
@@ -172,6 +175,7 @@ class PrefillHandler(HandlerBase):
request
,
request
,
self
.
encode_client
,
self
.
encode_client
,
self
.
_encoder_cache
,
self
.
_encoder_cache
,
trace_context
=
context
,
)
)
if
isinstance
(
result
,
list
):
if
isinstance
(
result
,
list
):
# Cache path: got List[torch.Tensor]
# Cache path: got List[torch.Tensor]
...
...
components/src/dynamo/trtllm/tests/multimodal/test_trtllm_embedding_fetcher.py
View file @
34ce777d
...
@@ -51,7 +51,7 @@ def create_mock_encode_client(
...
@@ -51,7 +51,7 @@ def create_mock_encode_client(
"prompt_token_ids"
:
prompt_token_ids
or
[
1
,
2
,
3
],
"prompt_token_ids"
:
prompt_token_ids
or
[
1
,
2
,
3
],
}
}
async
def
mock_round_robin
(
req
:
dict
[
str
,
Any
])
->
Any
:
async
def
mock_round_robin
(
req
:
dict
[
str
,
Any
]
,
context
=
None
)
->
Any
:
async
def
gen
():
async
def
gen
():
yield
MockResponse
()
yield
MockResponse
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment