Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
96fe63fe
Unverified
Commit
96fe63fe
authored
Dec 09, 2025
by
J Wyman
Committed by
GitHub
Dec 09, 2025
Browse files
feat: nixl_connect: Improve Concurrency Support (#4433)
Signed-off-by:
J Wyman
<
jwyman@nvidia.com
>
parent
0ce7280d
Changes
16
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
294 additions
and
156 deletions
+294
-156
components/src/dynamo/sglang/request_handlers/multimodal/encode_worker_handler.py
...lang/request_handlers/multimodal/encode_worker_handler.py
+1
-2
components/src/dynamo/sglang/request_handlers/multimodal/worker_handler.py
...namo/sglang/request_handlers/multimodal/worker_handler.py
+0
-2
components/src/dynamo/trtllm/encode_helper.py
components/src/dynamo/trtllm/encode_helper.py
+1
-1
components/src/dynamo/trtllm/main.py
components/src/dynamo/trtllm/main.py
+0
-1
components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py
.../dynamo/vllm/multimodal_handlers/encode_worker_handler.py
+1
-2
components/src/dynamo/vllm/multimodal_handlers/worker_handler.py
...nts/src/dynamo/vllm/multimodal_handlers/worker_handler.py
+0
-2
docs/api/nixl_connect/connector.md
docs/api/nixl_connect/connector.md
+11
-19
docs/api/nixl_connect/read_operation.md
docs/api/nixl_connect/read_operation.md
+1
-1
docs/api/nixl_connect/readable_operation.md
docs/api/nixl_connect/readable_operation.md
+1
-1
docs/api/nixl_connect/writable_operation.md
docs/api/nixl_connect/writable_operation.md
+1
-1
docs/api/nixl_connect/write_operation.md
docs/api/nixl_connect/write_operation.md
+1
-1
docs/backends/sglang/multimodal_sglang_guide.md
docs/backends/sglang/multimodal_sglang_guide.md
+1
-1
examples/multimodal/components/audio_encode_worker.py
examples/multimodal/components/audio_encode_worker.py
+1
-1
examples/multimodal/components/encode_worker.py
examples/multimodal/components/encode_worker.py
+1
-2
examples/multimodal/components/video_encode_worker.py
examples/multimodal/components/video_encode_worker.py
+1
-2
lib/bindings/python/src/dynamo/nixl_connect/__init__.py
lib/bindings/python/src/dynamo/nixl_connect/__init__.py
+272
-117
No files found.
components/src/dynamo/sglang/request_handlers/multimodal/encode_worker_handler.py
View file @
96fe63fe
...
@@ -159,7 +159,7 @@ class MultimodalEncodeWorkerHandler(BaseWorkerHandler):
...
@@ -159,7 +159,7 @@ class MultimodalEncodeWorkerHandler(BaseWorkerHandler):
# Create descriptor for the multimodal data
# Create descriptor for the multimodal data
descriptor
=
connect
.
Descriptor
(
precomputed_embeddings
)
descriptor
=
connect
.
Descriptor
(
precomputed_embeddings
)
with
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
with
await
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
request
.
serialized_request
=
readable
.
metadata
()
request
.
serialized_request
=
readable
.
metadata
()
logger
.
debug
(
f
"Request:
{
request
.
model_dump_json
()
}
"
)
logger
.
debug
(
f
"Request:
{
request
.
model_dump_json
()
}
"
)
...
@@ -184,6 +184,5 @@ class MultimodalEncodeWorkerHandler(BaseWorkerHandler):
...
@@ -184,6 +184,5 @@ class MultimodalEncodeWorkerHandler(BaseWorkerHandler):
# Create and initialize a dynamo connector for this worker.
# Create and initialize a dynamo connector for this worker.
# We'll needs this to move data between this worker and remote workers efficiently.
# We'll needs this to move data between this worker and remote workers efficiently.
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
logger
.
info
(
"Startup completed."
)
logger
.
info
(
"Startup completed."
)
components/src/dynamo/sglang/request_handlers/multimodal/worker_handler.py
View file @
96fe63fe
...
@@ -77,7 +77,6 @@ class EmbeddingsProcessor:
...
@@ -77,7 +77,6 @@ class EmbeddingsProcessor:
async
def
initialize
(
self
):
async
def
initialize
(
self
):
"""Initialize the connector for embeddings processing"""
"""Initialize the connector for embeddings processing"""
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
async
def
process_embeddings
(
self
,
request
:
SglangMultimodalRequest
):
async
def
process_embeddings
(
self
,
request
:
SglangMultimodalRequest
):
"""Process embeddings from serialized request"""
"""Process embeddings from serialized request"""
...
@@ -103,7 +102,6 @@ class EmbeddingsProcessor:
...
@@ -103,7 +102,6 @@ class EmbeddingsProcessor:
"Connector is None - this should not happen after initialization"
"Connector is None - this should not happen after initialization"
)
)
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
read_op
=
await
self
.
_connector
.
begin_read
(
read_op
=
await
self
.
_connector
.
begin_read
(
request
.
serialized_request
,
descriptor
request
.
serialized_request
,
descriptor
...
...
components/src/dynamo/trtllm/encode_helper.py
View file @
96fe63fe
...
@@ -241,7 +241,7 @@ class EncodeHelper:
...
@@ -241,7 +241,7 @@ class EncodeHelper:
# Create readable operation with main embeddings tensor (works for both formats)
# Create readable operation with main embeddings tensor (works for both formats)
descriptor
=
nixl_connect
.
Descriptor
(
encodings
)
descriptor
=
nixl_connect
.
Descriptor
(
encodings
)
with
connector
.
create_readable
(
descriptor
)
as
readable_op
:
with
await
connector
.
create_readable
(
descriptor
)
as
readable_op
:
# Get the metadata for the readable operation
# Get the metadata for the readable operation
op_metadata
=
readable_op
.
metadata
()
op_metadata
=
readable_op
.
metadata
()
...
...
components/src/dynamo/trtllm/main.py
View file @
96fe63fe
...
@@ -332,7 +332,6 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -332,7 +332,6 @@ async def init(runtime: DistributedRuntime, config: Config):
connector
=
None
connector
=
None
logging
.
info
(
"Initializing NIXL Connect."
)
logging
.
info
(
"Initializing NIXL Connect."
)
connector
=
nixl_connect
.
Connector
()
connector
=
nixl_connect
.
Connector
()
await
connector
.
initialize
()
dump_config
(
dump_config
(
config
.
dump_config_to
,
{
"engine_args"
:
engine_args
,
"dynamo_args"
:
config
}
config
.
dump_config_to
,
{
"engine_args"
:
engine_args
,
"dynamo_args"
:
config
}
...
...
components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py
View file @
96fe63fe
...
@@ -69,7 +69,6 @@ class EncodeWorkerHandler:
...
@@ -69,7 +69,6 @@ class EncodeWorkerHandler:
# Create and initialize a dynamo connector for this worker.
# Create and initialize a dynamo connector for this worker.
# We'll needs this to move data between this worker and remote workers efficiently.
# We'll needs this to move data between this worker and remote workers efficiently.
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
logger
.
info
(
"Encode worker startup completed."
)
logger
.
info
(
"Encode worker startup completed."
)
async
def
generate
(
async
def
generate
(
...
@@ -130,7 +129,7 @@ class EncodeWorkerHandler:
...
@@ -130,7 +129,7 @@ class EncodeWorkerHandler:
request
.
embeddings_shape
=
tuple
(
embeddings
.
shape
)
request
.
embeddings_shape
=
tuple
(
embeddings
.
shape
)
descriptor
=
connect
.
Descriptor
(
embeddings_cpu
)
descriptor
=
connect
.
Descriptor
(
embeddings_cpu
)
with
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
with
await
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
request
.
serialized_request
=
readable
.
metadata
()
request
.
serialized_request
=
readable
.
metadata
()
# Clear the image URL as hint that the image is passed as embeddings.
# Clear the image URL as hint that the image is passed as embeddings.
request
.
multimodal_input
.
image_url
=
None
request
.
multimodal_input
.
image_url
=
None
...
...
components/src/dynamo/vllm/multimodal_handlers/worker_handler.py
View file @
96fe63fe
...
@@ -52,7 +52,6 @@ class MultimodalDecodeWorkerHandler(BaseWorkerHandler):
...
@@ -52,7 +52,6 @@ class MultimodalDecodeWorkerHandler(BaseWorkerHandler):
async
def
async_init
(
self
,
runtime
:
DistributedRuntime
):
async
def
async_init
(
self
,
runtime
:
DistributedRuntime
):
"""Async initialization - connector needs async setup"""
"""Async initialization - connector needs async setup"""
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
logger
.
info
(
"Multimodal Decode Worker async initialization completed."
)
logger
.
info
(
"Multimodal Decode Worker async initialization completed."
)
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
,
context
):
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
,
context
):
...
@@ -138,7 +137,6 @@ class MultimodalPDWorkerHandler(BaseWorkerHandler):
...
@@ -138,7 +137,6 @@ class MultimodalPDWorkerHandler(BaseWorkerHandler):
"""Async initialization for connector that requires async setup"""
"""Async initialization for connector that requires async setup"""
# Initialize the connector asynchronously
# Initialize the connector asynchronously
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
logger
.
info
(
"Multimodal PD Worker async initialization completed."
)
logger
.
info
(
"Multimodal PD Worker async initialization completed."
)
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
,
context
):
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
,
context
):
...
...
docs/api/nixl_connect/connector.md
View file @
96fe63fe
...
@@ -47,7 +47,6 @@ The metadata contains required information (identifiers, keys, etc.) which enabl
...
@@ -47,7 +47,6 @@ The metadata contains required information (identifiers, keys, etc.) which enabl
@
async_on_start
@
async_on_start
async
def
async_init
(
self
):
async
def
async_init
(
self
):
self
.
connector
=
dynamo
.
nixl_connect
.
Connector
()
self
.
connector
=
dynamo
.
nixl_connect
.
Connector
()
await
self
.
connector
.
initialize
()
```
```
> [!Tip]
> [!Tip]
...
@@ -109,7 +108,7 @@ Use [`.wait_for_completion()`](write_operation.md#wait_for_completion) to block
...
@@ -109,7 +108,7 @@ Use [`.wait_for_completion()`](write_operation.md#wait_for_completion) to block
### `create_readable`
### `create_readable`
```
python
```
python
def
create_readable
(
async
def
create_readable
(
self
,
self
,
local_descriptors
:
Descriptor
|
list
[
Descriptor
],
local_descriptors
:
Descriptor
|
list
[
Descriptor
],
)
->
ReadableOperation
:
)
->
ReadableOperation
:
...
@@ -130,7 +129,7 @@ Use [`.wait_for_completion()`](readable_operation.md#wait_for_completion) to blo
...
@@ -130,7 +129,7 @@ Use [`.wait_for_completion()`](readable_operation.md#wait_for_completion) to blo
### `create_writable`
### `create_writable`
```
python
```
python
def
create_writable
(
async
def
create_writable
(
self
,
self
,
local_descriptors
:
Descriptor
|
list
[
Descriptor
],
local_descriptors
:
Descriptor
|
list
[
Descriptor
],
)
->
WritableOperation
:
)
->
WritableOperation
:
...
@@ -151,6 +150,15 @@ Use [`.wait_for_completion()`](writable_operation.md#wait_for_completion) to blo
...
@@ -151,6 +150,15 @@ Use [`.wait_for_completion()`](writable_operation.md#wait_for_completion) to blo
## Properties
## Properties
### `hostname`
```
python
@
property
def
hostname
(
self
)
->
str
:
```
Gets the name of the current worker's host.
### `is_cuda_available`
### `is_cuda_available`
```
python
```
python
...
@@ -169,22 +177,6 @@ def name(self) -> str | None:
...
@@ -169,22 +177,6 @@ def name(self) -> str | None:
Gets the Dynamo component name used by the connector.
Gets the Dynamo component name used by the connector.
### `namespace`
```
python
@
property
def
namespace
(
self
)
->
str
:
```
Gets the Dynamo namespace used by the connector.
### `runtime`
```
python
def
runtime
(
self
)
->
dynamo
.
runtime
.
DistributedRuntime
:
```
Gets the Dynamo distributed runtime instance associated with the connector.
## Related Classes
## Related Classes
...
...
docs/api/nixl_connect/read_operation.md
View file @
96fe63fe
...
@@ -38,7 +38,7 @@ therefore the operation should be awaited until completed unless cancellation is
...
@@ -38,7 +38,7 @@ therefore the operation should be awaited until completed unless cancellation is
)
->
None
:
)
->
None
:
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
with
self
.
connector
.
begin_read
(
descriptor
,
remote_metadata
)
as
read_op
:
with
await
self
.
connector
.
begin_read
(
remote_metadata
,
descriptor
)
as
read_op
:
# Wait for the operation to complete writing data from the remote worker to local_tensor.
# Wait for the operation to complete writing data from the remote worker to local_tensor.
await
read_op
.
wait_for_completion
()
await
read_op
.
wait_for_completion
()
```
```
...
...
docs/api/nixl_connect/readable_operation.md
View file @
96fe63fe
...
@@ -37,7 +37,7 @@ therefore the operation should be awaited until completed unless cancellation is
...
@@ -37,7 +37,7 @@ therefore the operation should be awaited until completed unless cancellation is
)
->
None
:
)
->
None
:
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
with
self
.
connector
.
create_readable
(
descriptor
)
as
read_op
:
with
await
self
.
connector
.
create_readable
(
descriptor
)
as
read_op
:
op_metadata
=
read_op
.
metadata
()
op_metadata
=
read_op
.
metadata
()
# Send the metadata to the remote worker via sideband communication.
# Send the metadata to the remote worker via sideband communication.
...
...
docs/api/nixl_connect/writable_operation.md
View file @
96fe63fe
...
@@ -38,7 +38,7 @@ Cancellation is handled asynchronously.
...
@@ -38,7 +38,7 @@ Cancellation is handled asynchronously.
)
->
None
:
)
->
None
:
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
with
self
.
connector
.
create_writable
(
descriptor
)
as
write_op
:
with
await
self
.
connector
.
create_writable
(
descriptor
)
as
write_op
:
op_metadata
=
write_op
.
metadata
()
op_metadata
=
write_op
.
metadata
()
# Send the metadata to the remote worker via sideband communication.
# Send the metadata to the remote worker via sideband communication.
...
...
docs/api/nixl_connect/write_operation.md
View file @
96fe63fe
...
@@ -39,7 +39,7 @@ Cancellation is handled asynchronously.
...
@@ -39,7 +39,7 @@ Cancellation is handled asynchronously.
)
->
None
:
)
->
None
:
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
descriptor
=
dynamo
.
nixl_connect
.
Descriptor
(
local_tensor
)
with
self
.
connector
.
begin_write
(
descriptor
,
remote_metadata
)
as
write_op
:
with
await
self
.
connector
.
begin_write
(
descriptor
,
remote_metadata
)
as
write_op
:
# Wait for the operation to complete writing local_tensor to the remote worker.
# Wait for the operation to complete writing local_tensor to the remote worker.
await
write_op
.
wait_for_completion
()
await
write_op
.
wait_for_completion
()
```
```
...
...
docs/backends/sglang/multimodal_sglang_guide.md
View file @
96fe63fe
...
@@ -222,7 +222,7 @@ NIXL is used only for embedding transfer:
...
@@ -222,7 +222,7 @@ NIXL is used only for embedding transfer:
```
python
```
python
Encode
Worker
:
Encode
Worker
:
descriptor
=
connect
.
Descriptor
(
precomputed_embeddings
)
descriptor
=
connect
.
Descriptor
(
precomputed_embeddings
)
with
connector
.
create_readable
(
descriptor
)
as
readable
:
with
await
connector
.
create_readable
(
descriptor
)
as
readable
:
request
.
serialized_request
=
readable
.
metadata
()
request
.
serialized_request
=
readable
.
metadata
()
# Send request with NIXL metadata
# Send request with NIXL metadata
await
pd_worker_client
.
round_robin
(
request
)
await
pd_worker_client
.
round_robin
(
request
)
...
...
examples/multimodal/components/audio_encode_worker.py
View file @
96fe63fe
...
@@ -168,7 +168,7 @@ class VllmEncodeWorker:
...
@@ -168,7 +168,7 @@ class VllmEncodeWorker:
with
torch
.
no_grad
():
with
torch
.
no_grad
():
audio_embeddings
=
self
.
get_audio_embeddings
(
audio_features
)
audio_embeddings
=
self
.
get_audio_embeddings
(
audio_features
)
descriptor
=
connect
.
Descriptor
(
audio_embeddings
)
descriptor
=
connect
.
Descriptor
(
audio_embeddings
)
with
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
with
await
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
request
.
serialized_request
=
readable
.
metadata
()
request
.
serialized_request
=
readable
.
metadata
()
# Clear the audio URL as hint that the audio is passed as embeddings.
# Clear the audio URL as hint that the audio is passed as embeddings.
request
.
multimodal_input
.
audio_url
=
None
request
.
multimodal_input
.
audio_url
=
None
...
...
examples/multimodal/components/encode_worker.py
View file @
96fe63fe
...
@@ -125,7 +125,7 @@ class VllmEncodeWorker:
...
@@ -125,7 +125,7 @@ class VllmEncodeWorker:
request
.
embeddings_shape
=
tuple
(
embeddings
.
shape
)
request
.
embeddings_shape
=
tuple
(
embeddings
.
shape
)
descriptor
=
connect
.
Descriptor
(
embeddings
)
descriptor
=
connect
.
Descriptor
(
embeddings
)
with
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
with
await
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
request
.
serialized_request
=
readable
.
metadata
()
request
.
serialized_request
=
readable
.
metadata
()
# Clear the image URL as hint that the image is passed as embeddings.
# Clear the image URL as hint that the image is passed as embeddings.
request
.
multimodal_input
.
image_url
=
None
request
.
multimodal_input
.
image_url
=
None
...
@@ -158,7 +158,6 @@ class VllmEncodeWorker:
...
@@ -158,7 +158,6 @@ class VllmEncodeWorker:
# Create and initialize a dynamo connector for this worker.
# Create and initialize a dynamo connector for this worker.
# We'll needs this to move data between this worker and remote workers efficiently.
# We'll needs this to move data between this worker and remote workers efficiently.
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
logger
.
info
(
"Startup completed."
)
logger
.
info
(
"Startup completed."
)
...
...
examples/multimodal/components/video_encode_worker.py
View file @
96fe63fe
...
@@ -153,7 +153,7 @@ class VllmEncodeWorker:
...
@@ -153,7 +153,7 @@ class VllmEncodeWorker:
request
.
embeddings_shape
=
tuple
(
tensor_for_descriptor
.
shape
)
request
.
embeddings_shape
=
tuple
(
tensor_for_descriptor
.
shape
)
descriptor
=
connect
.
Descriptor
(
tensor_for_descriptor
)
descriptor
=
connect
.
Descriptor
(
tensor_for_descriptor
)
with
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
with
await
self
.
_connector
.
create_readable
(
descriptor
)
as
readable
:
request
.
serialized_request
=
readable
.
metadata
()
request
.
serialized_request
=
readable
.
metadata
()
# Clear the image URL as hint that the image is passed as embeddings.
# Clear the image URL as hint that the image is passed as embeddings.
request
.
multimodal_input
.
video_url
=
None
request
.
multimodal_input
.
video_url
=
None
...
@@ -199,7 +199,6 @@ class VllmEncodeWorker:
...
@@ -199,7 +199,6 @@ class VllmEncodeWorker:
# Create and initialize a dynamo connector for this worker.
# Create and initialize a dynamo connector for this worker.
# We'll needs this to move data between this worker and remote workers efficiently.
# We'll needs this to move data between this worker and remote workers efficiently.
self
.
_connector
=
connect
.
Connector
()
self
.
_connector
=
connect
.
Connector
()
await
self
.
_connector
.
initialize
()
logger
.
info
(
"Startup completed."
)
logger
.
info
(
"Startup completed."
)
...
...
lib/bindings/python/src/dynamo/nixl_connect/__init__.py
View file @
96fe63fe
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment