Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
329752d9
Unverified
Commit
329752d9
authored
Jan 12, 2026
by
Kyle McGill
Committed by
GitHub
Jan 12, 2026
Browse files
fix: Synchronizing on new thread to avoid delaying TRTLLM (#5333)
parent
7952ea88
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
57 additions
and
3 deletions
+57
-3
lib/bindings/kvbm/Cargo.lock
lib/bindings/kvbm/Cargo.lock
+3
-0
lib/bindings/kvbm/python/kvbm/trtllm_integration/connector/kvbm_connector_worker.py
...vbm/trtllm_integration/connector/kvbm_connector_worker.py
+3
-2
lib/bindings/kvbm/src/block_manager/vllm/connector/trtllm_worker.rs
...gs/kvbm/src/block_manager/vllm/connector/trtllm_worker.rs
+39
-1
lib/llm/src/block_manager/connector/scheduler.rs
lib/llm/src/block_manager/connector/scheduler.rs
+12
-0
No files found.
lib/bindings/kvbm/Cargo.lock
View file @
329752d9
...
@@ -1578,6 +1578,7 @@ dependencies = [
...
@@ -1578,6 +1578,7 @@ dependencies = [
"tokio-util",
"tokio-util",
"tracing",
"tracing",
"url",
"url",
"utoipa",
"uuid",
"uuid",
]
]
...
@@ -7754,6 +7755,8 @@ dependencies = [
...
@@ -7754,6 +7755,8 @@ dependencies = [
"quote",
"quote",
"regex",
"regex",
"syn 2.0.110",
"syn 2.0.110",
"url",
"uuid",
]
]
[[package]]
[[package]]
...
...
lib/bindings/kvbm/python/kvbm/trtllm_integration/connector/kvbm_connector_worker.py
View file @
329752d9
...
@@ -26,8 +26,9 @@ class DynamoKVBMConnectorWorker(KvCacheConnectorWorker):
...
@@ -26,8 +26,9 @@ class DynamoKVBMConnectorWorker(KvCacheConnectorWorker):
def
callback
():
def
callback
():
self
.
event
.
record
()
self
.
event
.
record
()
self
.
event
.
synchronize
()
# Non-blocking: passes event to Rust for async polling
self
.
_connector
.
execute_offload_operations
()
self
.
_connector
.
submit_offload_on_event
(
self
.
event
.
cuda_event
)
# Returns immediately - no CPU blocking
return
callback
return
callback
...
...
lib/bindings/kvbm/src/block_manager/vllm/connector/trtllm_worker.rs
View file @
329752d9
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
use
dynamo_llm
::
block_manager
::
connector
::
protocol
::
TransferType
;
use
dynamo_llm
::
block_manager
::
connector
::
protocol
::
TransferType
;
use
dynamo_llm
::
block_manager
::
connector
::
scheduler
::{
use
dynamo_llm
::
block_manager
::
connector
::
scheduler
::{
Scheduler
,
TransferSchedulerClient
,
WorkerSchedulerClient
,
Scheduler
,
SchedulerMessage
,
TransferSchedulerClient
,
WorkerSchedulerClient
,
};
};
use
std
::
collections
::
HashSet
;
use
std
::
collections
::
HashSet
;
...
@@ -48,6 +48,11 @@ pub trait Worker: Send + Sync {
...
@@ -48,6 +48,11 @@ pub trait Worker: Send + Sync {
finished_gen_req_ids
:
Vec
<
u64
>
,
finished_gen_req_ids
:
Vec
<
u64
>
,
started_loading_req_ids
:
Vec
<
u64
>
,
started_loading_req_ids
:
Vec
<
u64
>
,
)
->
(
Vec
<
u64
>
,
Vec
<
u64
>
);
)
->
(
Vec
<
u64
>
,
Vec
<
u64
>
);
/// Submit the pending offload operations so that they reach the scheduler only after
/// the given CUDA event has completed, without blocking the caller.
///
/// `event` is the raw event handle passed as a `u64` (the Python caller passes
/// `self.event.cuda_event`).
///
/// Slot bookkeeping is done synchronously; the wait on the event happens off the
/// calling thread. Note that the `KvConnectorWorker` implementation waits on a
/// dedicated `std::thread` (blocking that thread), not on an async task.
fn
submit_offload_on_event
(
&
mut
self
,
event
:
u64
)
->
anyhow
::
Result
<
()
>
;
}
}
pub
struct
KvConnectorWorker
{
pub
struct
KvConnectorWorker
{
...
@@ -394,6 +399,33 @@ impl Worker for KvConnectorWorker {
...
@@ -394,6 +399,33 @@ impl Worker for KvConnectorWorker {
(
finished_offloading
,
finished_onboarding
)
(
finished_offloading
,
finished_onboarding
)
}
}
fn
submit_offload_on_event
(
&
mut
self
,
event
:
u64
)
->
anyhow
::
Result
<
()
>
{
let
operations
=
std
::
mem
::
take
(
&
mut
self
.offloading_operations
);
// Bookkeeping done synchronously while we have &mut self
for
op
in
&
operations
{
self
.connector
.record_operation
(
&
op
.request_id
,
op
.uuid
);
}
// Clone channel for async use
let
tx
=
self
.connector
.get_scheduler_tx
();
// Use std::thread since we may be in a subprocess without tokio runtime
std
::
thread
::
spawn
(
move
||
{
// Block this thread until event completes (doesn't block main thread)
event_sync_blocking
(
event
);
// Send operations to scheduler
for
op
in
operations
{
if
let
Err
(
e
)
=
tx
.send
(
SchedulerMessage
::
EnqueueRequest
(
op
))
{
tracing
::
error!
(
"Failed to send offload operation: {}"
,
e
);
}
}
});
Ok
(())
}
}
}
#[pyclass]
#[pyclass]
...
@@ -473,4 +505,10 @@ impl PyTrtllmKvConnectorWorker {
...
@@ -473,4 +505,10 @@ impl PyTrtllmKvConnectorWorker {
self
.connector_worker
self
.connector_worker
.get_finished
(
finished_gen_req_ids
,
started_loading_req_ids
)
.get_finished
(
finished_gen_req_ids
,
started_loading_req_ids
)
}
}
/// Python-facing entry point: forwards `event` to the wrapped connector worker's
/// `submit_offload_on_event` and converts any error into a Python exception via
/// `to_pyerr`.
pub fn submit_offload_on_event(&mut self, event: u64) -> PyResult<()> {
    let outcome = self.connector_worker.submit_offload_on_event(event);
    outcome.map_err(to_pyerr)
}
}
}
lib/llm/src/block_manager/connector/scheduler.rs
View file @
329752d9
...
@@ -229,6 +229,18 @@ impl WorkerSchedulerClient {
...
@@ -229,6 +229,18 @@ impl WorkerSchedulerClient {
}
}
}
}
}
}
/// Returns a new handle to the scheduler channel.
///
/// The returned sender is a clone of `self.scheduler_tx`, so the caller owns it
/// independently of this client (e.g. to move it into another thread).
pub fn get_scheduler_tx(&self) -> mpsc::UnboundedSender<SchedulerMessage> {
    let sender = &self.scheduler_tx;
    sender.clone()
}
/// Registers `uuid` as an operation of the slot belonging to `request_id`.
///
/// This is bookkeeping only: the uuid is appended to the slot's `operations` list
/// and nothing is sent to the scheduler.
///
/// # Panics
///
/// Panics with "slot does not exist" if no slot is registered for `request_id`.
pub fn record_operation(&mut self, request_id: &str, uuid: uuid::Uuid) {
    match self.slots.get_mut(request_id) {
        Some(entry) => entry.operations.push(uuid),
        None => panic!("slot does not exist"),
    }
}
}
}
pub
type
Iteration
=
u64
;
pub
type
Iteration
=
u64
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment