Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
3efc733f
Unverified
Commit
3efc733f
authored
Apr 14, 2026
by
Yan Ru Pei
Committed by
GitHub
Apr 15, 2026
Browse files
fix(llm): close push-router cleanup gap (#8181)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
89d67172
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
108 additions
and
41 deletions
+108
-41
lib/llm/src/kv_router/push_router.rs
lib/llm/src/kv_router/push_router.rs
+108
-41
No files found.
lib/llm/src/kv_router/push_router.rs
View file @
3efc733f
...
...
@@ -92,6 +92,46 @@ struct RequestGuard {
deferred_close
:
Option
<
SessionCloseAction
>
,
}
/// Cleanup guard for a request that is still being dispatched.
///
/// Unless the guard has been disarmed via `disarm`, dropping it forwards its
/// state to `spawn_cleanup_task` so the request is cleaned up asynchronously.
struct PendingDispatchGuard {
    /// Router whose `free` is invoked for the request during cleanup.
    chooser: Arc<KvRouter>,
    /// Whether cleanup should call `free` on the router for this request.
    scheduler_tracked: bool,
    /// Identifier of the request; used for cleanup calls and log messages.
    context_id: String,
    /// Session close action to execute during cleanup, if any. `disarm` moves
    /// it out and hands it back to the caller.
    deferred_close: Option<SessionCloseAction>,
    /// Set by `disarm`; when `true`, dropping the guard does nothing.
    disarmed: bool,
}
/// Schedules best-effort asynchronous cleanup for a request on the current
/// Tokio runtime.
///
/// The spawned task first calls `chooser.free(context_id)` when
/// `scheduler_tracked` is set (logging a warning if that call fails), and only
/// afterwards executes `deferred_close`, if one was supplied.
///
/// Nothing is spawned when there is no work to do, i.e. `scheduler_tracked` is
/// `false` and `deferred_close` is `None`. When no Tokio runtime is reachable
/// from the calling thread, a warning is logged and the cleanup is skipped.
///
/// `log_context` identifies the caller in the emitted warnings.
fn spawn_cleanup_task(
    chooser: &Arc<KvRouter>,
    scheduler_tracked: bool,
    context_id: &str,
    deferred_close: Option<SessionCloseAction>,
    log_context: &'static str,
) {
    // Bail out early when there is neither a slot to free nor a close to fire.
    let has_work = scheduler_tracked || deferred_close.is_some();
    if !has_work {
        return;
    }

    // The cleanup has to run as a task, so a runtime handle is required.
    let runtime = match tokio::runtime::Handle::try_current() {
        Ok(runtime) => runtime,
        Err(_) => {
            tracing::warn!(
                "No tokio runtime for {log_context} cleanup of request {}",
                context_id
            );
            return;
        }
    };

    // Owned copies so the task does not borrow from the caller.
    let router = Arc::clone(chooser);
    let context_id = String::from(context_id);

    let cleanup = async move {
        // Free first; the deferred close is only executed after `free` has
        // completed (successfully or not).
        if scheduler_tracked
            && let Err(e) = router.free(&context_id).await
        {
            tracing::warn!("Failed to free request {context_id} ({log_context}): {e}");
        }

        if let Some(close) = deferred_close {
            close.execute(&context_id);
        }
    };
    runtime.spawn(cleanup);
}
impl
RequestGuard
{
async
fn
on_item
(
&
mut
self
,
item
:
&
Annotated
<
LLMEngineOutput
>
)
{
if
!
self
.prefill_marked
{
...
...
@@ -208,34 +248,51 @@ impl Drop for RequestGuard {
fn
drop
(
&
mut
self
)
{
self
.record_metrics
();
let
deferred_close
=
self
.deferred_close
.take
();
let
needs_free
=
!
self
.freed
&&
self
.scheduler_tracked
;
if
deferred_close
.is_none
()
&&
!
needs_free
{
return
;
spawn_cleanup_task
(
&
self
.chooser
,
!
self
.freed
&&
self
.scheduler_tracked
,
&
self
.context_id
,
self
.deferred_close
.take
(),
"drop guard"
,
);
}
}
let
Ok
(
handle
)
=
tokio
::
runtime
::
Handle
::
try_current
()
else
{
tracing
::
warn!
(
"No tokio runtime for drop guard cleanup of request {}"
,
self
.context_id
);
return
;
};
impl
PendingDispatchGuard
{
/// Builds an armed guard from the given cleanup state.
///
/// The guard starts with `disarmed` set to `false`, so dropping it without
/// calling `disarm` triggers cleanup.
fn new(
    chooser: Arc<KvRouter>,
    scheduler_tracked: bool,
    context_id: String,
    deferred_close: Option<SessionCloseAction>,
) -> Self {
    // A freshly created guard is always armed.
    let disarmed = false;
    Self {
        chooser,
        scheduler_tracked,
        context_id,
        deferred_close,
        disarmed,
    }
}
// Mirror finish(): free the scheduler slot first, then fire the
// deferred session close so the worker's KV isn't released while
// generation teardown is still in progress.
let
chooser
=
self
.chooser
.clone
();
let
context_id
=
self
.context_id
.clone
();
handle
.spawn
(
async
move
{
if
needs_free
&&
let
Err
(
e
)
=
chooser
.free
(
&
context_id
)
.await
{
tracing
::
warn!
(
"Failed to free request {context_id} (drop guard): {e}"
);
/// Consumes the guard and returns its pending deferred close action, if any.
///
/// The guard is marked as disarmed before it goes out of scope, so its `Drop`
/// implementation returns without scheduling cleanup.
fn disarm(mut self) -> Option<SessionCloseAction> {
    let pending_close = self.deferred_close.take();
    self.disarmed = true;
    pending_close
}
if
let
Some
(
close
)
=
deferred_close
{
close
.execute
(
&
context_id
);
}
impl
Drop
for
PendingDispatchGuard
{
fn
drop
(
&
mut
self
)
{
if
self
.disarmed
{
return
;
}
});
spawn_cleanup_task
(
&
self
.chooser
,
self
.scheduler_tracked
,
&
self
.context_id
,
self
.deferred_close
.take
(),
"dispatch guard"
,
);
}
}
...
...
@@ -620,6 +677,12 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
}
let
chooser
=
self
.chooser
.clone
();
let
dispatch_guard
=
PendingDispatchGuard
::
new
(
chooser
.clone
(),
scheduler_tracked
,
context_id
.clone
(),
deferred_close
,
);
let
mut
response_stream
=
self
.inner
.direct
(
updated_request
,
instance_id
)
...
...
@@ -632,11 +695,12 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
phase
=
?
phase
,
))
.await
?
;
let
deferred_close
=
dispatch_guard
.disarm
();
let
stream_context
=
response_stream
.context
();
let
context_for_monitoring
=
stream_context
.clone
();
let
wrapped_stream
=
Box
::
pin
(
async_stream
::
stream!
{
let
mu
t
guard
=
RequestGuard
{
// Build the guard before returning the stream so a drop-before-first-poll
// still frees booked scheduler state.
le
t
guard
=
RequestGuard
{
chooser
:
chooser
.clone
(),
scheduler_tracked
,
context_id
:
context_id
.clone
(),
...
...
@@ -655,6 +719,9 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
deferred_close
,
};
let
wrapped_stream
=
Box
::
pin
(
async_stream
::
stream!
{
let
mut
guard
=
guard
;
loop
{
tokio
::
select!
{
biased
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment