Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
3efc733f
Unverified
Commit
3efc733f
authored
Apr 14, 2026
by
Yan Ru Pei
Committed by
GitHub
Apr 15, 2026
Browse files
fix(llm): close push-router cleanup gap (#8181)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
89d67172
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
108 additions
and
41 deletions
+108
-41
lib/llm/src/kv_router/push_router.rs
lib/llm/src/kv_router/push_router.rs
+108
-41
No files found.
lib/llm/src/kv_router/push_router.rs
View file @
3efc733f
...
...
@@ -92,6 +92,46 @@ struct RequestGuard {
deferred_close
:
Option
<
SessionCloseAction
>
,
}
struct
PendingDispatchGuard
{
chooser
:
Arc
<
KvRouter
>
,
scheduler_tracked
:
bool
,
context_id
:
String
,
deferred_close
:
Option
<
SessionCloseAction
>
,
disarmed
:
bool
,
}
fn
spawn_cleanup_task
(
chooser
:
&
Arc
<
KvRouter
>
,
scheduler_tracked
:
bool
,
context_id
:
&
str
,
deferred_close
:
Option
<
SessionCloseAction
>
,
log_context
:
&
'static
str
,
)
{
if
deferred_close
.is_none
()
&&
!
scheduler_tracked
{
return
;
}
let
Ok
(
handle
)
=
tokio
::
runtime
::
Handle
::
try_current
()
else
{
tracing
::
warn!
(
"No tokio runtime for {log_context} cleanup of request {}"
,
context_id
);
return
;
};
let
chooser
=
chooser
.clone
();
let
context_id
=
context_id
.to_owned
();
handle
.spawn
(
async
move
{
if
scheduler_tracked
&&
let
Err
(
e
)
=
chooser
.free
(
&
context_id
)
.await
{
tracing
::
warn!
(
"Failed to free request {context_id} ({log_context}): {e}"
);
}
if
let
Some
(
close
)
=
deferred_close
{
close
.execute
(
&
context_id
);
}
});
}
impl
RequestGuard
{
async
fn
on_item
(
&
mut
self
,
item
:
&
Annotated
<
LLMEngineOutput
>
)
{
if
!
self
.prefill_marked
{
...
...
@@ -208,34 +248,51 @@ impl Drop for RequestGuard {
fn
drop
(
&
mut
self
)
{
self
.record_metrics
();
let
deferred_close
=
self
.deferred_close
.take
();
let
needs_free
=
!
self
.freed
&&
self
.scheduler_tracked
;
spawn_cleanup_task
(
&
self
.chooser
,
!
self
.freed
&&
self
.scheduler_tracked
,
&
self
.context_id
,
self
.deferred_close
.take
(),
"drop guard"
,
);
}
}
if
deferred_close
.is_none
()
&&
!
needs_free
{
return
;
impl
PendingDispatchGuard
{
fn
new
(
chooser
:
Arc
<
KvRouter
>
,
scheduler_tracked
:
bool
,
context_id
:
String
,
deferred_close
:
Option
<
SessionCloseAction
>
,
)
->
Self
{
Self
{
chooser
,
scheduler_tracked
,
context_id
,
deferred_close
,
disarmed
:
false
,
}
}
let
Ok
(
handle
)
=
tokio
::
runtime
::
Handle
::
try_current
()
else
{
tracing
::
warn!
(
"No tokio runtime for drop guard cleanup of request {}"
,
self
.context_id
);
fn
disarm
(
mut
self
)
->
Option
<
SessionCloseAction
>
{
self
.disarmed
=
true
;
self
.deferred_close
.take
()
}
}
impl
Drop
for
PendingDispatchGuard
{
fn
drop
(
&
mut
self
)
{
if
self
.disarmed
{
return
;
}
;
}
// Mirror finish(): free the scheduler slot first, then fire the
// deferred session close so the worker's KV isn't released while
// generation teardown is still in progress.
let
chooser
=
self
.chooser
.clone
();
let
context_id
=
self
.context_id
.clone
();
handle
.spawn
(
async
move
{
if
needs_free
&&
let
Err
(
e
)
=
chooser
.free
(
&
context_id
)
.await
{
tracing
::
warn!
(
"Failed to free request {context_id} (drop guard): {e}"
);
}
if
let
Some
(
close
)
=
deferred_close
{
close
.execute
(
&
context_id
);
}
});
spawn_cleanup_task
(
&
self
.chooser
,
self
.scheduler_tracked
,
&
self
.context_id
,
self
.deferred_close
.take
(),
"dispatch guard"
,
);
}
}
...
...
@@ -620,6 +677,12 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
}
let
chooser
=
self
.chooser
.clone
();
let
dispatch_guard
=
PendingDispatchGuard
::
new
(
chooser
.clone
(),
scheduler_tracked
,
context_id
.clone
(),
deferred_close
,
);
let
mut
response_stream
=
self
.inner
.direct
(
updated_request
,
instance_id
)
...
...
@@ -632,28 +695,32 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
phase
=
?
phase
,
))
.await
?
;
let
deferred_close
=
dispatch_guard
.disarm
();
let
stream_context
=
response_stream
.context
();
let
context_for_monitoring
=
stream_context
.clone
();
// Build the guard before returning the stream so a drop-before-first-poll
// still frees booked scheduler state.
let
guard
=
RequestGuard
{
chooser
:
chooser
.clone
(),
scheduler_tracked
,
context_id
:
context_id
.clone
(),
tracker
:
tracker
.clone
(),
request_metrics
:
request_metrics
.clone
(),
cumulative_osl
:
0
,
metrics_recorded
:
false
,
freed
:
false
,
prefill_marked
:
false
,
first_token_recorded
:
false
,
track_output_blocks
:
scheduler_tracked
&&
track_output_blocks
,
current_total_blocks
:
isl_tokens
.div_ceil
(
block_size
),
isl_tokens
,
block_size
,
expected_output_tokens
,
deferred_close
,
};
let
wrapped_stream
=
Box
::
pin
(
async_stream
::
stream!
{
let
mut
guard
=
RequestGuard
{
chooser
:
chooser
.clone
(),
scheduler_tracked
,
context_id
:
context_id
.clone
(),
tracker
:
tracker
.clone
(),
request_metrics
:
request_metrics
.clone
(),
cumulative_osl
:
0
,
metrics_recorded
:
false
,
freed
:
false
,
prefill_marked
:
false
,
first_token_recorded
:
false
,
track_output_blocks
:
scheduler_tracked
&&
track_output_blocks
,
current_total_blocks
:
isl_tokens
.div_ceil
(
block_size
),
isl_tokens
,
block_size
,
expected_output_tokens
,
deferred_close
,
};
let
mut
guard
=
guard
;
loop
{
tokio
::
select!
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment