Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c0c664a9
Unverified
Commit
c0c664a9
authored
Nov 19, 2025
by
Yan Ru Pei
Committed by
GitHub
Nov 20, 2025
Browse files
chore: better error handling in Router slot manager (#4496)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
55c66a63
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
86 additions
and
33 deletions
+86
-33
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+12
-7
lib/llm/src/kv_router/scheduler.rs
lib/llm/src/kv_router/scheduler.rs
+7
-10
lib/llm/src/kv_router/sequence.rs
lib/llm/src/kv_router/sequence.rs
+67
-16
No files found.
lib/llm/src/kv_router.rs
View file @
c0c664a9
...
@@ -46,6 +46,7 @@ use crate::{
...
@@ -46,6 +46,7 @@ use crate::{
LocalBlockHash
,
RouterRequest
,
RouterResponse
,
WorkerSelectionResult
,
WorkerWithDpRank
,
LocalBlockHash
,
RouterRequest
,
RouterResponse
,
WorkerSelectionResult
,
WorkerWithDpRank
,
},
},
scheduler
::{
KvScheduler
,
KvSchedulerError
,
PotentialLoad
,
SchedulingRequest
},
scheduler
::{
KvScheduler
,
KvSchedulerError
,
PotentialLoad
,
SchedulingRequest
},
sequence
::
SequenceError
,
subscriber
::
start_kv_router_background
,
subscriber
::
start_kv_router_background
,
},
},
local_model
::
runtime_config
::
ModelRuntimeConfig
,
local_model
::
runtime_config
::
ModelRuntimeConfig
,
...
@@ -395,22 +396,26 @@ impl KvRouter {
...
@@ -395,22 +396,26 @@ impl KvRouter {
compute_seq_hash_for_block
(
&
block_hashes
)
compute_seq_hash_for_block
(
&
block_hashes
)
});
});
self
.scheduler
if
let
Err
(
e
)
=
self
.scheduler
.add_request
(
.add_request
(
request_id
,
request_id
.clone
()
,
maybe_seq_hashes
,
maybe_seq_hashes
,
isl_tokens
,
isl_tokens
,
overlap_blocks
,
overlap_blocks
,
worker
,
worker
,
)
)
.await
;
.await
{
tracing
::
warn!
(
"Failed to add request {request_id}: {e}"
);
}
}
}
pub
async
fn
mark_prefill_completed
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
>
{
pub
async
fn
mark_prefill_completed
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
,
SequenceError
>
{
self
.scheduler
.mark_prefill_completed
(
request_id
)
.await
self
.scheduler
.mark_prefill_completed
(
request_id
)
.await
}
}
pub
async
fn
free
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
>
{
pub
async
fn
free
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
,
SequenceError
>
{
self
.scheduler
.free
(
request_id
)
.await
self
.scheduler
.free
(
request_id
)
.await
}
}
...
@@ -629,7 +634,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
...
@@ -629,7 +634,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
if
!
prefill_marked
{
if
!
prefill_marked
{
if
let
Err
(
e
)
=
chooser
.mark_prefill_completed
(
&
context_id
)
.await
{
if
let
Err
(
e
)
=
chooser
.mark_prefill_completed
(
&
context_id
)
.await
{
tracing
::
warn!
(
"Failed to mark prefill completed for request {context_id}: {e
:?
}"
);
tracing
::
warn!
(
"Failed to mark prefill completed for request {context_id}: {e}"
);
}
}
prefill_marked
=
true
;
prefill_marked
=
true
;
}
}
...
@@ -660,7 +665,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
...
@@ -660,7 +665,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
}
}
if
let
Err
(
e
)
=
chooser
.free
(
&
context_id
)
.await
{
if
let
Err
(
e
)
=
chooser
.free
(
&
context_id
)
.await
{
tracing
::
warn!
(
"Failed to free request {context_id}: {e
:?
}"
);
tracing
::
warn!
(
"Failed to free request {context_id}: {e}"
);
}
}
});
});
Ok
(
ResponseStream
::
new
(
wrapped_stream
,
stream_context
))
Ok
(
ResponseStream
::
new
(
wrapped_stream
,
stream_context
))
...
...
lib/llm/src/kv_router/scheduler.rs
View file @
c0c664a9
...
@@ -19,7 +19,7 @@ use super::RouterConfigOverride;
...
@@ -19,7 +19,7 @@ use super::RouterConfigOverride;
use
super
::
WorkerSelector
;
use
super
::
WorkerSelector
;
use
super
::
indexer
::
OverlapScores
;
use
super
::
indexer
::
OverlapScores
;
use
super
::
protocols
::{
DpRank
,
WorkerId
,
WorkerSelectionResult
,
WorkerWithDpRank
};
use
super
::
protocols
::{
DpRank
,
WorkerId
,
WorkerSelectionResult
,
WorkerWithDpRank
};
use
super
::
sequence
::
ActiveSequencesMultiWorker
;
use
super
::
sequence
::
{
ActiveSequencesMultiWorker
,
SequenceError
}
;
use
crate
::
tokens
::
SequenceHash
;
use
crate
::
tokens
::
SequenceHash
;
...
@@ -263,9 +263,7 @@ impl KvScheduler {
...
@@ -263,9 +263,7 @@ impl KvScheduler {
)
)
.await
.await
{
{
tracing
::
warn!
(
tracing
::
warn!
(
"Failed to add request {request_id}: {e}"
);
"Failed to add request {request_id} to local slot tracker: {e:?}"
);
}
}
}
}
Err
(
KvSchedulerError
::
NoEndpoints
)
=>
{
Err
(
KvSchedulerError
::
NoEndpoints
)
=>
{
...
@@ -332,20 +330,19 @@ impl KvScheduler {
...
@@ -332,20 +330,19 @@ impl KvScheduler {
isl
:
usize
,
isl
:
usize
,
overlap
:
u32
,
overlap
:
u32
,
worker
:
WorkerWithDpRank
,
worker
:
WorkerWithDpRank
,
)
{
)
->
Result
<
(),
SequenceError
>
{
let
_
=
self
self
.slots
.slots
.add_request
(
request_id
,
token_sequence
,
isl
,
overlap
,
worker
)
.add_request
(
request_id
,
token_sequence
,
isl
,
overlap
,
worker
)
.await
;
.await
}
}
pub
async
fn
mark_prefill_completed
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
>
{
pub
async
fn
mark_prefill_completed
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
,
SequenceError
>
{
self
.slots
self
.slots
.mark_prefill_completed
(
&
request_id
.to_string
())
.mark_prefill_completed
(
&
request_id
.to_string
())
.await
.await
}
}
pub
async
fn
free
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
>
{
pub
async
fn
free
(
&
self
,
request_id
:
&
str
)
->
Result
<
()
,
SequenceError
>
{
self
.slots
.free
(
&
request_id
.to_string
())
.await
self
.slots
.free
(
&
request_id
.to_string
())
.await
}
}
...
...
lib/llm/src/kv_router/sequence.rs
View file @
c0c664a9
...
@@ -43,6 +43,28 @@ use crate::kv_router::ACTIVE_SEQUENCES_SUBJECT;
...
@@ -43,6 +43,28 @@ use crate::kv_router::ACTIVE_SEQUENCES_SUBJECT;
use
crate
::
local_model
::
runtime_config
::
ModelRuntimeConfig
;
use
crate
::
local_model
::
runtime_config
::
ModelRuntimeConfig
;
use
dynamo_runtime
::
CancellationToken
;
use
dynamo_runtime
::
CancellationToken
;
/// Errors that can occur during sequence management operations
#[derive(Debug,
thiserror::Error)]
pub
enum
SequenceError
{
#[error(
"Worker {worker:?} not found"
)]
WorkerNotFound
{
worker
:
WorkerWithDpRank
},
#[error(
"Request {request_id} already exists (assigned to worker {worker:?})"
)]
DuplicateRequest
{
request_id
:
String
,
worker
:
WorkerWithDpRank
,
},
#[error(
"Request {request_id} not found"
)]
RequestNotFound
{
request_id
:
String
},
#[error(
"Failed to publish event: {0}"
)]
PublishFailed
(
#[from]
anyhow
::
Error
),
#[error(
"Failed to send command to worker: channel closed"
)]
WorkerChannelClosed
,
}
/// Duration after which stale requests are forcibly expired (5 minutes)
/// Duration after which stale requests are forcibly expired (5 minutes)
const
EXPIRY_DURATION
:
Duration
=
Duration
::
from_secs
(
300
);
const
EXPIRY_DURATION
:
Duration
=
Duration
::
from_secs
(
300
);
...
@@ -620,9 +642,18 @@ impl ActiveSequencesMultiWorker {
...
@@ -620,9 +642,18 @@ impl ActiveSequencesMultiWorker {
isl
:
usize
,
isl
:
usize
,
overlap
:
u32
,
overlap
:
u32
,
worker
:
WorkerWithDpRank
,
worker
:
WorkerWithDpRank
,
)
->
Result
<
()
>
{
)
->
Result
<
(),
SequenceError
>
{
// Check for worker existence
if
!
self
.senders
.contains_key
(
&
worker
)
{
if
!
self
.senders
.contains_key
(
&
worker
)
{
return
Err
(
anyhow
::
anyhow!
(
"Worker {:?} not found"
,
worker
));
return
Err
(
SequenceError
::
WorkerNotFound
{
worker
});
}
// Check for duplicate request
if
let
Some
(
existing_worker
)
=
self
.request_to_worker
.get
(
&
request_id
)
{
return
Err
(
SequenceError
::
DuplicateRequest
{
request_id
,
worker
:
*
existing_worker
,
});
}
}
// Create response channel
// Create response channel
...
@@ -658,12 +689,12 @@ impl ActiveSequencesMultiWorker {
...
@@ -658,12 +689,12 @@ impl ActiveSequencesMultiWorker {
overlap
,
overlap
,
resp_tx
,
resp_tx
,
})
})
.map_err
(|
_
|
anyhow
::
anyhow!
(
"Failed to send add_request command to worker"
)
)
?
;
.map_err
(|
_
|
SequenceError
::
WorkerChannelClosed
)
?
;
// Wait for response and handle removed requests
// Wait for response and handle removed requests
let
removed_requests
=
resp_rx
let
removed_requests
=
resp_rx
.await
.await
.map_err
(|
_
|
anyhow
::
anyhow!
(
"Failed to receive response from worker"
)
)
?
;
.map_err
(|
_
|
SequenceError
::
WorkerChannelClosed
)
?
;
// Remove expired requests from request_to_worker mapping
// Remove expired requests from request_to_worker mapping
for
expired_id
in
&
removed_requests
{
for
expired_id
in
&
removed_requests
{
...
@@ -673,12 +704,21 @@ impl ActiveSequencesMultiWorker {
...
@@ -673,12 +704,21 @@ impl ActiveSequencesMultiWorker {
Ok
(())
Ok
(())
}
}
pub
async
fn
free
(
&
self
,
request_id
:
&
RequestId
)
->
Result
<
()
>
{
/// Free all blocks associated with a request
let
worker
=
self
///
.request_to_worker
/// Note: This operation is idempotent. Calling it multiple times for the same request
.get
(
request_id
)
/// will log a warning but not return an error (double free is allowed).
.map
(|
entry
|
*
entry
)
pub
async
fn
free
(
&
self
,
request_id
:
&
RequestId
)
->
Result
<
(),
SequenceError
>
{
.ok_or_else
(||
anyhow
::
anyhow!
(
"Request ID not found in request_to_worker mapping"
))
?
;
// Check if request exists - if not, it's already been freed (idempotent)
let
Some
(
worker
)
=
self
.request_to_worker
.get
(
request_id
)
.map
(|
entry
|
*
entry
)
else
{
tracing
::
debug!
(
"Request {request_id} not found, already freed (idempotent)"
);
return
Ok
(());
};
// Verify worker still exists
if
!
self
.senders
.contains_key
(
&
worker
)
{
return
Err
(
SequenceError
::
WorkerNotFound
{
worker
});
}
// Publish event only if replica_sync is enabled
// Publish event only if replica_sync is enabled
if
self
.replica_sync
{
if
self
.replica_sync
{
...
@@ -700,7 +740,7 @@ impl ActiveSequencesMultiWorker {
...
@@ -700,7 +740,7 @@ impl ActiveSequencesMultiWorker {
.send
(
UpdateSequences
::
Free
{
.send
(
UpdateSequences
::
Free
{
request_id
:
request_id
.clone
(),
request_id
:
request_id
.clone
(),
})
})
.map_err
(|
_
|
anyhow
::
anyhow!
(
"Failed to send free command to worker"
)
)
?
;
.map_err
(|
_
|
SequenceError
::
WorkerChannelClosed
)
?
;
self
.request_to_worker
.remove
(
request_id
);
self
.request_to_worker
.remove
(
request_id
);
...
@@ -708,12 +748,25 @@ impl ActiveSequencesMultiWorker {
...
@@ -708,12 +748,25 @@ impl ActiveSequencesMultiWorker {
}
}
/// Mark prefill as completed for a request
/// Mark prefill as completed for a request
pub
async
fn
mark_prefill_completed
(
&
self
,
request_id
:
&
RequestId
)
->
Result
<
()
>
{
///
/// Note: Calling this multiple times for the same request is allowed and will be a no-op
/// after the first call (idempotent).
pub
async
fn
mark_prefill_completed
(
&
self
,
request_id
:
&
RequestId
,
)
->
Result
<
(),
SequenceError
>
{
let
worker
=
self
let
worker
=
self
.request_to_worker
.request_to_worker
.get
(
request_id
)
.get
(
request_id
)
.map
(|
entry
|
*
entry
)
.map
(|
entry
|
*
entry
)
.ok_or_else
(||
anyhow
::
anyhow!
(
"Request ID not found in request_to_worker mapping"
))
?
;
.ok_or_else
(||
SequenceError
::
RequestNotFound
{
request_id
:
request_id
.clone
(),
})
?
;
// Verify worker still exists
if
!
self
.senders
.contains_key
(
&
worker
)
{
return
Err
(
SequenceError
::
WorkerNotFound
{
worker
});
}
// Publish event only if replica_sync is enabled
// Publish event only if replica_sync is enabled
if
self
.replica_sync
{
if
self
.replica_sync
{
...
@@ -735,9 +788,7 @@ impl ActiveSequencesMultiWorker {
...
@@ -735,9 +788,7 @@ impl ActiveSequencesMultiWorker {
.send
(
UpdateSequences
::
MarkPrefillCompleted
{
.send
(
UpdateSequences
::
MarkPrefillCompleted
{
request_id
:
request_id
.clone
(),
request_id
:
request_id
.clone
(),
})
})
.map_err
(|
_
|
{
.map_err
(|
_
|
SequenceError
::
WorkerChannelClosed
)
?
;
anyhow
::
anyhow!
(
"Failed to send mark_prefill_completed command to worker"
)
})
?
;
Ok
(())
Ok
(())
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment