Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bddaaa26
Unverified
Commit
bddaaa26
authored
Mar 12, 2026
by
Yan Ru Pei
Committed by
GitHub
Mar 13, 2026
Browse files
feat(kv-router): pluggable scheduling policy for router queue [DYN-2454] (#7260)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
12785247
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
1 deletion
+23
-1
lib/llm/src/kv_router/scheduler.rs
lib/llm/src/kv_router/scheduler.rs
+9
-0
lib/mocker/src/kv_manager/vllm_backend.rs
lib/mocker/src/kv_manager/vllm_backend.rs
+1
-1
lib/runtime/src/config/environment_names.rs
lib/runtime/src/config/environment_names.rs
+13
-0
No files found.
lib/llm/src/kv_router/scheduler.rs
View file @
bddaaa26
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
pub
use
dynamo_kv_router
::
scheduling
::
policy
::
RouterSchedulingPolicy
;
pub
use
dynamo_kv_router
::
scheduling
::{
pub
use
dynamo_kv_router
::
scheduling
::{
KvSchedulerError
,
PotentialLoad
,
SchedulingRequest
,
SchedulingResponse
,
KvSchedulerError
,
PotentialLoad
,
SchedulingRequest
,
SchedulingResponse
,
};
};
...
@@ -100,12 +101,20 @@ impl KvScheduler {
...
@@ -100,12 +101,20 @@ impl KvScheduler {
let
(
request_tx
,
request_rx
)
=
tokio
::
sync
::
mpsc
::
channel
::
<
SchedulingRequest
>
(
1024
);
let
(
request_tx
,
request_rx
)
=
tokio
::
sync
::
mpsc
::
channel
::
<
SchedulingRequest
>
(
1024
);
let
scheduler_cancel_token
=
component
.drt
()
.primary_token
();
let
scheduler_cancel_token
=
component
.drt
()
.primary_token
();
let
policy
=
RouterSchedulingPolicy
::
new
(
kv_router_config
.router_queue_policy
,
block_size
as
usize
);
tracing
::
info!
(
"Router queue policy: {}"
,
kv_router_config
.router_queue_policy
);
let
queue
=
Arc
::
new
(
SchedulerQueue
::
new
(
let
queue
=
Arc
::
new
(
SchedulerQueue
::
new
(
slots
.clone
(),
slots
.clone
(),
workers_with_configs
.clone
(),
workers_with_configs
.clone
(),
kv_router_config
.router_queue_threshold
,
kv_router_config
.router_queue_threshold
,
block_size
,
block_size
,
selector
,
selector
,
policy
,
));
));
let
queue_clone
=
queue
.clone
();
let
queue_clone
=
queue
.clone
();
...
...
lib/mocker/src/kv_manager/vllm_backend.rs
View file @
bddaaa26
...
@@ -213,7 +213,7 @@ impl KvManager {
...
@@ -213,7 +213,7 @@ impl KvManager {
// If at max capacity, evict the oldest entry from inactive blocks
// If at max capacity, evict the oldest entry from inactive blocks
if
self
.cache
.is_at_capacity
()
{
if
self
.cache
.is_at_capacity
()
{
let
Some
(
evicted
)
=
self
.cache
.evict_inactive
()
else
{
let
Some
(
evicted
)
=
self
.cache
.evict_inactive
()
else
{
re
turn
allocated
;
b
re
ak
;
};
};
tracing
::
trace!
(
tracing
::
trace!
(
"Evicting block from inactive pool: {evicted:?}, dp_rank={}"
,
"Evicting block from inactive pool: {evicted:?}, dp_rank={}"
,
...
...
lib/runtime/src/config/environment_names.rs
View file @
bddaaa26
...
@@ -331,6 +331,16 @@ pub mod model {
...
@@ -331,6 +331,16 @@ pub mod model {
}
}
}
}
/// KV Router configuration environment variables
pub
mod
router
{
/// Queue threshold fraction for prefill token capacity.
/// When set, requests are queued if all workers exceed this fraction of max_num_batched_tokens.
pub
const
DYN_ROUTER_QUEUE_THRESHOLD
:
&
str
=
"DYN_ROUTER_QUEUE_THRESHOLD"
;
/// Scheduling policy for the router queue ("fcfs" or "wspt").
pub
const
DYN_ROUTER_QUEUE_POLICY
:
&
str
=
"DYN_ROUTER_QUEUE_POLICY"
;
}
/// Event Plane transport environment variables
/// Event Plane transport environment variables
pub
mod
event_plane
{
pub
mod
event_plane
{
/// Event transport selection: "zmq" or "nats". Default: "nats"
/// Event transport selection: "zmq" or "nats". Default: "nats"
...
@@ -481,6 +491,9 @@ mod tests {
...
@@ -481,6 +491,9 @@ mod tests {
model
::
huggingface
::
HF_HUB_CACHE
,
model
::
huggingface
::
HF_HUB_CACHE
,
model
::
huggingface
::
HF_HOME
,
model
::
huggingface
::
HF_HOME
,
model
::
huggingface
::
HF_HUB_OFFLINE
,
model
::
huggingface
::
HF_HUB_OFFLINE
,
// Router
router
::
DYN_ROUTER_QUEUE_THRESHOLD
,
router
::
DYN_ROUTER_QUEUE_POLICY
,
// Event Plane
// Event Plane
event_plane
::
DYN_EVENT_PLANE
,
event_plane
::
DYN_EVENT_PLANE
,
event_plane
::
DYN_EVENT_PLANE_CODEC
,
event_plane
::
DYN_EVENT_PLANE_CODEC
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment