Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
134d484d
Unverified
Commit
134d484d
authored
Apr 15, 2026
by
Yan Ru Pei
Committed by
GitHub
Apr 15, 2026
Browse files
feat(kv-router): add prompt membership index for scheduler reads (#8175)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
d0d9c030
Changes
34
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
522 additions
and
299 deletions
+522
-299
Cargo.lock
Cargo.lock
+1
-1
lib/bench/Cargo.toml
lib/bench/Cargo.toml
+9
-2
lib/bench/README.md
lib/bench/README.md
+20
-8
lib/bench/kv_router/active_sequences_bench.rs
lib/bench/kv_router/active_sequences_bench.rs
+12
-6
lib/bench/kv_router/common/mod.rs
lib/bench/kv_router/common/mod.rs
+18
-0
lib/bench/multiturn_bench.rs
lib/bench/multiturn_bench.rs
+11
-1
lib/bench/offline_replay_bench.rs
lib/bench/offline_replay_bench.rs
+16
-0
lib/bindings/kvbm/Cargo.lock
lib/bindings/kvbm/Cargo.lock
+85
-87
lib/bindings/python/Cargo.lock
lib/bindings/python/Cargo.lock
+87
-89
lib/kv-router/Cargo.toml
lib/kv-router/Cargo.toml
+0
-1
lib/kv-router/src/active_set.rs
lib/kv-router/src/active_set.rs
+39
-0
lib/kv-router/src/indexer/concurrent_radix_tree.rs
lib/kv-router/src/indexer/concurrent_radix_tree.rs
+3
-10
lib/kv-router/src/indexer/positional.rs
lib/kv-router/src/indexer/positional.rs
+10
-22
lib/kv-router/src/indexer/radix_tree.rs
lib/kv-router/src/indexer/radix_tree.rs
+4
-20
lib/kv-router/src/lib.rs
lib/kv-router/src/lib.rs
+2
-0
lib/kv-router/src/scheduling/local.rs
lib/kv-router/src/scheduling/local.rs
+10
-24
lib/kv-router/src/scheduling/policy.rs
lib/kv-router/src/scheduling/policy.rs
+3
-6
lib/kv-router/src/scheduling/queue.rs
lib/kv-router/src/scheduling/queue.rs
+182
-14
lib/kv-router/src/scheduling/selector.rs
lib/kv-router/src/scheduling/selector.rs
+7
-6
lib/kv-router/src/scheduling/types.rs
lib/kv-router/src/scheduling/types.rs
+3
-2
No files found.
Cargo.lock
View file @
134d484d
...
...
@@ -2328,6 +2328,7 @@ dependencies = [
"serde_json",
"tokio",
"tokio-util",
"tracing-subscriber",
"uuid",
]
...
...
@@ -2355,7 +2356,6 @@ dependencies = [
"async-trait",
"axum 0.8.4",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
...
...
lib/bench/Cargo.toml
View file @
134d484d
...
...
@@ -11,9 +11,15 @@ homepage.workspace = true
repository.workspace
=
true
description
=
"Lightweight HTTP benchmarks for Dynamo endpoints"
[[b
in
]]
[[b
ench
]]
name
=
"multiturn_bench"
path
=
"src/bin/multiturn_bench.rs"
path
=
"multiturn_bench.rs"
harness
=
false
[[bench]]
name
=
"offline_replay_bench"
path
=
"offline_replay_bench.rs"
harness
=
false
[[bench]]
name
=
"kv_indexer_bench"
...
...
@@ -50,4 +56,5 @@ minstant = "0.1.7"
plotters
=
{
version
=
"0.3"
,
default-features
=
false
,
features
=
[
"svg_backend"
,
"line_series"
,
"point_series"
,
"full_palette"
]
}
tokio
=
{
workspace
=
true
,
features
=
[
"rt"
,
"macros"
,
"time"
]
}
tokio-util
=
{
workspace
=
true
}
tracing-subscriber
=
{
workspace
=
true
}
uuid
=
{
workspace
=
true
}
lib/bench/
src/bin/
README.md
→
lib/bench/README.md
View file @
134d484d
...
...
@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
SPDX-License-Identifier: Apache-2.0
-->
#
Multiturn Benchmark
#
Bench Entrypoints
`multiturn_bench`
simulates concurrent multi-turn conversations against an
OpenAI-compatible chat endpoint and reports per-turn TTFT and total latency
...
...
@@ -11,14 +11,14 @@ statistics. It can optionally enable **speculative prefill** — a technique tha
pre-warms the KV cache with the predicted next-turn prefix after each assistant
response, cutting TTFT on subsequent turns.
`offline_replay_bench`
runs the Rust-native replay loop directly for profiling
and throughput measurements without going through the Python wrapper.
## Quick start
```
bash
# Build
cargo build
--release
--package
dynamo-bench
--bin
multiturn_bench
# Smoke test (1 user, 1 turn, ~50 tokens)
./target/release/
multiturn_bench
--ping
cargo bench
--package
dynamo-bench
--bench
multiturn_bench
--
--ping
```
## Speculative prefill demo
...
...
@@ -45,7 +45,7 @@ python -m dynamo.frontend \
### 2. Run baseline (no speculative prefill)
```
bash
./target/release/
multiturn_bench
\
cargo bench
--package
dynamo-bench
--bench
multiturn_bench
--
\
--url
http://localhost:8000
\
--num-users
10
\
--num-turns
5
\
...
...
@@ -59,7 +59,7 @@ python -m dynamo.frontend \
### 3. Run with speculative prefill
```
bash
./target/release/
multiturn_bench
\
cargo bench
--package
dynamo-bench
--bench
multiturn_bench
--
\
--url
http://localhost:8000
\
--num-users
10
\
--num-turns
5
\
...
...
@@ -101,4 +101,16 @@ request arrives.
4.
The KV router routes the speculative request to the same worker, warming its cache.
5.
When the real next-turn request arrives, the KV router sees high cache overlap on that worker and routes there, yielding a much lower TTFT.
See also:
[
Agent Hints documentation
](
../../../../docs/components/frontend/nvext.md#agent-hints
)
See also:
[
Agent Hints documentation
](
../../docs/components/frontend/nvext.md#agent-hints
)
## Offline replay
```
bash
cargo bench
--package
dynamo-bench
--bench
offline_replay_bench
--
\
/path/to/mooncake_trace.jsonl
\
--num-workers
4
\
--router-mode
kv-router
\
--arrival-speedup-ratio
4
\
--trace-block-size
512
\
--block-size
64
```
lib/bench/kv_router/active_sequences_bench.rs
View file @
134d484d
...
...
@@ -289,13 +289,15 @@ async fn run_benchmark(
all_latencies
.extend
(
task
.await
??
);
}
if
progress
.elapsed
()
>
Duration
::
from_millis
(
benchmark_duration_ms
*
11
/
10
)
{
// Keep the post-run drain check out of the measured benchmark interval.
let
total_duration
=
progress
.elapsed
();
multi
.assert_completely_drained
(
Instant
::
now
());
if
total_duration
>
Duration
::
from_millis
(
benchmark_duration_ms
*
11
/
10
)
{
eprintln!
(
"WARNING: Benchmarker could not keep up. Rerun with a larger --benchmark-duration-ms."
);
}
let
total_duration
=
progress
.elapsed
();
let
total_ops
=
all_latencies
.len
();
let
offered_ops_throughput
=
total_ops
as
f32
/
benchmark_duration_ms
as
f32
*
1000.0
;
...
...
@@ -311,10 +313,13 @@ async fn run_benchmark(
};
println!
(
"Ops Throughput: {} ops/s (potential_blocks_and_tokens + add + prefill_complete + free)"
,
ops_throughput
"Ops Throughput: offered={} ops/s achieved={} ops/s (potential_blocks_and_tokens + add + prefill_complete + free)"
,
offered_ops_throughput
,
ops_throughput
);
println!
(
"Block Throughput: offered={} block ops/s achieved={} block ops/s"
,
offered_block_throughput
,
block_throughput
);
println!
(
"Block Throughput: {} block ops/s"
,
block_throughput
);
println!
(
"Latency p99: {}us"
,
latency_p99_us
);
Ok
(
BenchmarkResults
{
...
...
@@ -501,6 +506,7 @@ async fn run_tests() -> anyhow::Result<()> {
#[tokio::main]
async
fn
main
()
->
anyhow
::
Result
<
()
>
{
let
args
=
Args
::
parse
();
init_sequence_logging
(
args
.common.sequence_logs
);
if
args
.common.test
{
return
run_tests
()
.await
;
...
...
lib/bench/kv_router/common/mod.rs
View file @
134d484d
...
...
@@ -28,6 +28,7 @@ use serde::{Deserialize, Serialize};
use
std
::
fs
::
File
;
use
std
::
io
::{
BufRead
,
BufReader
};
use
tokio
::
task
::
JoinHandle
;
use
tracing_subscriber
::
EnvFilter
;
use
uuid
::
Uuid
;
/// Shared CLI arguments for trace-based benchmarks.
...
...
@@ -95,6 +96,23 @@ pub struct CommonArgs {
/// Ignored - passed by cargo bench harness.
#[arg(long,
hide
=
true
,
global
=
true
)]
pub
bench
:
bool
,
/// Opt in to runtime warn/error logs from the mocker and sequence tracker.
#[clap(long)]
pub
sequence_logs
:
bool
,
}
pub
fn
init_sequence_logging
(
enabled
:
bool
)
{
if
!
enabled
{
return
;
}
let
_
=
tracing_subscriber
::
fmt
()
.with_env_filter
(
EnvFilter
::
new
(
"error,dynamo_kv_router::sequences=warn,dynamo_mocker=warn"
,
))
.with_writer
(
std
::
io
::
stderr
)
.try_init
();
}
/// A single request deserialized from the mooncake trace JSONL.
...
...
lib/bench/
src/bin/
multiturn_bench.rs
→
lib/bench/multiturn_bench.rs
View file @
134d484d
...
...
@@ -8,7 +8,7 @@
//! first token) and total request latency per turn, with configurable inter-turn
//! exponential delay.
//!
//! Run with: cargo
run
--package dynamo-bench --b
in
multiturn_bench -- --help
//! Run with: cargo
bench
--package dynamo-bench --b
ench
multiturn_bench -- --help
use
anyhow
::{
Context
,
Result
};
use
clap
::
Parser
;
...
...
@@ -115,6 +115,11 @@ struct AgentHintsBody {
speculative_prefill
:
bool
,
}
fn
is_bench_harness_invocation
()
->
bool
{
let
args
:
Vec
<
_
>
=
std
::
env
::
args_os
()
.skip
(
1
)
.collect
();
args
.is_empty
()
||
args
.iter
()
.all
(|
arg
|
arg
==
"--bench"
)
}
// ---------------------------------------------------------------------------
// Turn result
// ---------------------------------------------------------------------------
...
...
@@ -533,6 +538,11 @@ fn print_per_turn_table(label: &str, stats: &[PerTurnStats]) {
#[tokio::main]
async
fn
main
()
->
Result
<
()
>
{
if
is_bench_harness_invocation
()
{
eprintln!
(
"multiturn_bench: skipping no-arg harness invocation"
);
return
Ok
(());
}
let
mut
args
=
Args
::
parse
();
if
args
.ping
{
...
...
lib/bench/
src/bin/
offline_replay_bench.rs
→
lib/bench/offline_replay_bench.rs
View file @
134d484d
...
...
@@ -5,6 +5,8 @@
//!
//! Useful for profiling replay itself without the Python CLI wrapper. This keeps
//! the default mocker perf model unless CLI overrides are provided.
//!
//! Run with: cargo bench --package dynamo-bench --bench offline_replay_bench -- --help
use
std
::
fs
::
File
;
use
std
::
path
::
PathBuf
;
...
...
@@ -30,6 +32,11 @@ impl From<RouterModeArg> for ReplayRouterMode {
}
}
fn
is_bench_harness_invocation
()
->
bool
{
let
args
:
Vec
<
_
>
=
std
::
env
::
args_os
()
.skip
(
1
)
.collect
();
args
.is_empty
()
||
args
.iter
()
.all
(|
arg
|
arg
==
"--bench"
)
}
#[derive(Parser,
Debug)]
#[command(name
=
"offline_replay_bench"
)]
#[command(about
=
"Run offline replay directly in Rust for benchmarking and profiling"
)]
...
...
@@ -84,6 +91,10 @@ struct Args {
/// Number of times to rerun the same replay in-process
#[arg(long,
default_value_t
=
1
)]
iterations
:
usize
,
/// Ignored -- passed by cargo bench
#[arg(long,
hide
=
true
)]
bench
:
bool
,
}
fn
build_engine_args
(
args
:
&
Args
)
->
Result
<
MockEngineArgs
>
{
...
...
@@ -111,6 +122,11 @@ fn build_engine_args(args: &Args) -> Result<MockEngineArgs> {
}
fn
main
()
->
Result
<
()
>
{
if
is_bench_harness_invocation
()
{
eprintln!
(
"offline_replay_bench: skipping no-arg harness invocation"
);
return
Ok
(());
}
let
args
=
Args
::
parse
();
let
engine_args
=
build_engine_args
(
&
args
)
?
;
let
started_at
=
Instant
::
now
();
...
...
lib/bindings/kvbm/Cargo.lock
View file @
134d484d
...
...
@@ -431,9 +431,9 @@ dependencies = [
[[package]]
name = "axum-macros"
version = "0.5.
0
"
version = "0.5.
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c
"
checksum = "
7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca
"
dependencies = [
"proc-macro2",
"quote",
...
...
@@ -526,7 +526,7 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"cexpr",
"clang-sys",
"itertools 0.13.0",
...
...
@@ -584,20 +584,20 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.11.
0
"
version = "2.11.
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af
"
checksum = "
c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3
"
dependencies = [
"serde_core",
]
[[package]]
name = "bitstream-io"
version = "4.
9
.0"
version = "4.
10
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757
"
checksum = "
7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f
"
dependencies = [
"
core
2",
"
no_std_io
2",
]
[[package]]
...
...
@@ -848,7 +848,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
dependencies = [
"windows-sys 0.
48.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -987,15 +987,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "core2"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
dependencies = [
"memchr",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
...
...
@@ -1442,7 +1433,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -1451,7 +1442,7 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
]
...
...
@@ -1511,7 +1502,6 @@ dependencies = [
"anyhow",
"async-trait",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
...
...
@@ -1519,7 +1509,7 @@ dependencies = [
"ordered-float 4.6.0",
"parking_lot",
"prometheus",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rmp-serde",
"rustc-hash 2.1.2",
"serde",
...
...
@@ -1546,7 +1536,7 @@ dependencies = [
"axum-server",
"base64 0.22.1",
"bincode 2.0.1",
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"blake3",
"bs62",
"bytemuck",
...
...
@@ -1587,7 +1577,7 @@ dependencies = [
"parking_lot",
"prometheus",
"prost 0.13.5",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rayon",
"reqwest",
"rmp-serde",
...
...
@@ -1647,7 +1637,7 @@ dependencies = [
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rustc-hash 2.1.2",
"serde",
"serde_json",
...
...
@@ -1735,14 +1725,14 @@ dependencies = [
"parking_lot",
"percent-encoding",
"prometheus",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rayon",
"regex",
"reqwest",
"rmp-serde",
"serde",
"serde_json",
"socket2 0.5.
8
",
"socket2 0.5.
10
",
"thiserror 2.0.18",
"tmq",
"tokio",
...
...
@@ -1895,7 +1885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -2454,7 +2444,7 @@ dependencies = [
"libc",
"log",
"num_cpus",
"rand 0.9.
2
",
"rand 0.9.
4
",
"reqwest",
"serde",
"serde_json",
...
...
@@ -2559,9 +2549,9 @@ dependencies = [
[[package]]
name = "hyper-rustls"
version = "0.27.
7
"
version = "0.27.
9
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e
3c
93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58
"
checksum = "
3
3c
a68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f
"
dependencies = [
"http",
"hyper",
...
...
@@ -2569,7 +2559,6 @@ dependencies = [
"log",
"rustls",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
...
...
@@ -2980,7 +2969,7 @@ dependencies = [
"portable-atomic",
"portable-atomic-util",
"serde_core",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -3285,9 +3274,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
[[package]]
name = "libc"
version = "0.2.18
4
"
version = "0.2.18
5
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4a
f"
checksum = "
52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8
f"
[[package]]
name = "libfuzzer-sys"
...
...
@@ -3331,7 +3320,7 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"libc",
"plain",
"redox_syscall 0.7.4",
...
...
@@ -3769,7 +3758,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"byteorder",
"derive_builder",
"getset",
...
...
@@ -3817,7 +3806,7 @@ version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"cfg-if",
"cfg_aliases",
"libc",
...
...
@@ -3856,6 +3845,15 @@ dependencies = [
"signatory",
]
[[package]]
name = "no_std_io2"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b51ed7824b6e07d354605f4abb3d9d300350701299da96642ee084f5ce631550"
dependencies = [
"memchr",
]
[[package]]
name = "nom"
version = "7.1.3"
...
...
@@ -3893,7 +3891,7 @@ version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"filetime",
"fsevent-sys",
"inotify",
...
...
@@ -3911,7 +3909,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -4028,7 +4026,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-foundation",
]
...
...
@@ -4049,7 +4047,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"dispatch2",
"objc2",
]
...
...
@@ -4060,7 +4058,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"dispatch2",
"objc2",
"objc2-core-foundation",
...
...
@@ -4093,7 +4091,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-core-foundation",
"objc2-core-graphics",
...
...
@@ -4111,7 +4109,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"block2",
"libc",
"objc2",
...
...
@@ -4124,7 +4122,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-core-foundation",
]
...
...
@@ -4135,7 +4133,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-core-foundation",
"objc2-foundation",
...
...
@@ -4147,7 +4145,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"block2",
"objc2",
"objc2-cloud-kit",
...
...
@@ -4194,7 +4192,7 @@ dependencies = [
"parking_lot",
"percent-encoding",
"quick-xml",
"rand 0.9.
2
",
"rand 0.9.
4
",
"reqwest",
"ring",
"rustls-pemfile",
...
...
@@ -4244,7 +4242,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"libc",
"once_cell",
"onig_sys",
...
...
@@ -4378,7 +4376,7 @@ dependencies = [
"futures-util",
"opentelemetry",
"percent-encoding",
"rand 0.9.
2
",
"rand 0.9.
4
",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
...
...
@@ -4697,9 +4695,9 @@ dependencies = [
[[package]]
name = "pkg-config"
version = "0.3.3
2
"
version = "0.3.3
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c
"
checksum = "
19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e
"
[[package]]
name = "plain"
...
...
@@ -4713,7 +4711,7 @@ version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"crc32fast",
"fdeflate",
"flate2",
...
...
@@ -4978,7 +4976,7 @@ version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"memchr",
"unicase",
]
...
...
@@ -5156,7 +5154,7 @@ dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.
2
",
"rand 0.9.
4
",
"ring",
"rustc-hash 2.1.2",
"rustls",
...
...
@@ -5179,7 +5177,7 @@ dependencies = [
"once_cell",
"socket2 0.6.3",
"tracing",
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
[[package]]
...
...
@@ -5216,9 +5214,9 @@ dependencies = [
[[package]]
name = "rand"
version = "0.9.
2
"
version = "0.9.
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1
"
checksum = "
44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea
"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.5",
...
...
@@ -5289,7 +5287,7 @@ dependencies = [
"num-traits",
"paste",
"profiling",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rand_chacha 0.9.0",
"simd_helpers",
"thiserror 2.0.18",
...
...
@@ -5320,9 +5318,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.1
1
.0"
version = "1.1
2
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f
"
checksum = "
fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d
"
dependencies = [
"either",
"rayon-core",
...
...
@@ -5355,7 +5353,7 @@ version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
]
[[package]]
...
...
@@ -5364,7 +5362,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
]
[[package]]
...
...
@@ -5429,9 +5427,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.2
4
"
version = "0.12.2
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f
"
checksum = "
eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147
"
dependencies = [
"base64 0.22.1",
"bytes",
...
...
@@ -5519,7 +5517,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4147b952f3f819eca0e99527022f7d6a8d05f111aeb0a62960c74eb283bec8fc"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"once_cell",
"serde",
"serde_derive",
...
...
@@ -5598,7 +5596,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"errno",
"libc",
"linux-raw-sys 0.4.15",
...
...
@@ -5611,25 +5609,25 @@ version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"errno",
"libc",
"linux-raw-sys 0.12.1",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
name = "rustls"
version = "0.23.3
7
"
version = "0.23.3
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4
"
checksum = "
69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21
"
dependencies = [
"aws-lc-rs",
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.1
1
",
"rustls-webpki 0.103.1
2
",
"subtle",
"zeroize",
]
...
...
@@ -5690,9 +5688,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.103.1
1
"
version = "0.103.1
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4
"
checksum = "
8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06
"
dependencies = [
"aws-lc-rs",
"ring",
...
...
@@ -5845,7 +5843,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
...
...
@@ -5858,7 +5856,7 @@ version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"core-foundation 0.10.1",
"core-foundation-sys",
"libc",
...
...
@@ -6182,9 +6180,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.5.
8
"
version = "0.5.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe
8"
checksum = "
e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c067
8"
dependencies = [
"libc",
"windows-sys 0.52.0",
...
...
@@ -6324,7 +6322,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"core-foundation 0.9.4",
"system-configuration-sys",
]
...
...
@@ -6368,7 +6366,7 @@ dependencies = [
"getrandom 0.4.2",
"once_cell",
"rustix 1.1.4",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -6558,7 +6556,7 @@ dependencies = [
"monostate",
"onig",
"paste",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rayon",
"rayon-cond",
"regex",
...
...
@@ -6772,7 +6770,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost 0.13.5",
"socket2 0.5.
8
",
"socket2 0.5.
10
",
"tokio",
"tokio-stream",
"tower",
...
...
@@ -6890,7 +6888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"base64 0.22.1",
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"bytes",
"futures-util",
"http",
...
...
@@ -7520,7 +7518,7 @@ version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"hashbrown 0.15.5",
"indexmap 2.14.0",
"semver",
...
...
@@ -7592,7 +7590,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.
48.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -7978,7 +7976,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"indexmap 2.14.0",
"log",
"serde",
...
...
lib/bindings/python/Cargo.lock
View file @
134d484d
...
...
@@ -431,9 +431,9 @@ dependencies = [
[[package]]
name = "axum-macros"
version = "0.5.
0
"
version = "0.5.
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c
"
checksum = "
7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca
"
dependencies = [
"proc-macro2",
"quote",
...
...
@@ -526,7 +526,7 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"cexpr",
"clang-sys",
"itertools 0.13.0",
...
...
@@ -544,7 +544,7 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"cexpr",
"clang-sys",
"itertools 0.13.0",
...
...
@@ -602,20 +602,20 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.11.
0
"
version = "2.11.
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af
"
checksum = "
c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3
"
dependencies = [
"serde_core",
]
[[package]]
name = "bitstream-io"
version = "4.
9
.0"
version = "4.
10
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757
"
checksum = "
7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f
"
dependencies = [
"
core
2",
"
no_std_io
2",
]
[[package]]
...
...
@@ -866,7 +866,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
dependencies = [
"windows-sys 0.
48.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -1005,15 +1005,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "core2"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
dependencies = [
"memchr",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
...
...
@@ -1460,7 +1451,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -1469,7 +1460,7 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
]
...
...
@@ -1520,7 +1511,6 @@ dependencies = [
"async-trait",
"axum",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
...
...
@@ -1528,7 +1518,7 @@ dependencies = [
"ordered-float 4.6.0",
"parking_lot",
"prometheus",
"rand 0.9.
2
",
"rand 0.9.
4
",
"reqwest",
"rmp-serde",
"rustc-hash 2.1.2",
...
...
@@ -1558,7 +1548,7 @@ dependencies = [
"axum-server",
"base64 0.22.1",
"bincode 2.0.1",
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"blake3",
"bs62",
"bytemuck",
...
...
@@ -1601,7 +1591,7 @@ dependencies = [
"parking_lot",
"prometheus",
"prost 0.13.5",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rayon",
"reqwest",
"rmp-serde",
...
...
@@ -1662,7 +1652,7 @@ dependencies = [
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rustc-hash 2.1.2",
"serde",
"serde_json",
...
...
@@ -1782,14 +1772,14 @@ dependencies = [
"parking_lot",
"percent-encoding",
"prometheus",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rayon",
"regex",
"reqwest",
"rmp-serde",
"serde",
"serde_json",
"socket2 0.5.
8
",
"socket2 0.5.
10
",
"thiserror 2.0.18",
"tmq",
"tokio",
...
...
@@ -1942,7 +1932,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -2093,7 +2083,7 @@ version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"ffmpeg-sys-next",
"libc",
]
...
...
@@ -2526,7 +2516,7 @@ dependencies = [
"libc",
"log",
"num_cpus",
"rand 0.9.
2
",
"rand 0.9.
4
",
"reqwest",
"serde",
"serde_json",
...
...
@@ -2631,9 +2621,9 @@ dependencies = [
[[package]]
name = "hyper-rustls"
version = "0.27.
7
"
version = "0.27.
9
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e
3c
93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58
"
checksum = "
3
3c
a68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f
"
dependencies = [
"http",
"hyper",
...
...
@@ -2641,7 +2631,6 @@ dependencies = [
"log",
"rustls",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
...
...
@@ -3052,7 +3041,7 @@ dependencies = [
"portable-atomic",
"portable-atomic-util",
"serde_core",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -3336,9 +3325,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
[[package]]
name = "libc"
version = "0.2.18
4
"
version = "0.2.18
5
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4a
f"
checksum = "
52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8
f"
[[package]]
name = "libfuzzer-sys"
...
...
@@ -3382,7 +3371,7 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"libc",
"plain",
"redox_syscall 0.7.4",
...
...
@@ -3829,7 +3818,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"byteorder",
"derive_builder",
"getset",
...
...
@@ -3877,7 +3866,7 @@ version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"cfg-if",
"cfg_aliases",
"libc",
...
...
@@ -3916,6 +3905,15 @@ dependencies = [
"signatory",
]
[[package]]
name = "no_std_io2"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b51ed7824b6e07d354605f4abb3d9d300350701299da96642ee084f5ce631550"
dependencies = [
"memchr",
]
[[package]]
name = "nom"
version = "7.1.3"
...
...
@@ -3953,7 +3951,7 @@ version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"filetime",
"fsevent-sys",
"inotify",
...
...
@@ -3971,7 +3969,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -4088,7 +4086,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-foundation",
]
...
...
@@ -4109,7 +4107,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"dispatch2",
"objc2",
]
...
...
@@ -4120,7 +4118,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"dispatch2",
"objc2",
"objc2-core-foundation",
...
...
@@ -4153,7 +4151,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-core-foundation",
"objc2-core-graphics",
...
...
@@ -4171,7 +4169,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"block2",
"libc",
"objc2",
...
...
@@ -4184,7 +4182,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-core-foundation",
]
...
...
@@ -4195,7 +4193,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"objc2",
"objc2-core-foundation",
"objc2-foundation",
...
...
@@ -4207,7 +4205,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"block2",
"objc2",
"objc2-cloud-kit",
...
...
@@ -4254,7 +4252,7 @@ dependencies = [
"parking_lot",
"percent-encoding",
"quick-xml",
"rand 0.9.
2
",
"rand 0.9.
4
",
"reqwest",
"ring",
"rustls-pemfile",
...
...
@@ -4304,7 +4302,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"libc",
"once_cell",
"onig_sys",
...
...
@@ -4438,7 +4436,7 @@ dependencies = [
"futures-util",
"opentelemetry",
"percent-encoding",
"rand 0.9.
2
",
"rand 0.9.
4
",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
...
...
@@ -4757,9 +4755,9 @@ dependencies = [
[[package]]
name = "pkg-config"
version = "0.3.3
2
"
version = "0.3.3
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c
"
checksum = "
19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e
"
[[package]]
name = "plain"
...
...
@@ -4773,7 +4771,7 @@ version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"crc32fast",
"fdeflate",
"flate2",
...
...
@@ -5038,7 +5036,7 @@ version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"memchr",
"unicase",
]
...
...
@@ -5226,7 +5224,7 @@ dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.
2
",
"rand 0.9.
4
",
"ring",
"rustc-hash 2.1.2",
"rustls",
...
...
@@ -5249,7 +5247,7 @@ dependencies = [
"once_cell",
"socket2 0.6.3",
"tracing",
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
[[package]]
...
...
@@ -5286,9 +5284,9 @@ dependencies = [
[[package]]
name = "rand"
version = "0.9.
2
"
version = "0.9.
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1
"
checksum = "
44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea
"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.5",
...
...
@@ -5359,7 +5357,7 @@ dependencies = [
"num-traits",
"paste",
"profiling",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rand_chacha 0.9.0",
"simd_helpers",
"thiserror 2.0.18",
...
...
@@ -5390,9 +5388,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.1
1
.0"
version = "1.1
2
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f
"
checksum = "
fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d
"
dependencies = [
"either",
"rayon-core",
...
...
@@ -5425,7 +5423,7 @@ version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
]
[[package]]
...
...
@@ -5434,7 +5432,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
]
[[package]]
...
...
@@ -5499,9 +5497,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.2
4
"
version = "0.12.2
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f
"
checksum = "
eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147
"
dependencies = [
"base64 0.22.1",
"bytes",
...
...
@@ -5589,7 +5587,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4147b952f3f819eca0e99527022f7d6a8d05f111aeb0a62960c74eb283bec8fc"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"once_cell",
"serde",
"serde_derive",
...
...
@@ -5668,7 +5666,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"errno",
"libc",
"linux-raw-sys 0.4.15",
...
...
@@ -5681,25 +5679,25 @@ version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"errno",
"libc",
"linux-raw-sys 0.12.1",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
name = "rustls"
version = "0.23.3
7
"
version = "0.23.3
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4
"
checksum = "
69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21
"
dependencies = [
"aws-lc-rs",
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.1
1
",
"rustls-webpki 0.103.1
2
",
"subtle",
"zeroize",
]
...
...
@@ -5760,9 +5758,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.103.1
1
"
version = "0.103.1
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4
"
checksum = "
8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06
"
dependencies = [
"aws-lc-rs",
"ring",
...
...
@@ -5915,7 +5913,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
...
...
@@ -5928,7 +5926,7 @@ version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"core-foundation 0.10.1",
"core-foundation-sys",
"libc",
...
...
@@ -6252,9 +6250,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.5.
8
"
version = "0.5.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe
8"
checksum = "
e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c067
8"
dependencies = [
"libc",
"windows-sys 0.52.0",
...
...
@@ -6394,7 +6392,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"core-foundation 0.9.4",
"system-configuration-sys",
]
...
...
@@ -6438,7 +6436,7 @@ dependencies = [
"getrandom 0.4.2",
"once_cell",
"rustix 1.1.4",
"windows-sys 0.
59.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -6628,7 +6626,7 @@ dependencies = [
"monostate",
"onig",
"paste",
"rand 0.9.
2
",
"rand 0.9.
4
",
"rayon",
"rayon-cond",
"regex",
...
...
@@ -6842,7 +6840,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost 0.13.5",
"socket2 0.5.
8
",
"socket2 0.5.
10
",
"tokio",
"tokio-stream",
"tower",
...
...
@@ -6960,7 +6958,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"base64 0.22.1",
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"bytes",
"futures-util",
"http",
...
...
@@ -7607,7 +7605,7 @@ version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"hashbrown 0.15.5",
"indexmap 2.14.0",
"semver",
...
...
@@ -7679,7 +7677,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.
48.0
",
"windows-sys 0.
61.2
",
]
[[package]]
...
...
@@ -8065,7 +8063,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
"bitflags 2.11.
0
",
"bitflags 2.11.
1
",
"indexmap 2.14.0",
"log",
"serde",
...
...
lib/kv-router/Cargo.toml
View file @
134d484d
...
...
@@ -30,7 +30,6 @@ async-trait = { workspace = true }
dashmap
=
{
workspace
=
true
}
ordered-float
=
{
workspace
=
true
}
derive_builder
=
{
workspace
=
true
}
derive-getters
=
{
workspace
=
true
}
prometheus
=
{
workspace
=
true
,
optional
=
true
}
rand
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
...
...
lib/kv-router/src/active_set.rs
0 → 100644
View file @
134d484d
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
rustc_hash
::
FxHashSet
;
use
crate
::
protocols
::
WorkerWithDpRank
;
/// Narrows `active` to the workers that also appear in `next`, reporting every
/// worker that gets removed through `on_drop`.
///
/// * When both sets have the same length the call is a no-op; membership is not
///   compared in that case.
/// * When `next` is strictly smaller and fully contained in `active`, the dropped
///   workers are reported and `active` is overwritten with a copy of `next`.
/// * Otherwise `active` is filtered in place, so workers that exist only in `next`
///   are never added.
#[inline]
pub(crate) fn reconcile_active_workers(
    active: &mut FxHashSet<WorkerWithDpRank>,
    next: &FxHashSet<WorkerWithDpRank>,
    mut on_drop: impl FnMut(WorkerWithDpRank),
) {
    if next.len() == active.len() {
        return;
    }

    let next_is_strict_subset =
        next.len() < active.len() && next.iter().all(|candidate| active.contains(candidate));

    if !next_is_strict_subset {
        // General path: keep the intersection, reporting each removed worker.
        active.retain(|candidate| {
            let keep = next.contains(candidate);
            if !keep {
                on_drop(*candidate);
            }
            keep
        });
        return;
    }

    // Subset path: report everything that is about to disappear, then copy `next`
    // wholesale (it already equals the intersection).
    active
        .iter()
        .copied()
        .filter(|candidate| !next.contains(candidate))
        .for_each(|dropped| on_drop(dropped));
    active.clone_from(next);
}
lib/kv-router/src/indexer/concurrent_radix_tree.rs
View file @
134d484d
...
...
@@ -33,6 +33,7 @@ use std::collections::VecDeque;
use
std
::
sync
::
atomic
::{
AtomicUsize
,
Ordering
};
use
super
::{
EventKind
,
KvIndexerMetrics
,
SyncIndexer
,
WorkerTask
};
use
crate
::
active_set
::
reconcile_active_workers
;
use
crate
::
protocols
::
*
;
/// Thread-safe shared reference to a Block.
...
...
@@ -236,16 +237,8 @@ impl ConcurrentRadixTree {
let
child_count
=
guard
.workers
.len
();
if
child_count
!=
active_count
{
// Workers changed: either dropped out (child < active) or
// stale entries exist (child > active). In both cases,
// retain only workers present in the child, scoring dropouts.
active
.retain
(|
w
|
{
if
guard
.workers
.contains
(
w
)
{
true
}
else
{
scores
.scores
.insert
(
*
w
,
matched_depth
);
false
}
reconcile_active_workers
(
&
mut
active
,
&
guard
.workers
,
|
worker
|
{
scores
.scores
.insert
(
worker
,
matched_depth
);
});
active_count
=
active
.len
();
...
...
lib/kv-router/src/indexer/positional.rs
View file @
134d484d
...
...
@@ -26,6 +26,7 @@ use std::sync::Arc;
use
std
::
sync
::
atomic
::{
AtomicUsize
,
Ordering
};
use
super
::{
EventKind
,
KvIndexerMetrics
,
SyncIndexer
,
WorkerTask
};
use
crate
::
active_set
::
reconcile_active_workers
;
use
crate
::
protocols
::{
DpRank
,
ExternalSequenceBlockHash
,
KvCacheEvent
,
KvCacheEventData
,
KvCacheEventError
,
KvCacheStoreData
,
KvCacheStoredBlockData
,
LocalBlockHash
,
OverlapScores
,
RouterEvent
,
WorkerId
,
...
...
@@ -471,18 +472,6 @@ impl PositionalIndexer {
// -----------------------------------------------------------------------------
impl
PositionalIndexer
{
/// Score all active workers at the given position and clear the active set.
#[inline]
fn
drain_active
(
active
:
&
mut
FxHashSet
<
WorkerWithDpRank
>
,
scores
:
&
mut
OverlapScores
,
pos
:
usize
,
)
{
for
worker
in
active
.drain
()
{
scores
.scores
.insert
(
worker
,
pos
as
u32
);
}
}
/// Compute sequence hash incrementally from previous hash and current local hash.
#[inline]
fn
compute_next_seq_hash
(
prev_seq_hash
:
u64
,
current_local_hash
:
u64
)
->
u64
{
...
...
@@ -581,24 +570,23 @@ impl PositionalIndexer {
}
let
Some
(
entry
)
=
self
.index
.get
(
&
(
pos
,
sequence
[
pos
]))
else
{
Self
::
drain_active
(
active
,
scores
,
pos
);
for
worker
in
active
.drain
()
{
scores
.scores
.insert
(
worker
,
pos
as
u32
);
}
break
;
};
Self
::
ensure_seq_hash_computed
(
seq_hashes
,
pos
,
sequence
);
let
Some
(
workers
)
=
entry
.get
(
seq_hashes
[
pos
])
else
{
Self
::
drain_active
(
active
,
scores
,
pos
);
for
worker
in
active
.drain
()
{
scores
.scores
.insert
(
worker
,
pos
as
u32
);
}
break
;
};
if
workers
.len
()
<
active
.len
()
{
active
.retain
(|
w
|
{
if
workers
.contains
(
w
)
{
true
}
else
{
scores
.scores
.insert
(
*
w
,
pos
as
u32
);
false
}
if
workers
.len
()
!=
active
.len
()
{
reconcile_active_workers
(
active
,
workers
,
|
worker
|
{
scores
.scores
.insert
(
worker
,
pos
as
u32
);
});
}
...
...
lib/kv-router/src/indexer/radix_tree.rs
View file @
134d484d
...
...
@@ -23,6 +23,7 @@ use std::{
use
rustc_hash
::{
FxHashMap
,
FxHashSet
};
use
crate
::
active_set
::
reconcile_active_workers
;
use
crate
::
protocols
::
*
;
/// A shared reference to a [`RadixBlock`].
...
...
@@ -251,26 +252,9 @@ impl RadixTree {
let
borrow
=
block
.borrow
();
let
child_count
=
borrow
.workers
.len
();
if
child_count
<
active_count
{
// Workers dropped out. Record scores for those that left.
// Score = matched_depth (number of nodes they were present at).
for
worker
in
&
active
{
if
!
borrow
.workers
.contains
(
worker
)
{
scores
.scores
.insert
(
*
worker
,
matched_depth
);
}
}
active
.clone_from
(
&
borrow
.workers
);
active_count
=
child_count
;
}
else
if
child_count
>
active_count
{
// Stale entries: child retains workers already removed from
// an ancestor. Fall back to full membership check.
active
.retain
(|
w
|
{
if
borrow
.workers
.contains
(
w
)
{
true
}
else
{
scores
.scores
.insert
(
*
w
,
matched_depth
);
false
}
if
child_count
!=
active_count
{
reconcile_active_workers
(
&
mut
active
,
&
borrow
.workers
,
|
worker
|
{
scores
.scores
.insert
(
worker
,
matched_depth
);
});
active_count
=
active
.len
();
}
...
...
lib/kv-router/src/lib.rs
View file @
134d484d
...
...
@@ -6,6 +6,8 @@
//! This crate provides the core radix tree implementation and protocols for
//! efficient KV cache lookup and routing in distributed LLM inference systems.
mod
active_set
;
pub
mod
indexer
;
pub
mod
protocols
;
pub
mod
recovery
;
...
...
lib/kv-router/src/scheduling/local.rs
View file @
134d484d
...
...
@@ -5,7 +5,8 @@ use std::collections::{HashMap, HashSet};
use
std
::
sync
::
Arc
;
use
std
::
time
::
Duration
;
use
tokio
::
sync
::{
mpsc
,
watch
};
use
rustc_hash
::{
FxHashMap
,
FxHashSet
};
use
tokio
::
sync
::
watch
;
use
tokio
::
time
::
Instant
;
use
tokio_util
::
sync
::
CancellationToken
;
...
...
@@ -27,7 +28,6 @@ where
S
:
SchedulingPolicy
,
Sel
:
WorkerSelector
<
C
>
,
{
request_tx
:
mpsc
::
Sender
<
SchedulingRequest
>
,
slots
:
Arc
<
ActiveSequencesMultiWorker
<
P
>>
,
queue
:
Arc
<
SchedulerQueue
<
P
,
C
,
S
,
Sel
>>
,
queue_updates
:
watch
::
Sender
<
()
>
,
...
...
@@ -109,9 +109,8 @@ where
prefill_load_estimator
,
));
let
(
queue_updates
,
_
)
=
watch
::
channel
(());
let
(
request_tx
,
request_rx
)
=
mpsc
::
channel
::
<
SchedulingRequest
>
(
1024
);
let
queue_clone
=
Arc
::
clone
(
&
queue
);
let
queue_remote_updates
=
Arc
::
clone
(
&
queue
);
let
queue_periodic_updates
=
Arc
::
clone
(
&
queue
);
let
mut
remote_state_updates
=
slots
.subscribe_remote_state_changes
();
let
remote_update_cancel_token
=
cancellation_token
.clone
();
let
queue_updates_remote
=
queue_updates
.clone
();
...
...
@@ -138,33 +137,23 @@ where
});
tokio
::
spawn
(
async
move
{
let
mut
request_rx
=
request_rx
;
let
mut
recheck_interval
=
tokio
::
time
::
interval
(
recheck_interval
);
tracing
::
trace!
(
"LocalScheduler
background
task started"
);
tracing
::
trace!
(
"LocalScheduler
periodic queue update
task started"
);
loop
{
tokio
::
select!
{
_
=
cancellation_token
.cancelled
()
=>
{
tracing
::
trace!
(
"LocalScheduler
background
task shutting down"
);
tracing
::
trace!
(
"LocalScheduler
periodic queue update
task shutting down"
);
break
;
}
request
=
request_rx
.recv
()
=>
{
let
Some
(
request
)
=
request
else
{
tracing
::
warn!
(
"LocalScheduler request channel closed"
);
break
;
};
tracing
::
trace!
(
"received request to be scheduled"
);
queue_clone
.enqueue
(
request
)
.await
;
}
_
=
recheck_interval
.tick
()
=>
{
queue_
clone
.update
()
.await
;
queue_
periodic_updates
.update
()
.await
;
}
}
}
});
Self
{
request_tx
,
slots
,
queue
,
queue_updates
,
...
...
@@ -197,8 +186,8 @@ where
token_seq
,
isl_tokens
,
overlaps
,
decode_blocks
:
HashMap
::
new
(),
prefill_tokens
:
HashMap
::
new
(),
decode_blocks
:
Fx
HashMap
::
default
(),
prefill_tokens
:
Fx
HashMap
::
default
(),
track_prefill_tokens
,
router_config_override
:
router_config_override
.cloned
(),
update_states
,
...
...
@@ -210,10 +199,7 @@ where
resp_tx
:
Some
(
resp_tx
),
};
self
.request_tx
.send
(
request
)
.await
.map_err
(|
_
|
KvSchedulerError
::
SubscriberShutdown
)
?
;
self
.queue
.enqueue
(
request
)
.await
;
resp_rx
.await
...
...
@@ -284,7 +270,7 @@ where
decay_now
,
);
let
mut
workers
:
HashSet
<
WorkerWithDpRank
>
=
HashSet
::
new
();
let
mut
workers
:
Fx
HashSet
<
WorkerWithDpRank
>
=
Fx
HashSet
::
default
();
workers
.extend
(
decode_blocks
.keys
()
.copied
());
workers
.extend
(
prefill_tokens
.keys
()
.copied
());
...
...
lib/kv-router/src/scheduling/policy.rs
View file @
134d484d
...
...
@@ -3,10 +3,9 @@
use
std
::
time
::
Duration
;
use
ordered_float
::
OrderedFloat
;
use
super
::
config
::
RouterQueuePolicy
;
use
super
::
types
::
SchedulingRequest
;
use
ordered_float
::
OrderedFloat
;
/// Pluggable scheduling policy that determines queue ordering.
/// Monomorphized for zero-cost inlining on the hot comparison path.
///
...
...
@@ -115,8 +114,6 @@ impl SchedulingPolicy for RouterSchedulingPolicy {
#[cfg(test)]
mod
tests
{
use
std
::
collections
::
HashMap
;
use
rustc_hash
::
FxHashMap
;
use
super
::
*
;
...
...
@@ -132,8 +129,8 @@ mod tests {
token_seq
:
None
,
isl_tokens
,
overlaps
,
decode_blocks
:
HashMap
::
new
(),
prefill_tokens
:
HashMap
::
new
(),
decode_blocks
:
Fx
HashMap
::
default
(),
prefill_tokens
:
Fx
HashMap
::
default
(),
track_prefill_tokens
:
true
,
router_config_override
:
None
,
update_states
:
false
,
...
...
lib/kv-router/src/scheduling/queue.rs
View file @
134d484d
...
...
@@ -57,6 +57,8 @@ pub struct SchedulerQueue<
Sel
:
WorkerSelector
<
C
>
=
DefaultWorkerSelector
,
>
{
pending
:
Mutex
<
BinaryHeap
<
QueueEntry
<
S
::
Key
>>>
,
/// Serializes admission so worker selection always sees prior bookings.
admission_gate
:
Mutex
<
()
>
,
/// Number of requests currently parked in the pending queue.
/// Incremented after push, decremented after pop. Lock-free reads via `Relaxed` load.
pending_count
:
AtomicUsize
,
...
...
@@ -96,6 +98,7 @@ impl<
}
Self
{
pending
:
Mutex
::
new
(
BinaryHeap
::
new
()),
admission_gate
:
Mutex
::
new
(()),
pending_count
:
AtomicUsize
::
new
(
0
),
pending_isl_tokens
:
AtomicUsize
::
new
(
0
),
slots
,
...
...
@@ -145,17 +148,19 @@ impl<
return
;
}
let
_
admission
=
self
.admission_gate
.lock
()
.await
;
let
decay_now
=
Instant
::
now
();
let
Some
(
threshold
)
=
self
.threshold_frac
else
{
self
.
schedul
e
(
request
,
Instant
::
now
()
)
.await
;
self
.
admit_on
e
(
request
,
decay_
now
)
.await
;
return
;
};
if
request
.bypass_capacity_check
()
{
self
.
schedul
e
(
request
,
Instant
::
now
()
)
.await
;
self
.
admit_on
e
(
request
,
decay_
now
)
.await
;
return
;
}
let
decay_now
=
Instant
::
now
();
if
self
.all_workers_busy
(
threshold
,
request
.allowed_worker_ids
.as_ref
(),
...
...
@@ -171,7 +176,7 @@ impl<
self
.pending_isl_tokens
.fetch_add
(
isl_tokens
,
AtomicOrdering
::
Relaxed
);
}
else
{
self
.
schedul
e
(
request
,
decay_now
)
.await
;
self
.
admit_on
e
(
request
,
decay_now
)
.await
;
}
}
...
...
@@ -198,6 +203,7 @@ impl<
}
loop
{
let
_
admission
=
self
.admission_gate
.lock
()
.await
;
let
decay_now
=
Instant
::
now
();
let
mut
heap
=
self
.pending
.lock
()
.await
;
let
Some
(
front
)
=
heap
.peek
()
else
{
...
...
@@ -221,13 +227,13 @@ impl<
self
.pending_isl_tokens
.fetch_sub
(
entry
.request.isl_tokens
,
AtomicOrdering
::
Relaxed
);
tracing
::
debug!
(
"scheduling request from pending queue"
);
self
.
schedul
e
(
entry
.request
,
decay_now
)
.await
;
self
.
admit_on
e
(
entry
.request
,
decay_now
)
.await
;
}
}
/// Run the full scheduling pipeline for a single request:
/// compute potential load -> select worker -> respond -> book via add_request.
async
fn
schedul
e
(
&
self
,
mut
request
:
SchedulingRequest
,
decay_now
:
Instant
)
{
async
fn
admit_on
e
(
&
self
,
mut
request
:
SchedulingRequest
,
decay_now
:
Instant
)
{
let
(
decode_blocks
,
prefill_tokens
)
=
self
.slots
.potential_blocks_and_tokens_with_prefill_tracking
(
...
...
@@ -396,17 +402,18 @@ impl<
#[cfg(test)]
mod
tests
{
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
{
Arc
,
Condvar
,
Mutex
as
StdMutex
}
;
use
std
::
time
::
Duration
;
use
tokio
::
sync
::
watch
;
use
rustc_hash
::
FxHashMap
;
use
tokio
::
sync
::{
Barrier
,
watch
};
use
super
::
*
;
use
crate
::
protocols
::
OverlapScores
;
use
crate
::
protocols
::
{
OverlapScores
,
WorkerSelectionResult
,
WorkerWithDpRank
}
;
use
crate
::
scheduling
::
types
::
KvSchedulerError
;
use
crate
::
selector
::
DefaultWorkerSelector
;
use
crate
::
sequences
::
ActiveSequencesMultiWorker
;
use
crate
::
test_utils
::{
NoopSequencePublisher
,
SimpleWorkerConfig
};
use
crate
::{
DefaultWorkerSelector
,
WorkerSelector
};
fn
decay_now
()
->
Instant
{
Instant
::
now
()
...
...
@@ -427,6 +434,77 @@ mod tests {
}
}
/// Two-party meeting point used by test selectors to make calls overlap in time.
///
/// `arrivals` counts how many callers have entered `wait_for_peer`; `cv` is used
/// to wake the first caller once a second one shows up.
#[derive(Default)]
struct SelectorRendezvous {
    arrivals: StdMutex<usize>,
    cv: Condvar,
}

impl SelectorRendezvous {
    /// Registers the caller and, if it is the first arrival, blocks the current
    /// thread until a peer arrives or 100 ms elapse, whichever comes first.
    ///
    /// Every later arrival wakes all waiters and returns immediately.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    fn wait_for_peer(&self) {
        let mut arrivals = self.arrivals.lock().unwrap();
        *arrivals += 1;

        if *arrivals > 1 {
            self.cv.notify_all();
            return;
        }

        // The predicate form re-checks the counter after every wakeup, so a
        // spurious wakeup cannot release the first caller before its peer arrives.
        let _ = self
            .cv
            .wait_timeout_while(arrivals, Duration::from_millis(100), |count| *count < 2)
            .unwrap();
    }
}
/// Test-only selector that picks the candidate with the smallest
/// `(prefill tokens, decode blocks, worker id, dp rank)` tuple.
#[derive(Clone)]
struct MinDecodeSelector {
    /// When set, every `select_worker` call first passes through this rendezvous.
    rendezvous: Option<Arc<SelectorRendezvous>>,
}

impl WorkerSelector<SimpleWorkerConfig> for MinDecodeSelector {
    /// Expands each configured worker into one candidate per data-parallel rank and
    /// returns the candidate with the lowest load tuple.
    ///
    /// A candidate missing from `request.prefill_tokens` is costed at
    /// `request.isl_tokens`; one missing from `request.decode_blocks` is costed at 0.
    ///
    /// # Errors
    ///
    /// Returns `KvSchedulerError::NoEndpoints` when there are no candidates.
    fn select_worker(
        &self,
        workers: &HashMap<WorkerId, SimpleWorkerConfig>,
        request: &SchedulingRequest,
        block_size: u32,
    ) -> Result<WorkerSelectionResult, KvSchedulerError> {
        if let Some(gate) = self.rendezvous.as_deref() {
            gate.wait_for_peer();
        }

        let candidates = workers.iter().flat_map(|(worker_id, config)| {
            let first_rank = config.data_parallel_start_rank();
            let end_rank = first_rank + config.data_parallel_size();
            (first_rank..end_rank).map(move |rank| WorkerWithDpRank::new(*worker_id, rank))
        });

        let worker = candidates
            .min_by_key(|candidate| {
                let prefill = request
                    .prefill_tokens
                    .get(candidate)
                    .copied()
                    .unwrap_or(request.isl_tokens);
                let decode = request.decode_blocks.get(candidate).copied().unwrap_or(0);
                // Worker id and rank act as tie-breakers.
                (prefill, decode, candidate.worker_id, candidate.dp_rank)
            })
            .ok_or(KvSchedulerError::NoEndpoints)?;

        let required_blocks = request.isl_tokens.div_ceil(block_size as usize) as u64;
        let overlap_blocks = request.overlaps.scores.get(&worker).copied().unwrap_or(0);

        Ok(WorkerSelectionResult {
            worker,
            required_blocks,
            overlap_blocks,
        })
    }
}
fn
make_queue
(
num_workers
:
usize
,
block_size
:
u32
,
...
...
@@ -441,6 +519,53 @@ mod tests {
(
queue
,
slots
)
}
/// Builds a `SchedulerQueue` driven by the supplied `selector`, together with the
/// `ActiveSequencesMultiWorker` it books into.
///
/// Workers get ids `0..num_workers`, each with the dp range `(0, 1)` and
/// `max_num_batched_tokens` set to `isl`. The queue uses `FcfsPolicy`.
// The returned tuple spells out the full generic queue type, hence the allow.
#[allow(clippy::type_complexity)]
fn make_queue_with_custom_selector<Sel: WorkerSelector<SimpleWorkerConfig>>(
    num_workers: usize,
    block_size: u32,
    isl: usize,
    threshold_frac: Option<f64>,
    selector: Sel,
) -> (
    Arc<SchedulerQueue<NoopSequencePublisher, SimpleWorkerConfig, FcfsPolicy, Sel>>,
    Arc<ActiveSequencesMultiWorker<NoopSequencePublisher>>,
) {
    let worker_ids = 0..num_workers as u64;

    let dp_range: HashMap<u64, (u32, u32)> =
        worker_ids.clone().map(|id| (id, (0, 1))).collect();
    let slots = Arc::new(ActiveSequencesMultiWorker::new(
        NoopSequencePublisher,
        block_size as usize,
        dp_range,
        false,
        0,
        "test",
    ));

    let configs: HashMap<u64, SimpleWorkerConfig> = worker_ids
        .map(|id| {
            let config = SimpleWorkerConfig {
                max_num_batched_tokens: Some(isl as u64),
                ..Default::default()
            };
            (id, config)
        })
        .collect();
    // The sender half is dropped when this function returns.
    let (_cfg_tx, cfg_rx) = watch::channel(configs);

    let queue = Arc::new(SchedulerQueue::new(
        Arc::clone(&slots),
        cfg_rx,
        threshold_frac,
        block_size,
        selector,
        FcfsPolicy,
        None,
    ));

    (queue, slots)
}
#[allow(clippy::type_complexity)]
fn
make_queue_with_sender
(
num_workers
:
usize
,
...
...
@@ -505,8 +630,8 @@ mod tests {
token_seq
:
None
,
isl_tokens
,
overlaps
:
OverlapScores
::
default
(),
decode_blocks
:
HashMap
::
new
(),
prefill_tokens
:
HashMap
::
new
(),
decode_blocks
:
Fx
HashMap
::
default
(),
prefill_tokens
:
Fx
HashMap
::
default
(),
track_prefill_tokens
:
true
,
router_config_override
:
None
,
update_states
:
true
,
...
...
@@ -560,6 +685,49 @@ mod tests {
}
}
/// Two requests enqueued at the same moment must be routed to different workers.
///
/// Both enqueue tasks wait on a shared barrier and are released together with the test
/// task. The test then asserts that the two responses report different `best_worker`
/// values, and finally releases both requests from the slot tracker.
#[tokio::test(flavor = "multi_thread")]
async fn test_concurrent_immediate_admissions_see_prior_booking() {
    let selector = MinDecodeSelector {
        rendezvous: Some(Arc::new(SelectorRendezvous::default())),
    };
    let (queue, slots) = make_queue_with_custom_selector(2, 16, 512, None, selector);

    // Three parties: the two enqueue tasks plus this test task.
    let start_gate = Arc::new(Barrier::new(3));

    let (first_req, first_rx) = make_request("req-1", 512);
    let first_task = {
        let queue = Arc::clone(&queue);
        let gate = Arc::clone(&start_gate);
        tokio::spawn(async move {
            gate.wait().await;
            queue.enqueue(first_req).await;
        })
    };

    let (second_req, second_rx) = make_request("req-2", 512);
    let second_task = {
        let queue = Arc::clone(&queue);
        let gate = Arc::clone(&start_gate);
        tokio::spawn(async move {
            gate.wait().await;
            queue.enqueue(second_req).await;
        })
    };

    // Release both tasks, then wait for each enqueue to finish (first, then second).
    start_gate.wait().await;
    for task in [first_task, second_task] {
        task.await.unwrap();
    }

    let first_resp = first_rx.await.unwrap().unwrap();
    let second_resp = second_rx.await.unwrap().unwrap();

    assert_ne!(
        first_resp.best_worker, second_resp.best_worker,
        "second admission should see the first booking and choose the other idle worker"
    );

    // Clean up: mark prefill done and free the slot for each request.
    for request_id in ["req-1", "req-2"].map(String::from) {
        slots
            .mark_prefill_completed(&request_id, decay_now())
            .unwrap();
        slots.free(&request_id, decay_now()).unwrap();
    }
}
#[tokio::test(flavor
=
"multi_thread"
)]
async
fn
test_queueing_under_pressure
()
{
let
block_size
=
16
;
...
...
@@ -853,8 +1021,8 @@ mod tests {
token_seq
:
None
,
isl_tokens
:
isl
,
overlaps
:
OverlapScores
::
default
(),
decode_blocks
:
HashMap
::
new
(),
prefill_tokens
:
HashMap
::
new
(),
decode_blocks
:
Fx
HashMap
::
default
(),
prefill_tokens
:
Fx
HashMap
::
default
(),
track_prefill_tokens
:
true
,
router_config_override
:
None
,
update_states
:
true
,
...
...
lib/kv-router/src/scheduling/selector.rs
View file @
134d484d
...
...
@@ -4,6 +4,7 @@
use
std
::
collections
::
HashMap
;
use
rand
::
Rng
;
use
rustc_hash
::
FxHashMap
;
use
super
::
config
::
KvRouterConfig
;
use
super
::
types
::{
KvSchedulerError
,
SchedulingRequest
,
pinned_worker_config
};
...
...
@@ -24,7 +25,7 @@ pub trait WorkerSelector<C: WorkerConfigLike> {
/// Helper function for softmax sampling.
/// Returns the selected worker and its logit.
fn
softmax_sample
(
logits
:
&
HashMap
<
WorkerWithDpRank
,
f64
>
,
logits
:
&
Fx
HashMap
<
WorkerWithDpRank
,
f64
>
,
temperature
:
f64
,
)
->
(
WorkerWithDpRank
,
f64
)
{
let
mut
rng
=
rand
::
rng
();
...
...
@@ -32,7 +33,7 @@ fn softmax_sample(
}
fn
softmax_sample_with_sample
(
logits
:
&
HashMap
<
WorkerWithDpRank
,
f64
>
,
logits
:
&
Fx
HashMap
<
WorkerWithDpRank
,
f64
>
,
temperature
:
f64
,
sample
:
f64
,
)
->
(
WorkerWithDpRank
,
f64
)
{
...
...
@@ -260,7 +261,7 @@ impl<C: WorkerConfigLike> WorkerSelector<C> for DefaultWorkerSelector {
(
min_workers
[
0
],
min_score
)
}
}
else
{
let
mut
worker_logits
=
HashMap
::
new
();
let
mut
worker_logits
=
Fx
HashMap
::
default
();
for
worker
in
worker_iter
{
let
score
=
get_score
(
worker
);
worker_logits
.insert
(
worker
,
score
);
...
...
@@ -324,7 +325,7 @@ mod tests {
#[test]
fn
test_softmax_sample_single_key
()
{
let
mut
logits
=
HashMap
::
new
();
let
mut
logits
=
Fx
HashMap
::
default
();
let
worker
=
WorkerWithDpRank
::
from_worker_id
(
42
);
for
(
logit
,
temperature
)
in
[
(
0.5
,
0.1
),
...
...
@@ -346,7 +347,7 @@ mod tests {
#[test]
fn
test_softmax_sample_zero_temperature
()
{
let
mut
logits
=
HashMap
::
new
();
let
mut
logits
=
Fx
HashMap
::
default
();
let
worker1
=
WorkerWithDpRank
::
from_worker_id
(
1
);
let
worker2
=
WorkerWithDpRank
::
from_worker_id
(
2
);
let
worker3
=
WorkerWithDpRank
::
from_worker_id
(
3
);
...
...
@@ -403,7 +404,7 @@ mod tests {
let
worker2
=
WorkerWithDpRank
::
from_worker_id
(
2
);
let
worker3
=
WorkerWithDpRank
::
from_worker_id
(
3
);
let
logits
=
HashMap
::
from
([(
worker1
,
0.0
),
(
worker2
,
3.0
),
(
worker3
,
9.0
)]);
let
logits
=
Fx
HashMap
::
from
_iter
([(
worker1
,
0.0
),
(
worker2
,
3.0
),
(
worker3
,
9.0
)]);
let
entries
:
Vec
<
_
>
=
logits
.iter
()
.map
(|(
worker
,
logit
)|
(
*
worker
,
*
logit
))
...
...
lib/kv-router/src/scheduling/types.rs
View file @
134d484d
...
...
@@ -4,6 +4,7 @@
use
std
::
collections
::{
HashMap
,
HashSet
};
use
dynamo_tokens
::
SequenceHash
;
use
rustc_hash
::
FxHashMap
;
use
serde
::{
Deserialize
,
Serialize
};
use
super
::
config
::
RouterConfigOverride
;
...
...
@@ -43,8 +44,8 @@ pub struct SchedulingRequest {
pub
token_seq
:
Option
<
Vec
<
SequenceHash
>>
,
pub
isl_tokens
:
usize
,
pub
overlaps
:
OverlapScores
,
pub
decode_blocks
:
HashMap
<
WorkerWithDpRank
,
usize
>
,
pub
prefill_tokens
:
HashMap
<
WorkerWithDpRank
,
usize
>
,
pub
decode_blocks
:
Fx
HashMap
<
WorkerWithDpRank
,
usize
>
,
pub
prefill_tokens
:
Fx
HashMap
<
WorkerWithDpRank
,
usize
>
,
pub
track_prefill_tokens
:
bool
,
pub
router_config_override
:
Option
<
RouterConfigOverride
>
,
pub
update_states
:
bool
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment