Unverified commit 134d484d, authored by Yan Ru Pei, committed by GitHub
Browse files

feat(kv-router): add prompt membership index for scheduler reads (#8175)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent d0d9c030
......@@ -2328,6 +2328,7 @@ dependencies = [
"serde_json",
"tokio",
"tokio-util",
"tracing-subscriber",
"uuid",
]
......@@ -2355,7 +2356,6 @@ dependencies = [
"async-trait",
"axum 0.8.4",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
......
......@@ -11,9 +11,15 @@ homepage.workspace = true
repository.workspace = true
description = "Lightweight HTTP benchmarks for Dynamo endpoints"
[[bin]]
[[bench]]
name = "multiturn_bench"
path = "src/bin/multiturn_bench.rs"
path = "multiturn_bench.rs"
harness = false
[[bench]]
name = "offline_replay_bench"
path = "offline_replay_bench.rs"
harness = false
[[bench]]
name = "kv_indexer_bench"
......@@ -50,4 +56,5 @@ minstant = "0.1.7"
plotters = { version = "0.3", default-features = false, features = ["svg_backend", "line_series", "point_series", "full_palette"] }
tokio = { workspace = true, features = ["rt", "macros", "time"] }
tokio-util = { workspace = true }
tracing-subscriber = { workspace = true }
uuid = { workspace = true }
......@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
SPDX-License-Identifier: Apache-2.0
-->
# Multiturn Benchmark
# Bench Entrypoints
`multiturn_bench` simulates concurrent multi-turn conversations against an
OpenAI-compatible chat endpoint and reports per-turn TTFT and total latency
......@@ -11,14 +11,14 @@ statistics. It can optionally enable **speculative prefill** — a technique tha
pre-warms the KV cache with the predicted next-turn prefix after each assistant
response, cutting TTFT on subsequent turns.
`offline_replay_bench` runs the Rust-native replay loop directly for profiling
and throughput measurements without going through the Python wrapper.
## Quick start
```bash
# Build
cargo build --release --package dynamo-bench --bin multiturn_bench
# Smoke test (1 user, 1 turn, ~50 tokens)
./target/release/multiturn_bench --ping
cargo bench --package dynamo-bench --bench multiturn_bench -- --ping
```
## Speculative prefill demo
......@@ -45,7 +45,7 @@ python -m dynamo.frontend \
### 2. Run baseline (no speculative prefill)
```bash
./target/release/multiturn_bench \
cargo bench --package dynamo-bench --bench multiturn_bench -- \
--url http://localhost:8000 \
--num-users 10 \
--num-turns 5 \
......@@ -59,7 +59,7 @@ python -m dynamo.frontend \
### 3. Run with speculative prefill
```bash
./target/release/multiturn_bench \
cargo bench --package dynamo-bench --bench multiturn_bench -- \
--url http://localhost:8000 \
--num-users 10 \
--num-turns 5 \
......@@ -101,4 +101,16 @@ request arrives.
4. The KV router routes the speculative request to the same worker, warming its cache.
5. When the real next-turn request arrives, the KV router sees high cache overlap on that worker and routes there, yielding a much lower TTFT.
See also: [Agent Hints documentation](../../../../docs/components/frontend/nvext.md#agent-hints)
See also: [Agent Hints documentation](../../docs/components/frontend/nvext.md#agent-hints)
## Offline replay
```bash
cargo bench --package dynamo-bench --bench offline_replay_bench -- \
/path/to/mooncake_trace.jsonl \
--num-workers 4 \
--router-mode kv-router \
--arrival-speedup-ratio 4 \
--trace-block-size 512 \
--block-size 64
```
......@@ -289,13 +289,15 @@ async fn run_benchmark(
all_latencies.extend(task.await??);
}
if progress.elapsed() > Duration::from_millis(benchmark_duration_ms * 11 / 10) {
// Keep the post-run drain check out of the measured benchmark interval.
let total_duration = progress.elapsed();
multi.assert_completely_drained(Instant::now());
if total_duration > Duration::from_millis(benchmark_duration_ms * 11 / 10) {
eprintln!(
"WARNING: Benchmarker could not keep up. Rerun with a larger --benchmark-duration-ms."
);
}
let total_duration = progress.elapsed();
let total_ops = all_latencies.len();
let offered_ops_throughput = total_ops as f32 / benchmark_duration_ms as f32 * 1000.0;
......@@ -311,10 +313,13 @@ async fn run_benchmark(
};
println!(
"Ops Throughput: {} ops/s (potential_blocks_and_tokens + add + prefill_complete + free)",
ops_throughput
"Ops Throughput: offered={} ops/s achieved={} ops/s (potential_blocks_and_tokens + add + prefill_complete + free)",
offered_ops_throughput, ops_throughput
);
println!(
"Block Throughput: offered={} block ops/s achieved={} block ops/s",
offered_block_throughput, block_throughput
);
println!("Block Throughput: {} block ops/s", block_throughput);
println!("Latency p99: {}us", latency_p99_us);
Ok(BenchmarkResults {
......@@ -501,6 +506,7 @@ async fn run_tests() -> anyhow::Result<()> {
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let args = Args::parse();
init_sequence_logging(args.common.sequence_logs);
if args.common.test {
return run_tests().await;
......
......@@ -28,6 +28,7 @@ use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufRead, BufReader};
use tokio::task::JoinHandle;
use tracing_subscriber::EnvFilter;
use uuid::Uuid;
/// Shared CLI arguments for trace-based benchmarks.
......@@ -95,6 +96,23 @@ pub struct CommonArgs {
/// Ignored - passed by cargo bench harness.
#[arg(long, hide = true, global = true)]
pub bench: bool,
/// Opt in to runtime warn/error logs from the mocker and sequence tracker.
#[clap(long)]
pub sequence_logs: bool,
}
pub fn init_sequence_logging(enabled: bool) {
if !enabled {
return;
}
let _ = tracing_subscriber::fmt()
.with_env_filter(EnvFilter::new(
"error,dynamo_kv_router::sequences=warn,dynamo_mocker=warn",
))
.with_writer(std::io::stderr)
.try_init();
}
/// A single request deserialized from the mooncake trace JSONL.
......
......@@ -8,7 +8,7 @@
//! first token) and total request latency per turn, with configurable inter-turn
//! exponential delay.
//!
//! Run with: cargo run --package dynamo-bench --bin multiturn_bench -- --help
//! Run with: cargo bench --package dynamo-bench --bench multiturn_bench -- --help
use anyhow::{Context, Result};
use clap::Parser;
......@@ -115,6 +115,11 @@ struct AgentHintsBody {
speculative_prefill: bool,
}
/// Reports whether the process was started without any meaningful arguments.
///
/// Returns `true` when, after skipping the program name, either no arguments
/// remain or every remaining argument is exactly `--bench`.
fn is_bench_harness_invocation() -> bool {
    // `all` is vacuously true on an empty iterator, which covers the
    // no-argument case without a separate emptiness check.
    std::env::args_os()
        .skip(1)
        .all(|candidate| candidate == "--bench")
}
// ---------------------------------------------------------------------------
// Turn result
// ---------------------------------------------------------------------------
......@@ -533,6 +538,11 @@ fn print_per_turn_table(label: &str, stats: &[PerTurnStats]) {
#[tokio::main]
async fn main() -> Result<()> {
if is_bench_harness_invocation() {
eprintln!("multiturn_bench: skipping no-arg harness invocation");
return Ok(());
}
let mut args = Args::parse();
if args.ping {
......
......@@ -5,6 +5,8 @@
//!
//! Useful for profiling replay itself without the Python CLI wrapper. This keeps
//! the default mocker perf model unless CLI overrides are provided.
//!
//! Run with: cargo bench --package dynamo-bench --bench offline_replay_bench -- --help
use std::fs::File;
use std::path::PathBuf;
......@@ -30,6 +32,11 @@ impl From<RouterModeArg> for ReplayRouterMode {
}
}
/// Detects an invocation that carries no arguments other than `--bench`.
///
/// The program name is skipped. The result is `true` when nothing else was
/// passed, or when each remaining argument equals `--bench`; any other
/// argument makes the result `false`.
fn is_bench_harness_invocation() -> bool {
    for candidate in std::env::args_os().skip(1) {
        if candidate != "--bench" {
            return false;
        }
    }
    // Reached with zero arguments or with only `--bench` arguments.
    true
}
#[derive(Parser, Debug)]
#[command(name = "offline_replay_bench")]
#[command(about = "Run offline replay directly in Rust for benchmarking and profiling")]
......@@ -84,6 +91,10 @@ struct Args {
/// Number of times to rerun the same replay in-process
#[arg(long, default_value_t = 1)]
iterations: usize,
/// Ignored -- passed by cargo bench
#[arg(long, hide = true)]
bench: bool,
}
fn build_engine_args(args: &Args) -> Result<MockEngineArgs> {
......@@ -111,6 +122,11 @@ fn build_engine_args(args: &Args) -> Result<MockEngineArgs> {
}
fn main() -> Result<()> {
if is_bench_harness_invocation() {
eprintln!("offline_replay_bench: skipping no-arg harness invocation");
return Ok(());
}
let args = Args::parse();
let engine_args = build_engine_args(&args)?;
let started_at = Instant::now();
......
......@@ -431,9 +431,9 @@ dependencies = [
[[package]]
name = "axum-macros"
version = "0.5.0"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca"
dependencies = [
"proc-macro2",
"quote",
......@@ -526,7 +526,7 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"cexpr",
"clang-sys",
"itertools 0.13.0",
......@@ -584,20 +584,20 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.11.0"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
dependencies = [
"serde_core",
]
[[package]]
name = "bitstream-io"
version = "4.9.0"
version = "4.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757"
checksum = "7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f"
dependencies = [
"core2",
"no_std_io2",
]
[[package]]
......@@ -848,7 +848,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -987,15 +987,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "core2"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
dependencies = [
"memchr",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
......@@ -1442,7 +1433,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -1451,7 +1442,7 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
]
......@@ -1511,7 +1502,6 @@ dependencies = [
"anyhow",
"async-trait",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
......@@ -1519,7 +1509,7 @@ dependencies = [
"ordered-float 4.6.0",
"parking_lot",
"prometheus",
"rand 0.9.2",
"rand 0.9.4",
"rmp-serde",
"rustc-hash 2.1.2",
"serde",
......@@ -1546,7 +1536,7 @@ dependencies = [
"axum-server",
"base64 0.22.1",
"bincode 2.0.1",
"bitflags 2.11.0",
"bitflags 2.11.1",
"blake3",
"bs62",
"bytemuck",
......@@ -1587,7 +1577,7 @@ dependencies = [
"parking_lot",
"prometheus",
"prost 0.13.5",
"rand 0.9.2",
"rand 0.9.4",
"rayon",
"reqwest",
"rmp-serde",
......@@ -1647,7 +1637,7 @@ dependencies = [
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.2",
"rand 0.9.4",
"rustc-hash 2.1.2",
"serde",
"serde_json",
......@@ -1735,14 +1725,14 @@ dependencies = [
"parking_lot",
"percent-encoding",
"prometheus",
"rand 0.9.2",
"rand 0.9.4",
"rayon",
"regex",
"reqwest",
"rmp-serde",
"serde",
"serde_json",
"socket2 0.5.8",
"socket2 0.5.10",
"thiserror 2.0.18",
"tmq",
"tokio",
......@@ -1895,7 +1885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -2454,7 +2444,7 @@ dependencies = [
"libc",
"log",
"num_cpus",
"rand 0.9.2",
"rand 0.9.4",
"reqwest",
"serde",
"serde_json",
......@@ -2559,9 +2549,9 @@ dependencies = [
[[package]]
name = "hyper-rustls"
version = "0.27.7"
version = "0.27.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f"
dependencies = [
"http",
"hyper",
......@@ -2569,7 +2559,6 @@ dependencies = [
"log",
"rustls",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
......@@ -2980,7 +2969,7 @@ dependencies = [
"portable-atomic",
"portable-atomic-util",
"serde_core",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -3285,9 +3274,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
[[package]]
name = "libc"
version = "0.2.184"
version = "0.2.185"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
[[package]]
name = "libfuzzer-sys"
......@@ -3331,7 +3320,7 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"libc",
"plain",
"redox_syscall 0.7.4",
......@@ -3769,7 +3758,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"byteorder",
"derive_builder",
"getset",
......@@ -3817,7 +3806,7 @@ version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"cfg-if",
"cfg_aliases",
"libc",
......@@ -3856,6 +3845,15 @@ dependencies = [
"signatory",
]
[[package]]
name = "no_std_io2"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b51ed7824b6e07d354605f4abb3d9d300350701299da96642ee084f5ce631550"
dependencies = [
"memchr",
]
[[package]]
name = "nom"
version = "7.1.3"
......@@ -3893,7 +3891,7 @@ version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"filetime",
"fsevent-sys",
"inotify",
......@@ -3911,7 +3909,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -4028,7 +4026,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-foundation",
]
......@@ -4049,7 +4047,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"dispatch2",
"objc2",
]
......@@ -4060,7 +4058,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"dispatch2",
"objc2",
"objc2-core-foundation",
......@@ -4093,7 +4091,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-core-foundation",
"objc2-core-graphics",
......@@ -4111,7 +4109,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"block2",
"libc",
"objc2",
......@@ -4124,7 +4122,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-core-foundation",
]
......@@ -4135,7 +4133,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-core-foundation",
"objc2-foundation",
......@@ -4147,7 +4145,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"block2",
"objc2",
"objc2-cloud-kit",
......@@ -4194,7 +4192,7 @@ dependencies = [
"parking_lot",
"percent-encoding",
"quick-xml",
"rand 0.9.2",
"rand 0.9.4",
"reqwest",
"ring",
"rustls-pemfile",
......@@ -4244,7 +4242,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"libc",
"once_cell",
"onig_sys",
......@@ -4378,7 +4376,7 @@ dependencies = [
"futures-util",
"opentelemetry",
"percent-encoding",
"rand 0.9.2",
"rand 0.9.4",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
......@@ -4697,9 +4695,9 @@ dependencies = [
[[package]]
name = "pkg-config"
version = "0.3.32"
version = "0.3.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
[[package]]
name = "plain"
......@@ -4713,7 +4711,7 @@ version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"crc32fast",
"fdeflate",
"flate2",
......@@ -4978,7 +4976,7 @@ version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"memchr",
"unicase",
]
......@@ -5156,7 +5154,7 @@ dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.2",
"rand 0.9.4",
"ring",
"rustc-hash 2.1.2",
"rustls",
......@@ -5179,7 +5177,7 @@ dependencies = [
"once_cell",
"socket2 0.6.3",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
......@@ -5216,9 +5214,9 @@ dependencies = [
[[package]]
name = "rand"
version = "0.9.2"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.5",
......@@ -5289,7 +5287,7 @@ dependencies = [
"num-traits",
"paste",
"profiling",
"rand 0.9.2",
"rand 0.9.4",
"rand_chacha 0.9.0",
"simd_helpers",
"thiserror 2.0.18",
......@@ -5320,9 +5318,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.11.0"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
dependencies = [
"either",
"rayon-core",
......@@ -5355,7 +5353,7 @@ version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
]
[[package]]
......@@ -5364,7 +5362,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
]
[[package]]
......@@ -5429,9 +5427,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.24"
version = "0.12.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f"
checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
dependencies = [
"base64 0.22.1",
"bytes",
......@@ -5519,7 +5517,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4147b952f3f819eca0e99527022f7d6a8d05f111aeb0a62960c74eb283bec8fc"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"once_cell",
"serde",
"serde_derive",
......@@ -5598,7 +5596,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"errno",
"libc",
"linux-raw-sys 0.4.15",
......@@ -5611,25 +5609,25 @@ version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"errno",
"libc",
"linux-raw-sys 0.12.1",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
name = "rustls"
version = "0.23.37"
version = "0.23.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21"
dependencies = [
"aws-lc-rs",
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.11",
"rustls-webpki 0.103.12",
"subtle",
"zeroize",
]
......@@ -5690,9 +5688,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.103.11"
version = "0.103.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4"
checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06"
dependencies = [
"aws-lc-rs",
"ring",
......@@ -5845,7 +5843,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
......@@ -5858,7 +5856,7 @@ version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"core-foundation 0.10.1",
"core-foundation-sys",
"libc",
......@@ -6182,9 +6180,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.5.8"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
dependencies = [
"libc",
"windows-sys 0.52.0",
......@@ -6324,7 +6322,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"core-foundation 0.9.4",
"system-configuration-sys",
]
......@@ -6368,7 +6366,7 @@ dependencies = [
"getrandom 0.4.2",
"once_cell",
"rustix 1.1.4",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -6558,7 +6556,7 @@ dependencies = [
"monostate",
"onig",
"paste",
"rand 0.9.2",
"rand 0.9.4",
"rayon",
"rayon-cond",
"regex",
......@@ -6772,7 +6770,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost 0.13.5",
"socket2 0.5.8",
"socket2 0.5.10",
"tokio",
"tokio-stream",
"tower",
......@@ -6890,7 +6888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"base64 0.22.1",
"bitflags 2.11.0",
"bitflags 2.11.1",
"bytes",
"futures-util",
"http",
......@@ -7520,7 +7518,7 @@ version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"hashbrown 0.15.5",
"indexmap 2.14.0",
"semver",
......@@ -7592,7 +7590,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -7978,7 +7976,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
"bitflags 2.11.0",
"bitflags 2.11.1",
"indexmap 2.14.0",
"log",
"serde",
......
......@@ -431,9 +431,9 @@ dependencies = [
[[package]]
name = "axum-macros"
version = "0.5.0"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca"
dependencies = [
"proc-macro2",
"quote",
......@@ -526,7 +526,7 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"cexpr",
"clang-sys",
"itertools 0.13.0",
......@@ -544,7 +544,7 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"cexpr",
"clang-sys",
"itertools 0.13.0",
......@@ -602,20 +602,20 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.11.0"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
dependencies = [
"serde_core",
]
[[package]]
name = "bitstream-io"
version = "4.9.0"
version = "4.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757"
checksum = "7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f"
dependencies = [
"core2",
"no_std_io2",
]
[[package]]
......@@ -866,7 +866,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -1005,15 +1005,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "core2"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
dependencies = [
"memchr",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
......@@ -1460,7 +1451,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -1469,7 +1460,7 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
]
......@@ -1520,7 +1511,6 @@ dependencies = [
"async-trait",
"axum",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
......@@ -1528,7 +1518,7 @@ dependencies = [
"ordered-float 4.6.0",
"parking_lot",
"prometheus",
"rand 0.9.2",
"rand 0.9.4",
"reqwest",
"rmp-serde",
"rustc-hash 2.1.2",
......@@ -1558,7 +1548,7 @@ dependencies = [
"axum-server",
"base64 0.22.1",
"bincode 2.0.1",
"bitflags 2.11.0",
"bitflags 2.11.1",
"blake3",
"bs62",
"bytemuck",
......@@ -1601,7 +1591,7 @@ dependencies = [
"parking_lot",
"prometheus",
"prost 0.13.5",
"rand 0.9.2",
"rand 0.9.4",
"rayon",
"reqwest",
"rmp-serde",
......@@ -1662,7 +1652,7 @@ dependencies = [
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.2",
"rand 0.9.4",
"rustc-hash 2.1.2",
"serde",
"serde_json",
......@@ -1782,14 +1772,14 @@ dependencies = [
"parking_lot",
"percent-encoding",
"prometheus",
"rand 0.9.2",
"rand 0.9.4",
"rayon",
"regex",
"reqwest",
"rmp-serde",
"serde",
"serde_json",
"socket2 0.5.8",
"socket2 0.5.10",
"thiserror 2.0.18",
"tmq",
"tokio",
......@@ -1942,7 +1932,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -2093,7 +2083,7 @@ version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"ffmpeg-sys-next",
"libc",
]
......@@ -2526,7 +2516,7 @@ dependencies = [
"libc",
"log",
"num_cpus",
"rand 0.9.2",
"rand 0.9.4",
"reqwest",
"serde",
"serde_json",
......@@ -2631,9 +2621,9 @@ dependencies = [
[[package]]
name = "hyper-rustls"
version = "0.27.7"
version = "0.27.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f"
dependencies = [
"http",
"hyper",
......@@ -2641,7 +2631,6 @@ dependencies = [
"log",
"rustls",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
......@@ -3052,7 +3041,7 @@ dependencies = [
"portable-atomic",
"portable-atomic-util",
"serde_core",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -3336,9 +3325,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
[[package]]
name = "libc"
version = "0.2.184"
version = "0.2.185"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
[[package]]
name = "libfuzzer-sys"
......@@ -3382,7 +3371,7 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"libc",
"plain",
"redox_syscall 0.7.4",
......@@ -3829,7 +3818,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22f9786d56d972959e1408b6a93be6af13b9c1392036c5c1fafa08a1b0c6ee87"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"byteorder",
"derive_builder",
"getset",
......@@ -3877,7 +3866,7 @@ version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"cfg-if",
"cfg_aliases",
"libc",
......@@ -3916,6 +3905,15 @@ dependencies = [
"signatory",
]
[[package]]
name = "no_std_io2"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b51ed7824b6e07d354605f4abb3d9d300350701299da96642ee084f5ce631550"
dependencies = [
"memchr",
]
[[package]]
name = "nom"
version = "7.1.3"
......@@ -3953,7 +3951,7 @@ version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"filetime",
"fsevent-sys",
"inotify",
......@@ -3971,7 +3969,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -4088,7 +4086,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-foundation",
]
......@@ -4109,7 +4107,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"dispatch2",
"objc2",
]
......@@ -4120,7 +4118,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"dispatch2",
"objc2",
"objc2-core-foundation",
......@@ -4153,7 +4151,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-core-foundation",
"objc2-core-graphics",
......@@ -4171,7 +4169,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"block2",
"libc",
"objc2",
......@@ -4184,7 +4182,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-core-foundation",
]
......@@ -4195,7 +4193,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"objc2",
"objc2-core-foundation",
"objc2-foundation",
......@@ -4207,7 +4205,7 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"block2",
"objc2",
"objc2-cloud-kit",
......@@ -4254,7 +4252,7 @@ dependencies = [
"parking_lot",
"percent-encoding",
"quick-xml",
"rand 0.9.2",
"rand 0.9.4",
"reqwest",
"ring",
"rustls-pemfile",
......@@ -4304,7 +4302,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"libc",
"once_cell",
"onig_sys",
......@@ -4438,7 +4436,7 @@ dependencies = [
"futures-util",
"opentelemetry",
"percent-encoding",
"rand 0.9.2",
"rand 0.9.4",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
......@@ -4757,9 +4755,9 @@ dependencies = [
[[package]]
name = "pkg-config"
version = "0.3.32"
version = "0.3.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
[[package]]
name = "plain"
......@@ -4773,7 +4771,7 @@ version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"crc32fast",
"fdeflate",
"flate2",
......@@ -5038,7 +5036,7 @@ version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"memchr",
"unicase",
]
......@@ -5226,7 +5224,7 @@ dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
"rand 0.9.2",
"rand 0.9.4",
"ring",
"rustc-hash 2.1.2",
"rustls",
......@@ -5249,7 +5247,7 @@ dependencies = [
"once_cell",
"socket2 0.6.3",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
......@@ -5286,9 +5284,9 @@ dependencies = [
[[package]]
name = "rand"
version = "0.9.2"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.5",
......@@ -5359,7 +5357,7 @@ dependencies = [
"num-traits",
"paste",
"profiling",
"rand 0.9.2",
"rand 0.9.4",
"rand_chacha 0.9.0",
"simd_helpers",
"thiserror 2.0.18",
......@@ -5390,9 +5388,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
name = "rayon"
version = "1.11.0"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
dependencies = [
"either",
"rayon-core",
......@@ -5425,7 +5423,7 @@ version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
]
[[package]]
......@@ -5434,7 +5432,7 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
]
[[package]]
......@@ -5499,9 +5497,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.24"
version = "0.12.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f"
checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
dependencies = [
"base64 0.22.1",
"bytes",
......@@ -5589,7 +5587,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4147b952f3f819eca0e99527022f7d6a8d05f111aeb0a62960c74eb283bec8fc"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"once_cell",
"serde",
"serde_derive",
......@@ -5668,7 +5666,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"errno",
"libc",
"linux-raw-sys 0.4.15",
......@@ -5681,25 +5679,25 @@ version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"errno",
"libc",
"linux-raw-sys 0.12.1",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
name = "rustls"
version = "0.23.37"
version = "0.23.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21"
dependencies = [
"aws-lc-rs",
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.11",
"rustls-webpki 0.103.12",
"subtle",
"zeroize",
]
......@@ -5760,9 +5758,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.103.11"
version = "0.103.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4"
checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06"
dependencies = [
"aws-lc-rs",
"ring",
......@@ -5915,7 +5913,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
......@@ -5928,7 +5926,7 @@ version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"core-foundation 0.10.1",
"core-foundation-sys",
"libc",
......@@ -6252,9 +6250,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.5.8"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
dependencies = [
"libc",
"windows-sys 0.52.0",
......@@ -6394,7 +6392,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"core-foundation 0.9.4",
"system-configuration-sys",
]
......@@ -6438,7 +6436,7 @@ dependencies = [
"getrandom 0.4.2",
"once_cell",
"rustix 1.1.4",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -6628,7 +6626,7 @@ dependencies = [
"monostate",
"onig",
"paste",
"rand 0.9.2",
"rand 0.9.4",
"rayon",
"rayon-cond",
"regex",
......@@ -6842,7 +6840,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"prost 0.13.5",
"socket2 0.5.8",
"socket2 0.5.10",
"tokio",
"tokio-stream",
"tower",
......@@ -6960,7 +6958,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"base64 0.22.1",
"bitflags 2.11.0",
"bitflags 2.11.1",
"bytes",
"futures-util",
"http",
......@@ -7607,7 +7605,7 @@ version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
"bitflags 2.11.0",
"bitflags 2.11.1",
"hashbrown 0.15.5",
"indexmap 2.14.0",
"semver",
......@@ -7679,7 +7677,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -8065,7 +8063,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
"bitflags 2.11.0",
"bitflags 2.11.1",
"indexmap 2.14.0",
"log",
"serde",
......
......@@ -30,7 +30,6 @@ async-trait = { workspace = true }
dashmap = { workspace = true }
ordered-float = { workspace = true }
derive_builder = { workspace = true }
derive-getters = { workspace = true }
prometheus = { workspace = true, optional = true }
rand = { workspace = true }
serde = { workspace = true }
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use rustc_hash::FxHashSet;
use crate::protocols::WorkerWithDpRank;
/// Shrinks `active` so that it only contains workers that are also in `next`,
/// invoking `on_drop` once for every worker removed from `active`.
///
/// The two sets are compared by size first: when they have the same number of
/// entries the function returns immediately without comparing membership.
///
/// When `next` is smaller than `active` and every entry of `next` is already in
/// `active`, the dropped workers are reported and `active` is overwritten with a
/// copy of `next`. In every other case `active` is filtered in place, keeping
/// only the workers that `next` contains.
#[inline]
pub(crate) fn reconcile_active_workers(
    active: &mut FxHashSet<WorkerWithDpRank>,
    next: &FxHashSet<WorkerWithDpRank>,
    mut on_drop: impl FnMut(WorkerWithDpRank),
) {
    let (current_len, incoming_len) = (active.len(), next.len());
    if current_len == incoming_len {
        return;
    }

    // `next` is a strict subset of `active`: the survivors are exactly `next`.
    let strict_shrink =
        incoming_len < current_len && next.iter().all(|candidate| active.contains(candidate));

    if !strict_shrink {
        // General case: `next` may hold workers that `active` no longer tracks,
        // so keep only the intersection.
        active.retain(|candidate| {
            let keep = next.contains(candidate);
            if !keep {
                on_drop(*candidate);
            }
            keep
        });
        return;
    }

    active
        .iter()
        .copied()
        .filter(|candidate| !next.contains(candidate))
        .for_each(&mut on_drop);
    active.clone_from(next);
}
......@@ -33,6 +33,7 @@ use std::collections::VecDeque;
use std::sync::atomic::{AtomicUsize, Ordering};
use super::{EventKind, KvIndexerMetrics, SyncIndexer, WorkerTask};
use crate::active_set::reconcile_active_workers;
use crate::protocols::*;
/// Thread-safe shared reference to a Block.
......@@ -236,16 +237,8 @@ impl ConcurrentRadixTree {
let child_count = guard.workers.len();
if child_count != active_count {
// Workers changed: either dropped out (child < active) or
// stale entries exist (child > active). In both cases,
// retain only workers present in the child, scoring dropouts.
active.retain(|w| {
if guard.workers.contains(w) {
true
} else {
scores.scores.insert(*w, matched_depth);
false
}
reconcile_active_workers(&mut active, &guard.workers, |worker| {
scores.scores.insert(worker, matched_depth);
});
active_count = active.len();
......
......@@ -26,6 +26,7 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use super::{EventKind, KvIndexerMetrics, SyncIndexer, WorkerTask};
use crate::active_set::reconcile_active_workers;
use crate::protocols::{
DpRank, ExternalSequenceBlockHash, KvCacheEvent, KvCacheEventData, KvCacheEventError,
KvCacheStoreData, KvCacheStoredBlockData, LocalBlockHash, OverlapScores, RouterEvent, WorkerId,
......@@ -471,18 +472,6 @@ impl PositionalIndexer {
// -----------------------------------------------------------------------------
impl PositionalIndexer {
/// Score all active workers at the given position and clear the active set.
#[inline]
fn drain_active(
active: &mut FxHashSet<WorkerWithDpRank>,
scores: &mut OverlapScores,
pos: usize,
) {
for worker in active.drain() {
scores.scores.insert(worker, pos as u32);
}
}
/// Compute sequence hash incrementally from previous hash and current local hash.
#[inline]
fn compute_next_seq_hash(prev_seq_hash: u64, current_local_hash: u64) -> u64 {
......@@ -581,24 +570,23 @@ impl PositionalIndexer {
}
let Some(entry) = self.index.get(&(pos, sequence[pos])) else {
Self::drain_active(active, scores, pos);
for worker in active.drain() {
scores.scores.insert(worker, pos as u32);
}
break;
};
Self::ensure_seq_hash_computed(seq_hashes, pos, sequence);
let Some(workers) = entry.get(seq_hashes[pos]) else {
Self::drain_active(active, scores, pos);
for worker in active.drain() {
scores.scores.insert(worker, pos as u32);
}
break;
};
if workers.len() < active.len() {
active.retain(|w| {
if workers.contains(w) {
true
} else {
scores.scores.insert(*w, pos as u32);
false
}
if workers.len() != active.len() {
reconcile_active_workers(active, workers, |worker| {
scores.scores.insert(worker, pos as u32);
});
}
......
......@@ -23,6 +23,7 @@ use std::{
use rustc_hash::{FxHashMap, FxHashSet};
use crate::active_set::reconcile_active_workers;
use crate::protocols::*;
/// A shared reference to a [`RadixBlock`].
......@@ -251,26 +252,9 @@ impl RadixTree {
let borrow = block.borrow();
let child_count = borrow.workers.len();
if child_count < active_count {
// Workers dropped out. Record scores for those that left.
// Score = matched_depth (number of nodes they were present at).
for worker in &active {
if !borrow.workers.contains(worker) {
scores.scores.insert(*worker, matched_depth);
}
}
active.clone_from(&borrow.workers);
active_count = child_count;
} else if child_count > active_count {
// Stale entries: child retains workers already removed from
// an ancestor. Fall back to full membership check.
active.retain(|w| {
if borrow.workers.contains(w) {
true
} else {
scores.scores.insert(*w, matched_depth);
false
}
if child_count != active_count {
reconcile_active_workers(&mut active, &borrow.workers, |worker| {
scores.scores.insert(worker, matched_depth);
});
active_count = active.len();
}
......
......@@ -6,6 +6,8 @@
//! This crate provides the core radix tree implementation and protocols for
//! efficient KV cache lookup and routing in distributed LLM inference systems.
mod active_set;
pub mod indexer;
pub mod protocols;
pub mod recovery;
......
......@@ -5,7 +5,8 @@ use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{mpsc, watch};
use rustc_hash::{FxHashMap, FxHashSet};
use tokio::sync::watch;
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
......@@ -27,7 +28,6 @@ where
S: SchedulingPolicy,
Sel: WorkerSelector<C>,
{
request_tx: mpsc::Sender<SchedulingRequest>,
slots: Arc<ActiveSequencesMultiWorker<P>>,
queue: Arc<SchedulerQueue<P, C, S, Sel>>,
queue_updates: watch::Sender<()>,
......@@ -109,9 +109,8 @@ where
prefill_load_estimator,
));
let (queue_updates, _) = watch::channel(());
let (request_tx, request_rx) = mpsc::channel::<SchedulingRequest>(1024);
let queue_clone = Arc::clone(&queue);
let queue_remote_updates = Arc::clone(&queue);
let queue_periodic_updates = Arc::clone(&queue);
let mut remote_state_updates = slots.subscribe_remote_state_changes();
let remote_update_cancel_token = cancellation_token.clone();
let queue_updates_remote = queue_updates.clone();
......@@ -138,33 +137,23 @@ where
});
tokio::spawn(async move {
let mut request_rx = request_rx;
let mut recheck_interval = tokio::time::interval(recheck_interval);
tracing::trace!("LocalScheduler background task started");
tracing::trace!("LocalScheduler periodic queue update task started");
loop {
tokio::select! {
_ = cancellation_token.cancelled() => {
tracing::trace!("LocalScheduler background task shutting down");
tracing::trace!("LocalScheduler periodic queue update task shutting down");
break;
}
request = request_rx.recv() => {
let Some(request) = request else {
tracing::warn!("LocalScheduler request channel closed");
break;
};
tracing::trace!("received request to be scheduled");
queue_clone.enqueue(request).await;
}
_ = recheck_interval.tick() => {
queue_clone.update().await;
queue_periodic_updates.update().await;
}
}
}
});
Self {
request_tx,
slots,
queue,
queue_updates,
......@@ -197,8 +186,8 @@ where
token_seq,
isl_tokens,
overlaps,
decode_blocks: HashMap::new(),
prefill_tokens: HashMap::new(),
decode_blocks: FxHashMap::default(),
prefill_tokens: FxHashMap::default(),
track_prefill_tokens,
router_config_override: router_config_override.cloned(),
update_states,
......@@ -210,10 +199,7 @@ where
resp_tx: Some(resp_tx),
};
self.request_tx
.send(request)
.await
.map_err(|_| KvSchedulerError::SubscriberShutdown)?;
self.queue.enqueue(request).await;
resp_rx
.await
......@@ -284,7 +270,7 @@ where
decay_now,
);
let mut workers: HashSet<WorkerWithDpRank> = HashSet::new();
let mut workers: FxHashSet<WorkerWithDpRank> = FxHashSet::default();
workers.extend(decode_blocks.keys().copied());
workers.extend(prefill_tokens.keys().copied());
......
......@@ -3,10 +3,9 @@
use std::time::Duration;
use ordered_float::OrderedFloat;
use super::config::RouterQueuePolicy;
use super::types::SchedulingRequest;
use ordered_float::OrderedFloat;
/// Pluggable scheduling policy that determines queue ordering.
/// Monomorphized for zero-cost inlining on the hot comparison path.
///
......@@ -115,8 +114,6 @@ impl SchedulingPolicy for RouterSchedulingPolicy {
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use super::*;
......@@ -132,8 +129,8 @@ mod tests {
token_seq: None,
isl_tokens,
overlaps,
decode_blocks: HashMap::new(),
prefill_tokens: HashMap::new(),
decode_blocks: FxHashMap::default(),
prefill_tokens: FxHashMap::default(),
track_prefill_tokens: true,
router_config_override: None,
update_states: false,
......
......@@ -57,6 +57,8 @@ pub struct SchedulerQueue<
Sel: WorkerSelector<C> = DefaultWorkerSelector,
> {
pending: Mutex<BinaryHeap<QueueEntry<S::Key>>>,
/// Serializes admission so worker selection always sees prior bookings.
admission_gate: Mutex<()>,
/// Number of requests currently parked in the pending queue.
/// Incremented after push, decremented after pop. Lock-free reads via `Relaxed` load.
pending_count: AtomicUsize,
......@@ -96,6 +98,7 @@ impl<
}
Self {
pending: Mutex::new(BinaryHeap::new()),
admission_gate: Mutex::new(()),
pending_count: AtomicUsize::new(0),
pending_isl_tokens: AtomicUsize::new(0),
slots,
......@@ -145,17 +148,19 @@ impl<
return;
}
let _admission = self.admission_gate.lock().await;
let decay_now = Instant::now();
let Some(threshold) = self.threshold_frac else {
self.schedule(request, Instant::now()).await;
self.admit_one(request, decay_now).await;
return;
};
if request.bypass_capacity_check() {
self.schedule(request, Instant::now()).await;
self.admit_one(request, decay_now).await;
return;
}
let decay_now = Instant::now();
if self.all_workers_busy(
threshold,
request.allowed_worker_ids.as_ref(),
......@@ -171,7 +176,7 @@ impl<
self.pending_isl_tokens
.fetch_add(isl_tokens, AtomicOrdering::Relaxed);
} else {
self.schedule(request, decay_now).await;
self.admit_one(request, decay_now).await;
}
}
......@@ -198,6 +203,7 @@ impl<
}
loop {
let _admission = self.admission_gate.lock().await;
let decay_now = Instant::now();
let mut heap = self.pending.lock().await;
let Some(front) = heap.peek() else {
......@@ -221,13 +227,13 @@ impl<
self.pending_isl_tokens
.fetch_sub(entry.request.isl_tokens, AtomicOrdering::Relaxed);
tracing::debug!("scheduling request from pending queue");
self.schedule(entry.request, decay_now).await;
self.admit_one(entry.request, decay_now).await;
}
}
/// Run the full scheduling pipeline for a single request:
/// compute potential load -> select worker -> respond -> book via add_request.
async fn schedule(&self, mut request: SchedulingRequest, decay_now: Instant) {
async fn admit_one(&self, mut request: SchedulingRequest, decay_now: Instant) {
let (decode_blocks, prefill_tokens) = self
.slots
.potential_blocks_and_tokens_with_prefill_tracking(
......@@ -396,17 +402,18 @@ impl<
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::{Arc, Condvar, Mutex as StdMutex};
use std::time::Duration;
use tokio::sync::watch;
use rustc_hash::FxHashMap;
use tokio::sync::{Barrier, watch};
use super::*;
use crate::protocols::OverlapScores;
use crate::protocols::{OverlapScores, WorkerSelectionResult, WorkerWithDpRank};
use crate::scheduling::types::KvSchedulerError;
use crate::selector::DefaultWorkerSelector;
use crate::sequences::ActiveSequencesMultiWorker;
use crate::test_utils::{NoopSequencePublisher, SimpleWorkerConfig};
use crate::{DefaultWorkerSelector, WorkerSelector};
fn decay_now() -> Instant {
Instant::now()
......@@ -427,6 +434,77 @@ mod tests {
}
}
/// Test-only meeting point used to hold the first caller inside worker
/// selection until a second caller arrives (or a short timeout expires).
#[derive(Default)]
struct SelectorRendezvous {
    /// Number of callers that have entered `wait_for_peer` so far.
    arrivals: StdMutex<usize>,
    /// Notified by every arrival after the first one.
    cv: Condvar,
}

impl SelectorRendezvous {
    /// Registers the caller as arrived.
    ///
    /// The first caller blocks until a second caller has arrived or 100 ms
    /// have elapsed, whichever comes first. Every later caller wakes any
    /// waiter and returns immediately.
    fn wait_for_peer(&self) {
        let mut arrivals = self.arrivals.lock().unwrap();
        *arrivals += 1;

        if *arrivals > 1 {
            self.cv.notify_all();
            return;
        }

        // `wait_timeout_while` re-checks the arrival count after every wakeup,
        // so a spurious wakeup cannot release the first caller before its peer
        // shows up or the timeout expires.
        let _ = self
            .cv
            .wait_timeout_while(arrivals, Duration::from_millis(100), |count| *count < 2)
            .unwrap();
    }
}
/// Deterministic test selector that picks the least-loaded worker according to
/// the load figures carried on the request.
#[derive(Clone)]
struct MinDecodeSelector {
    /// When set, every `select_worker` call first goes through
    /// `SelectorRendezvous::wait_for_peer`.
    rendezvous: Option<Arc<SelectorRendezvous>>,
}

impl WorkerSelector<SimpleWorkerConfig> for MinDecodeSelector {
    /// Chooses the `(worker_id, dp_rank)` pair with the smallest key
    /// `(prefill_tokens, decode_blocks, worker_id, dp_rank)`.
    ///
    /// Workers missing from `request.prefill_tokens` are charged
    /// `request.isl_tokens`; workers missing from `request.decode_blocks` are
    /// charged `0`. Returns `KvSchedulerError::NoEndpoints` when `workers`
    /// yields no candidate.
    fn select_worker(
        &self,
        workers: &HashMap<WorkerId, SimpleWorkerConfig>,
        request: &SchedulingRequest,
        block_size: u32,
    ) -> Result<WorkerSelectionResult, KvSchedulerError> {
        if let Some(rendezvous) = self.rendezvous.as_ref() {
            rendezvous.wait_for_peer();
        }

        // Expand every configured worker into one candidate per DP rank.
        let candidates = workers.iter().flat_map(|(worker_id, config)| {
            let first_rank = config.data_parallel_start_rank();
            let end_rank = first_rank + config.data_parallel_size();
            (first_rank..end_rank).map(move |dp_rank| WorkerWithDpRank::new(*worker_id, dp_rank))
        });

        let worker = candidates
            .min_by_key(|candidate| {
                let prefill = request
                    .prefill_tokens
                    .get(candidate)
                    .copied()
                    .unwrap_or(request.isl_tokens);
                let decode = request.decode_blocks.get(candidate).copied().unwrap_or(0);
                (prefill, decode, candidate.worker_id, candidate.dp_rank)
            })
            .ok_or(KvSchedulerError::NoEndpoints)?;

        let required_blocks = request.isl_tokens.div_ceil(block_size as usize) as u64;
        let overlap_blocks = request.overlaps.scores.get(&worker).copied().unwrap_or(0);

        Ok(WorkerSelectionResult {
            worker,
            required_blocks,
            overlap_blocks,
        })
    }
}
fn make_queue(
num_workers: usize,
block_size: u32,
......@@ -441,6 +519,53 @@ mod tests {
(queue, slots)
}
/// Builds a `SchedulerQueue` with FCFS ordering and a caller-supplied worker
/// selector, together with the `ActiveSequencesMultiWorker` backing it.
///
/// Worker ids are `0..num_workers`; each worker's config sets
/// `max_num_batched_tokens` to `isl` and leaves every other field at its
/// default.
// The return type is long but only used by tests, so it is not aliased.
#[allow(clippy::type_complexity)]
fn make_queue_with_custom_selector<Sel: WorkerSelector<SimpleWorkerConfig>>(
    num_workers: usize,
    block_size: u32,
    isl: usize,
    threshold_frac: Option<f64>,
    selector: Sel,
) -> (
    Arc<SchedulerQueue<NoopSequencePublisher, SimpleWorkerConfig, FcfsPolicy, Sel>>,
    Arc<ActiveSequencesMultiWorker<NoopSequencePublisher>>,
) {
    let worker_ids = 0..num_workers as u64;

    // Every worker gets the same `(0, 1)` DP range entry.
    let dp_range: HashMap<u64, (u32, u32)> = worker_ids.clone().map(|id| (id, (0, 1))).collect();
    let slots = Arc::new(ActiveSequencesMultiWorker::new(
        NoopSequencePublisher,
        block_size as usize,
        dp_range,
        false,
        0,
        "test",
    ));

    let configs: HashMap<u64, SimpleWorkerConfig> = worker_ids
        .map(|id| {
            let config = SimpleWorkerConfig {
                max_num_batched_tokens: Some(isl as u64),
                ..Default::default()
            };
            (id, config)
        })
        .collect();
    let (_cfg_tx, cfg_rx) = watch::channel(configs);

    let queue = SchedulerQueue::new(
        Arc::clone(&slots),
        cfg_rx,
        threshold_frac,
        block_size,
        selector,
        FcfsPolicy,
        None,
    );

    (Arc::new(queue), slots)
}
#[allow(clippy::type_complexity)]
fn make_queue_with_sender(
num_workers: usize,
......@@ -505,8 +630,8 @@ mod tests {
token_seq: None,
isl_tokens,
overlaps: OverlapScores::default(),
decode_blocks: HashMap::new(),
prefill_tokens: HashMap::new(),
decode_blocks: FxHashMap::default(),
prefill_tokens: FxHashMap::default(),
track_prefill_tokens: true,
router_config_override: None,
update_states: true,
......@@ -560,6 +685,49 @@ mod tests {
}
}
/// Two requests enqueued at the same moment on a two-worker queue must be
/// routed to different workers: the second admission has to observe the
/// booking made by the first one.
#[tokio::test(flavor = "multi_thread")]
async fn test_concurrent_immediate_admissions_see_prior_booking() {
    const REQUEST_IDS: [&str; 2] = ["req-1", "req-2"];

    let rendezvous = Arc::new(SelectorRendezvous::default());
    let selector = MinDecodeSelector {
        rendezvous: Some(rendezvous),
    };
    let (queue, slots) = make_queue_with_custom_selector(2, 16, 512, None, selector);

    // One barrier slot per enqueue task plus one for this test body, so both
    // tasks are released together.
    let start_line = Arc::new(Barrier::new(REQUEST_IDS.len() + 1));

    let mut tasks = Vec::with_capacity(REQUEST_IDS.len());
    let mut receivers = Vec::with_capacity(REQUEST_IDS.len());
    for request_id in REQUEST_IDS {
        let (request, response_rx) = make_request(request_id, 512);
        let task_queue = Arc::clone(&queue);
        let task_start = Arc::clone(&start_line);
        tasks.push(tokio::spawn(async move {
            task_start.wait().await;
            task_queue.enqueue(request).await;
        }));
        receivers.push(response_rx);
    }

    start_line.wait().await;
    for task in tasks {
        task.await.unwrap();
    }

    let mut responses = Vec::with_capacity(receivers.len());
    for response_rx in receivers {
        responses.push(response_rx.await.unwrap().unwrap());
    }

    assert_ne!(
        responses[0].best_worker, responses[1].best_worker,
        "second admission should see the first booking and choose the other idle worker"
    );

    // Release both bookings so the slot tracker ends the test empty.
    for request_id in REQUEST_IDS.map(String::from) {
        slots
            .mark_prefill_completed(&request_id, decay_now())
            .unwrap();
        slots.free(&request_id, decay_now()).unwrap();
    }
}
#[tokio::test(flavor = "multi_thread")]
async fn test_queueing_under_pressure() {
let block_size = 16;
......@@ -853,8 +1021,8 @@ mod tests {
token_seq: None,
isl_tokens: isl,
overlaps: OverlapScores::default(),
decode_blocks: HashMap::new(),
prefill_tokens: HashMap::new(),
decode_blocks: FxHashMap::default(),
prefill_tokens: FxHashMap::default(),
track_prefill_tokens: true,
router_config_override: None,
update_states: true,
......
......@@ -4,6 +4,7 @@
use std::collections::HashMap;
use rand::Rng;
use rustc_hash::FxHashMap;
use super::config::KvRouterConfig;
use super::types::{KvSchedulerError, SchedulingRequest, pinned_worker_config};
......@@ -24,7 +25,7 @@ pub trait WorkerSelector<C: WorkerConfigLike> {
/// Helper function for softmax sampling.
/// Returns the selected worker and its logit.
fn softmax_sample(
logits: &HashMap<WorkerWithDpRank, f64>,
logits: &FxHashMap<WorkerWithDpRank, f64>,
temperature: f64,
) -> (WorkerWithDpRank, f64) {
let mut rng = rand::rng();
......@@ -32,7 +33,7 @@ fn softmax_sample(
}
fn softmax_sample_with_sample(
logits: &HashMap<WorkerWithDpRank, f64>,
logits: &FxHashMap<WorkerWithDpRank, f64>,
temperature: f64,
sample: f64,
) -> (WorkerWithDpRank, f64) {
......@@ -260,7 +261,7 @@ impl<C: WorkerConfigLike> WorkerSelector<C> for DefaultWorkerSelector {
(min_workers[0], min_score)
}
} else {
let mut worker_logits = HashMap::new();
let mut worker_logits = FxHashMap::default();
for worker in worker_iter {
let score = get_score(worker);
worker_logits.insert(worker, score);
......@@ -324,7 +325,7 @@ mod tests {
#[test]
fn test_softmax_sample_single_key() {
let mut logits = HashMap::new();
let mut logits = FxHashMap::default();
let worker = WorkerWithDpRank::from_worker_id(42);
for (logit, temperature) in [
(0.5, 0.1),
......@@ -346,7 +347,7 @@ mod tests {
#[test]
fn test_softmax_sample_zero_temperature() {
let mut logits = HashMap::new();
let mut logits = FxHashMap::default();
let worker1 = WorkerWithDpRank::from_worker_id(1);
let worker2 = WorkerWithDpRank::from_worker_id(2);
let worker3 = WorkerWithDpRank::from_worker_id(3);
......@@ -403,7 +404,7 @@ mod tests {
let worker2 = WorkerWithDpRank::from_worker_id(2);
let worker3 = WorkerWithDpRank::from_worker_id(3);
let logits = HashMap::from([(worker1, 0.0), (worker2, 3.0), (worker3, 9.0)]);
let logits = FxHashMap::from_iter([(worker1, 0.0), (worker2, 3.0), (worker3, 9.0)]);
let entries: Vec<_> = logits
.iter()
.map(|(worker, logit)| (*worker, *logit))
......
......@@ -4,6 +4,7 @@
use std::collections::{HashMap, HashSet};
use dynamo_tokens::SequenceHash;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use super::config::RouterConfigOverride;
......@@ -43,8 +44,8 @@ pub struct SchedulingRequest {
pub token_seq: Option<Vec<SequenceHash>>,
pub isl_tokens: usize,
pub overlaps: OverlapScores,
pub decode_blocks: HashMap<WorkerWithDpRank, usize>,
pub prefill_tokens: HashMap<WorkerWithDpRank, usize>,
pub decode_blocks: FxHashMap<WorkerWithDpRank, usize>,
pub prefill_tokens: FxHashMap<WorkerWithDpRank, usize>,
pub track_prefill_tokens: bool,
pub router_config_override: Option<RouterConfigOverride>,
pub update_states: bool,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment