Unverified Commit 3f99cf21 authored by Yan Ru Pei's avatar Yan Ru Pei Committed by GitHub
Browse files

chore: move ActiveSequences to kv-router and add unit tests (#6600)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent 4c648b11
...@@ -1880,6 +1880,7 @@ dependencies = [ ...@@ -1880,6 +1880,7 @@ dependencies = [
"async-trait", "async-trait",
"clap 4.5.60", "clap 4.5.60",
"dashmap 6.1.0", "dashmap 6.1.0",
"derive-getters",
"dynamo-bench", "dynamo-bench",
"dynamo-mocker", "dynamo-mocker",
"dynamo-runtime", "dynamo-runtime",
......
...@@ -788,9 +788,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" ...@@ -788,9 +788,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]] [[package]]
name = "chrono" name = "chrono"
version = "0.4.43" version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
dependencies = [ dependencies = [
"iana-time-zone", "iana-time-zone",
"js-sys", "js-sys",
...@@ -1106,9 +1106,9 @@ dependencies = [ ...@@ -1106,9 +1106,9 @@ dependencies = [
[[package]] [[package]]
name = "cudarc" name = "cudarc"
version = "0.19.2" version = "0.19.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aed81f178e780f3d5d354d12b4c5c5a484c4a9c329ecd037ac57f2a0e0648397" checksum = "6468cb7fa330840f3ebcd8df51edc0e7bf5c18df524792ce6004c6821851cdf3"
dependencies = [ dependencies = [
"libloading 0.9.0", "libloading 0.9.0",
] ]
...@@ -1270,9 +1270,9 @@ dependencies = [ ...@@ -1270,9 +1270,9 @@ dependencies = [
[[package]] [[package]]
name = "deranged" name = "deranged"
version = "0.5.6" version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
dependencies = [ dependencies = [
"powerfmt", "powerfmt",
"serde_core", "serde_core",
...@@ -1520,6 +1520,7 @@ dependencies = [ ...@@ -1520,6 +1520,7 @@ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
"dashmap 6.1.0", "dashmap 6.1.0",
"derive-getters",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
"flume", "flume",
...@@ -1532,6 +1533,7 @@ dependencies = [ ...@@ -1532,6 +1533,7 @@ dependencies = [
"tokio", "tokio",
"tokio-util", "tokio-util",
"tracing", "tracing",
"uuid",
"xxhash-rust", "xxhash-rust",
] ]
...@@ -2960,9 +2962,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" ...@@ -2960,9 +2962,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]] [[package]]
name = "jiff" name = "jiff"
version = "0.2.20" version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940"
dependencies = [ dependencies = [
"jiff-static", "jiff-static",
"jiff-tzdb-platform", "jiff-tzdb-platform",
...@@ -2975,9 +2977,9 @@ dependencies = [ ...@@ -2975,9 +2977,9 @@ dependencies = [
[[package]] [[package]]
name = "jiff-static" name = "jiff-static"
version = "0.2.20" version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
...@@ -3011,9 +3013,9 @@ dependencies = [ ...@@ -3011,9 +3013,9 @@ dependencies = [
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.87" version = "0.3.90"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f0862381daaec758576dcc22eb7bbf4d7efd67328553f3b45a412a51a3fb21" checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6"
dependencies = [ dependencies = [
"once_cell", "once_cell",
"wasm-bindgen", "wasm-bindgen",
...@@ -3331,7 +3333,7 @@ checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" ...@@ -3331,7 +3333,7 @@ checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"libc", "libc",
"redox_syscall 0.7.1", "redox_syscall 0.7.2",
] ]
[[package]] [[package]]
...@@ -3342,9 +3344,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" ...@@ -3342,9 +3344,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.11.0" version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]] [[package]]
name = "litemap" name = "litemap"
...@@ -4964,9 +4966,9 @@ dependencies = [ ...@@ -4964,9 +4966,9 @@ dependencies = [
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.13.0" version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"memchr", "memchr",
...@@ -5347,9 +5349,9 @@ dependencies = [ ...@@ -5347,9 +5349,9 @@ dependencies = [
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.7.1" version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" checksum = "6d94dd2f7cd932d4dc02cc8b2b50dfd38bd079a4e5d79198b99743d7fcf9a4b4"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
] ]
...@@ -5410,9 +5412,9 @@ dependencies = [ ...@@ -5410,9 +5412,9 @@ dependencies = [
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.9" version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]] [[package]]
name = "reqwest" name = "reqwest"
...@@ -5479,9 +5481,9 @@ dependencies = [ ...@@ -5479,9 +5481,9 @@ dependencies = [
[[package]] [[package]]
name = "rgb" name = "rgb"
version = "0.8.52" version = "0.8.53"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce" checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4"
[[package]] [[package]]
name = "ring" name = "ring"
...@@ -5610,22 +5612,22 @@ dependencies = [ ...@@ -5610,22 +5612,22 @@ dependencies = [
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "1.1.3" version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys 0.11.0", "linux-raw-sys 0.12.1",
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
[[package]] [[package]]
name = "rustls" name = "rustls"
version = "0.23.36" version = "0.23.37"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
dependencies = [ dependencies = [
"aws-lc-rs", "aws-lc-rs",
"log", "log",
...@@ -6031,9 +6033,9 @@ dependencies = [ ...@@ -6031,9 +6033,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_with" name = "serde_with"
version = "3.16.1" version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"chrono", "chrono",
...@@ -6050,9 +6052,9 @@ dependencies = [ ...@@ -6050,9 +6052,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_with_macros" name = "serde_with_macros"
version = "3.16.1" version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0"
dependencies = [ dependencies = [
"darling 0.21.3", "darling 0.21.3",
"proc-macro2", "proc-macro2",
...@@ -6361,14 +6363,14 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" ...@@ -6361,14 +6363,14 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.25.0" version = "3.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
dependencies = [ dependencies = [
"fastrand", "fastrand",
"getrandom 0.4.1", "getrandom 0.4.1",
"once_cell", "once_cell",
"rustix 1.1.3", "rustix 1.1.4",
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
...@@ -7418,9 +7420,9 @@ dependencies = [ ...@@ -7418,9 +7420,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.110" version = "0.2.113"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1de241cdc66a9d91bd84f097039eb140cdc6eec47e0cdbaf9d932a1dd6c35866" checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2"
dependencies = [ dependencies = [
"cfg-if 1.0.4", "cfg-if 1.0.4",
"once_cell", "once_cell",
...@@ -7431,9 +7433,9 @@ dependencies = [ ...@@ -7431,9 +7433,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-futures" name = "wasm-bindgen-futures"
version = "0.4.60" version = "0.4.63"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a42e96ea38f49b191e08a1bab66c7ffdba24b06f9995b39a9dd60222e5b6f1da" checksum = "8a89f4650b770e4521aa6573724e2aed4704372151bd0de9d16a3bbabb87441a"
dependencies = [ dependencies = [
"cfg-if 1.0.4", "cfg-if 1.0.4",
"futures-util", "futures-util",
...@@ -7445,9 +7447,9 @@ dependencies = [ ...@@ -7445,9 +7447,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.110" version = "0.2.113"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e12fdf6649048f2e3de6d7d5ff3ced779cdedee0e0baffd7dff5cdfa3abc8a52" checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950"
dependencies = [ dependencies = [
"quote", "quote",
"wasm-bindgen-macro-support", "wasm-bindgen-macro-support",
...@@ -7455,9 +7457,9 @@ dependencies = [ ...@@ -7455,9 +7457,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro-support" name = "wasm-bindgen-macro-support"
version = "0.2.110" version = "0.2.113"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e63d1795c565ac3462334c1e396fd46dbf481c40f51f5072c310717bc4fb309" checksum = "ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"proc-macro2", "proc-macro2",
...@@ -7468,9 +7470,9 @@ dependencies = [ ...@@ -7468,9 +7470,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-shared" name = "wasm-bindgen-shared"
version = "0.2.110" version = "0.2.113"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9f9cdac23a5ce71f6bf9f8824898a501e511892791ea2a0c6b8568c68b9cb53" checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
...@@ -7524,9 +7526,9 @@ dependencies = [ ...@@ -7524,9 +7526,9 @@ dependencies = [
[[package]] [[package]]
name = "web-sys" name = "web-sys"
version = "0.3.87" version = "0.3.90"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2c7c5718134e770ee62af3b6b4a84518ec10101aad610c024b64d6ff29bb1ff" checksum = "705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97"
dependencies = [ dependencies = [
"js-sys", "js-sys",
"wasm-bindgen", "wasm-bindgen",
......
...@@ -1528,6 +1528,7 @@ dependencies = [ ...@@ -1528,6 +1528,7 @@ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
"dashmap 6.1.0", "dashmap 6.1.0",
"derive-getters",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
"flume", "flume",
...@@ -1540,6 +1541,7 @@ dependencies = [ ...@@ -1540,6 +1541,7 @@ dependencies = [
"tokio", "tokio",
"tokio-util", "tokio-util",
"tracing", "tracing",
"uuid",
"xxhash-rust", "xxhash-rust",
] ]
......
...@@ -13,7 +13,7 @@ repository.workspace = true ...@@ -13,7 +13,7 @@ repository.workspace = true
[features] [features]
default = [] default = []
metrics = ["dep:dynamo-runtime"] metrics = ["dep:dynamo-runtime"]
bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dynamo-runtime/integration", "dep:uuid", "dep:plotters"] bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dynamo-runtime/integration", "dep:plotters"]
[dependencies] [dependencies]
# repo # repo
...@@ -24,6 +24,7 @@ dynamo-tokens = { workspace = true } ...@@ -24,6 +24,7 @@ dynamo-tokens = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
async-trait = { workspace = true } async-trait = { workspace = true }
dashmap = { workspace = true } dashmap = { workspace = true }
derive-getters = { workspace = true }
prometheus = { workspace = true } prometheus = { workspace = true }
rand = { workspace = true } rand = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
...@@ -32,6 +33,7 @@ thiserror = { workspace = true } ...@@ -32,6 +33,7 @@ thiserror = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tokio-util = { workspace = true } tokio-util = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true } xxhash-rust = { workspace = true }
# dependencies # dependencies
...@@ -41,7 +43,6 @@ parking_lot = { workspace = true } ...@@ -41,7 +43,6 @@ parking_lot = { workspace = true }
# bench (optional) # bench (optional)
clap = { version = "4.5", features = ["derive"], optional = true } clap = { version = "4.5", features = ["derive"], optional = true }
indicatif = { version = "0.18.0", optional = true } indicatif = { version = "0.18.0", optional = true }
uuid = { workspace = true, optional = true }
plotters = { version = "0.3", optional = true, default-features = false, features = ["svg_backend", "line_series", "point_series", "full_palette"] } plotters = { version = "0.3", optional = true, default-features = false, features = ["svg_backend", "line_series", "point_series", "full_palette"] }
rustc-hash = "2.1.1" rustc-hash = "2.1.1"
......
...@@ -36,12 +36,12 @@ use std::time::Instant; ...@@ -36,12 +36,12 @@ use std::time::Instant;
use async_trait::async_trait; use async_trait::async_trait;
use dashmap::DashMap; use dashmap::DashMap;
use dynamo_runtime::error::DynamoError;
#[cfg(feature = "metrics")] #[cfg(feature = "metrics")]
pub use dynamo_runtime::protocols::maybe_error::MaybeError; pub use dynamo_runtime::protocols::maybe_error::MaybeError;
#[cfg(feature = "metrics")] #[cfg(feature = "metrics")]
use dynamo_runtime::{ use dynamo_runtime::{
component::Component, component::Component,
error::DynamoError,
metrics::{MetricsHierarchy, prometheus_names::kvrouter}, metrics::{MetricsHierarchy, prometheus_names::kvrouter},
}; };
use prometheus::{IntCounterVec, Opts}; use prometheus::{IntCounterVec, Opts};
...@@ -54,7 +54,7 @@ pub trait MaybeError { ...@@ -54,7 +54,7 @@ pub trait MaybeError {
/// Construct an instance from an error. /// Construct an instance from an error.
fn from_err(err: impl std::error::Error + 'static) -> Self; fn from_err(err: impl std::error::Error + 'static) -> Self;
/// Convert to an error instance if this represents an error. /// Convert to an error instance if this represents an error.
fn err(&self) -> Option<DynamoError>; fn err(&self) -> Option<Box<dyn std::error::Error + Send + Sync>>;
} }
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[cfg(feature = "metrics")] #[cfg(feature = "metrics")]
......
...@@ -11,11 +11,13 @@ pub mod approx; ...@@ -11,11 +11,13 @@ pub mod approx;
pub mod bench_utils; pub mod bench_utils;
pub mod concurrent_radix_tree; pub mod concurrent_radix_tree;
pub mod indexer; pub mod indexer;
pub mod multi_worker_sequence;
#[cfg(feature = "bench")] #[cfg(feature = "bench")]
pub mod naive_indexers; pub mod naive_indexers;
pub mod nested_map; pub mod nested_map;
pub mod protocols; pub mod protocols;
pub mod radix_tree; pub mod radix_tree;
pub mod sequence;
#[cfg(test)] #[cfg(test)]
pub(crate) mod test_utils; pub(crate) mod test_utils;
...@@ -23,6 +25,10 @@ pub(crate) mod test_utils; ...@@ -23,6 +25,10 @@ pub(crate) mod test_utils;
// Re-export key types for convenience // Re-export key types for convenience
pub use concurrent_radix_tree::ConcurrentRadixTree; pub use concurrent_radix_tree::ConcurrentRadixTree;
pub use indexer::{MaybeError, SyncIndexer, ThreadPoolIndexer}; pub use indexer::{MaybeError, SyncIndexer, ThreadPoolIndexer};
pub use multi_worker_sequence::{
ActiveSequencesMultiWorker, SequenceError, SequencePublisher, SequenceRequest,
SequenceSubscriber,
};
#[cfg(feature = "bench")] #[cfg(feature = "bench")]
pub use naive_indexers::{InvertedIndex, NaiveNestedMap}; pub use naive_indexers::{InvertedIndex, NaiveNestedMap};
pub use nested_map::PositionalIndexer; pub use nested_map::PositionalIndexer;
...@@ -31,3 +37,4 @@ pub use protocols::{ ...@@ -31,3 +37,4 @@ pub use protocols::{
compute_block_hash_for_seq, compute_block_hash_for_seq,
}; };
pub use radix_tree::RadixTree; pub use radix_tree::RadixTree;
pub use sequence::{ActiveSequences, RequestId};
This diff is collapsed.
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! KV Cache Sequence Management for LLM Inference
//!
//! This module provides efficient management of token sequences and their associated KV cache blocks
//! for distributed LLM inference. It implements a shared block system where multiple requests can
//! reuse the same KV cache blocks for common token prefixes, significantly reducing memory usage.
//!
//! # Key Components
//!
//! - [`ActiveSequences`]: Per-worker sequence manager that tracks active requests and their
//! token sequences, managing shared KV cache blocks efficiently.
//!
//! # Architecture
//!
//! The system uses a block-based approach where token sequences are divided into fixed-size blocks.
//! Each block is identified by a hash of its contents, allowing for deduplication when multiple
//! requests share common prefixes (e.g., system prompts, few-shot examples).
use derive_getters::Getters;
use dynamo_tokens::SequenceHash;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
use tokio::time::Instant;
use uuid::Uuid;
/// Length of one forced-expiry cycle (5 minutes).
///
/// Requests are snapshotted when a cycle ends; any request from that snapshot
/// that is still tracked when the following cycle ends is forcibly freed.
const EXPIRY_DURATION: Duration = Duration::from_secs(5 * 60);

// TODO: use the common request_id if it exists in the repo
/// Identifier under which a request is tracked by the sequence manager.
pub type RequestId = String;
/// A multi-request sequence manager that handles multiple active sequences with shared KV cache
///
/// Blocks are identified by their sequence hash. A block stays tracked for as long as at
/// least one active request holds a strong handle to it, so requests that contain the same
/// block hash share a single tracked entry.
#[derive(Debug, Getters)]
pub struct ActiveSequences {
/// Blocks held by each active request: every block hash is paired with the strong
/// handle that keeps the block tracked while the request is alive.
active_seqs: HashMap<RequestId, Vec<(SequenceHash, Arc<()>)>>,
/// New (non-cached) prefill tokens per request, kept until prefill is marked
/// completed or the request is freed.
prefill_tokens: HashMap<RequestId, usize>,
/// Expected output tokens per request (used for resource estimation)
expected_output_tokens: HashMap<RequestId, u32>,
/// Weak handle per tracked block; an entry is removed once no request holds a
/// strong handle to the block any more.
unique_blocks: HashMap<SequenceHash, std::sync::Weak<()>>,
/// Fractional block counts for blocks that are partially cached
/// When a block is in both unique_blocks and fractional_blocks,
/// it contributes the fractional value instead of 1 to active_blocks()
fractional_blocks: HashMap<SequenceHash, f64>,
/// Number of tokens per block; used to convert a block overlap into cached tokens.
#[getter(copy)]
block_size: usize,
/// Running total of the prefill tokens currently recorded in `prefill_tokens`.
#[getter(copy)]
active_tokens: usize,
/// Timer for when to force expiry of stale requests
expiry_timer: Instant,
/// Set of request IDs to check for expiry
/// (the requests that were active when the previous expiry cycle ran).
expiry_requests: HashSet<RequestId>,
}
impl ActiveSequences {
/// Builds an empty `ActiveSequences` for blocks of `block_size` tokens.
///
/// No requests, blocks or tokens are tracked initially, and the first
/// forced-expiry deadline is set one `EXPIRY_DURATION` from now.
///
/// # Panics
///
/// Panics if `block_size` is not greater than 1.
pub fn new(block_size: usize) -> Self {
    // TODO: make this not a hard req
    assert!(block_size > 1, "block_size must be greater than 1");

    let first_deadline = Instant::now() + EXPIRY_DURATION;
    Self {
        block_size,
        active_tokens: 0,
        active_seqs: HashMap::new(),
        prefill_tokens: HashMap::new(),
        expected_output_tokens: HashMap::new(),
        unique_blocks: HashMap::new(),
        fractional_blocks: HashMap::new(),
        expiry_requests: HashSet::new(),
        expiry_timer: first_deadline,
    }
}
fn touch_block(&mut self, block: &SequenceHash) -> Arc<()> {
if let Some(weak) = self.unique_blocks.get(block)
&& let Some(rc) = weak.upgrade()
{
return rc;
}
let rc = Arc::new(());
self.unique_blocks.insert(*block, Arc::downgrade(&rc));
rc
}
fn try_remove_block(&mut self, block: &SequenceHash) {
if let Some(weak) = self.unique_blocks.get(block)
&& weak.strong_count() == 0
{
self.unique_blocks.remove(block);
self.fractional_blocks.remove(block);
}
}
/// Returns the number of tracked blocks, weighted by fractional counts.
///
/// Every tracked block counts as 1, except blocks that also have an entry in
/// `fractional_blocks`, which count as their fractional value instead. The
/// weighted total is rounded to the nearest integer.
pub fn active_blocks(&self) -> usize {
    let full_count = self.unique_blocks.len() as f64;

    let weighted = self
        .fractional_blocks
        .iter()
        .filter(|(hash, _)| self.unique_blocks.contains_key(*hash))
        // Replace the block's full weight (1) with its fractional weight.
        .fold(full_count, |acc, (_, frac)| acc - 1.0 + frac);

    weighted.round() as usize
}
/// Assigns `fraction` as the weight of every block that only `request_id` holds.
///
/// A block qualifies when the strong count of its handle is exactly 1, i.e. no
/// other request shares it. Blocks shared with other requests keep whatever
/// weight they already had. Logs a warning and does nothing if the request is
/// not active.
pub fn set_single_ref_blocks_as_fractional(&mut self, request_id: &RequestId, fraction: f64) {
    match self.active_seqs.get(request_id) {
        None => {
            tracing::warn!(
                "Request {request_id} not found for set_single_ref_blocks_as_fractional"
            );
        }
        Some(blocks) => {
            let exclusive = blocks
                .iter()
                .filter(|entry| Arc::strong_count(&entry.1) == 1)
                .map(|(hash, _)| (*hash, fraction));
            self.fractional_blocks.extend(exclusive);
        }
    }
}
/// Add a new request with its initial tokens
///
/// Stale requests are lazily expired first (see `force_expiry`). The request's
/// new prefill tokens, i.e. `isl` minus the tokens covered by `overlap` cached
/// blocks, are then added to `active_tokens`, and a shared handle is taken on
/// every block of `token_sequence`. When `token_sequence` is `None` the request
/// is registered with an empty block list.
///
/// A request ID that is already active is rejected: an error is logged, nothing
/// is modified (no expiry pass is run either) and an empty set is returned.
///
/// Returns the set of expired request IDs that were removed during cleanup
pub fn add_request(
    &mut self,
    request_id: RequestId,
    token_sequence: Option<Vec<SequenceHash>>,
    isl: usize,
    overlap: u32,
    expected_output_tokens: Option<u32>,
) -> HashSet<RequestId> {
    // Check for double-add and log error, returning early
    if self.active_seqs.contains_key(&request_id) {
        tracing::error!("Request {request_id} is already active. Ignoring duplicate add.");
        return HashSet::new();
    }

    // Lazily check and clean up expired requests, capturing removed IDs
    let removed_requests = self.force_expiry();

    let prefill_tokens = self.new_tokens(isl, overlap);
    self.prefill_tokens
        .insert(request_id.clone(), prefill_tokens);
    self.active_tokens += prefill_tokens;

    // Store expected output tokens if provided
    if let Some(tokens) = expected_output_tokens {
        self.expected_output_tokens
            .insert(request_id.clone(), tokens);
    }

    // A missing sequence is treated as an empty one, so both cases share one path.
    let sequence_with_refs: Vec<(SequenceHash, Arc<()>)> = token_sequence
        .unwrap_or_default()
        .into_iter()
        .map(|block| (block, self.touch_block(&block)))
        .collect();

    // Last use of `request_id`: move it instead of cloning.
    self.active_seqs.insert(request_id, sequence_with_refs);

    removed_requests
}
/// Marks the prefill phase of `request_id` as finished.
///
/// The request's prefill tokens stop being tracked and are subtracted from
/// `active_tokens`. Calling this for a request with no tracked prefill tokens
/// (unknown, or already marked) is a no-op.
///
/// # Panics
///
/// Panics if the subtraction would underflow `active_tokens`.
pub fn mark_prefill_completed(&mut self, request_id: &RequestId) {
    let Some(tokens) = self.prefill_tokens.remove(request_id) else {
        return;
    };

    let remaining = self.active_tokens.checked_sub(tokens);
    self.active_tokens = remaining.expect("active_tokens underflow");
}
/// Returns how many of the `isl` input tokens are not covered by cached blocks.
///
/// `overlap` is a number of blocks, so `overlap * block_size` tokens are treated
/// as cached. If that exceeds `isl`, an error is logged and 0 is returned.
pub fn new_tokens(&self, isl: usize, overlap: u32) -> usize {
    let cached_tokens = (overlap as usize) * self.block_size;

    match isl.checked_sub(cached_tokens) {
        Some(uncached) => uncached,
        None => {
            tracing::error!(
                "prefill_tokens < 0 with ISL {isl} < cached_tokens {cached_tokens} (overlap {overlap} * block_size {}), returning 0",
                self.block_size
            );
            0
        }
    }
}
/// Estimates the load if a request were added, without modifying any state.
///
/// Returns `(blocks, tokens)` where `blocks` is the current active block count
/// plus the blocks of `token_sequence` that are not tracked yet (none when the
/// sequence is absent), and `tokens` is `active_tokens` plus the request's new
/// prefill tokens for the given `isl` and `overlap`.
pub fn potential_blocks_and_tokens(
    &self,
    token_sequence: Option<&[SequenceHash]>,
    isl: usize,
    overlap: u32,
) -> (usize, usize) {
    let extra_blocks = token_sequence.map_or(0, |seq| self.new_blocks(seq));
    let potential_blocks = extra_blocks + self.active_blocks();

    let extra_tokens = self.new_tokens(isl, overlap);
    let potential_tokens = extra_tokens + self.active_tokens;

    (potential_blocks, potential_tokens)
}
/// Counts the entries of `token_sequence` whose block is not currently tracked,
/// i.e. the number of blocks that adding the sequence would introduce.
pub fn new_blocks(&self, token_sequence: &[SequenceHash]) -> usize {
    let mut untracked = 0;
    for block in token_sequence {
        if !self.unique_blocks.contains_key(block) {
            untracked += 1;
        }
    }
    untracked
}
/// Returns the block count that would result from adding `token_sequence`:
/// the blocks it would newly introduce plus the blocks that are already active.
pub fn potential_blocks(&self, token_sequence: &[SequenceHash]) -> usize {
    let newly_needed = self.new_blocks(token_sequence);
    let already_active = self.active_blocks();
    newly_needed + already_active
}
/// Releases everything tracked for `request_id` and returns the resulting
/// active block count.
///
/// Outstanding prefill tokens are removed from `active_tokens`, and the request
/// is dropped from the expiry and expected-output bookkeeping. Each block handle
/// held by the request is then released; blocks no other request holds stop
/// being tracked. Freeing an unknown request only logs a warning.
pub fn free(&mut self, request_id: &RequestId) -> usize {
    self.mark_prefill_completed(request_id);
    self.expiry_requests.remove(request_id);
    self.expected_output_tokens.remove(request_id);

    if let Some(held_blocks) = self.active_seqs.remove(request_id) {
        for (block_hash, handle) in held_blocks {
            // Release the strong handle first so the cleanup below can see
            // whether this was the last holder of the block.
            drop(handle);
            self.try_remove_block(&block_hash);
        }
    } else {
        tracing::warn!("Trying to free non-existent request {request_id}");
    }

    self.active_blocks()
}
/// Appends a generated-output block, identified by a random hash, to a request.
///
/// Called during generation to account for output blocks as they appear. When
/// `decay_fraction` is given, it is applied as the weight of every block that
/// only this request holds (the new block included):
/// - 1.0 means fully counted (early in generation)
/// - 0.0 means not counted (near end of expected output)
/// - Computed as: 1 - (current_osl / expected_output_tokens)
///
/// Returns `true` if the block was added, `false` (after logging a warning) if
/// the request is not active.
pub fn add_output_block(
    &mut self,
    request_id: &RequestId,
    decay_fraction: Option<f64>,
) -> bool {
    let is_active = self.active_seqs.contains_key(request_id);
    if !is_active {
        tracing::warn!("Request {request_id} not found for add_output_block");
        return false;
    }

    // Output blocks have no content hash here; use half of a fresh UUID instead.
    let random_hash: SequenceHash = Uuid::new_v4().as_u64_pair().0;
    let handle = self.touch_block(&random_hash);

    // The lookup is repeated because `touch_block` needs `&mut self` in between.
    let blocks = self.active_seqs.get_mut(request_id).unwrap();
    blocks.push((random_hash, handle));

    if let Some(frac) = decay_fraction {
        self.set_single_ref_blocks_as_fractional(request_id, frac);
    }

    true
}
/// Runs one expiry cycle if the expiry deadline has passed.
///
/// Before the deadline this does nothing and returns an empty set. Once it has
/// passed, every request recorded by the previous cycle that is still pending
/// expiry is freed, the deadline is pushed back by `EXPIRY_DURATION`, and all
/// requests that remain active are recorded as candidates for the next cycle.
/// A request is therefore only expired after surviving a full cycle.
///
/// Returns the IDs of the requests that were freed.
pub fn force_expiry(&mut self) -> HashSet<RequestId> {
    let now = Instant::now();
    if self.expiry_timer > now {
        return HashSet::new();
    }

    // Drain rather than clone: the candidate set is rebuilt below anyway.
    let stale: HashSet<RequestId> = self.expiry_requests.drain().collect();
    for request_id in stale.iter() {
        tracing::warn!("Force expiring stale request: {}", request_id);
        self.free(request_id);
    }

    // Whatever is still active becomes the candidate set for the next cycle.
    self.expiry_requests = self.active_seqs.keys().cloned().collect();
    self.expiry_timer = now + EXPIRY_DURATION;

    stale
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Blocks shared between requests are counted once and only disappear when the
/// last request holding them is freed.
#[test]
fn test_active_sequences_shared_blocks() {
    let mut manager = ActiveSequences::new(4);
    let first = "request_1".to_string();
    let second = "request_2".to_string();
    let third = "request_3".to_string();

    // Three fresh blocks, nothing cached.
    manager.add_request(first.clone(), Some(vec![1, 2, 3]), 12, 0, None);
    assert_eq!(manager.active_blocks(), 3);
    assert_eq!(manager.active_tokens(), 12);

    // One more fresh block.
    manager.add_request(second.clone(), Some(vec![4]), 4, 0, None);
    assert_eq!(manager.active_blocks(), 4);
    assert_eq!(manager.active_tokens(), 16);

    // Fully overlapping request: no new blocks and no new prefill tokens.
    manager.add_request(third.clone(), Some(vec![1, 2, 3, 4]), 16, 4, None);
    assert_eq!(manager.active_blocks(), 4);
    assert_eq!(manager.active_tokens(), 16);

    // Block 4 is still held by the third request.
    manager.free(&second);
    assert_eq!(manager.active_blocks(), 4);
    assert_eq!(manager.active_tokens(), 12);

    // Block 4 loses its last holder; blocks 1-3 are still held by the first request.
    manager.free(&third);
    assert_eq!(manager.active_blocks(), 3);
    assert_eq!(manager.active_tokens(), 12);

    manager.free(&first);
    assert_eq!(manager.active_blocks(), 0);
    assert_eq!(manager.active_tokens(), 0);
}
/// Output blocks with a decay fraction re-weight only the blocks that a single
/// request holds; shared blocks keep their earlier weight.
#[test]
fn test_output_blocks_with_fractional_decay() {
    let mut manager = ActiveSequences::new(4);
    let r1 = "r1".to_string();
    let r2 = "r2".to_string();

    // Three prefill blocks for r1.
    manager.add_request(r1.clone(), Some(vec![1, 2, 3]), 12, 0, None);
    assert_eq!(manager.active_blocks(), 3);

    // First output block at weight 0.5: all four blocks are exclusive to r1,
    // so every one of them is weighted 0.5 -> 4 * 0.5 = 2.
    assert!(manager.add_output_block(&r1, Some(0.5)));
    assert_eq!(manager.active_blocks(), 2);

    // r2 shares the prefix [1, 2]. No new unique blocks appear, and the shared
    // blocks keep their 0.5 weight, so the total stays at 2.
    manager.add_request(r2.clone(), Some(vec![1, 2]), 8, 0, None);
    assert_eq!(manager.active_blocks(), 2);

    // Second output block at weight 0.0. Only blocks exclusive to r1 are updated:
    //   blocks 1, 2 (shared with r2)          -> stay at 0.5
    //   block 3, old output, new output block -> set to 0.0
    // Total: 0.5 + 0.5 + 0.0 + 0.0 + 0.0 = 1.
    assert!(manager.add_output_block(&r1, Some(0.0)));
    assert_eq!(manager.active_blocks(), 1);

    // Freeing both requests leaves nothing behind.
    manager.free(&r2);
    manager.free(&r1);
    assert_eq!(manager.active_blocks(), 0);
    assert_eq!(manager.active_tokens(), 0);
}
/// Checks how `active_tokens()` reacts to `mark_prefill_completed`, to a
/// repeated mark on the same request, and to `free`.
#[test]
fn test_mark_prefill_completed() {
    let first = "r1".to_string();
    let second = "r2".to_string();
    let mut sequences = ActiveSequences::new(4);

    // isl=12 with overlap=0 → 12 active tokens.
    sequences.add_request(first.clone(), Some(vec![1, 2, 3]), 12, 0, None);
    assert_eq!(sequences.active_tokens(), 12);

    // Completing prefill brings active_tokens down to 0.
    sequences.mark_prefill_completed(&first);
    assert_eq!(sequences.active_tokens(), 0);

    // Marking the same request again must not panic and must leave the count at 0.
    sequences.mark_prefill_completed(&first);
    assert_eq!(sequences.active_tokens(), 0);

    // A second request with isl=8 contributes 8 tokens.
    sequences.add_request(second.clone(), Some(vec![4, 5]), 8, 0, None);
    assert_eq!(sequences.active_tokens(), 8);

    // Freeing it (which internally calls mark_prefill_completed) returns the count to 0.
    sequences.free(&second);
    assert_eq!(sequences.active_tokens(), 0);
}
/// Drives two expiry cycles on a paused tokio clock and checks that the
/// requests present during the first cycle are reported as expired in the second.
#[tokio::test(start_paused = true)]
async fn test_force_expiry() {
    // Step used for each clock advance; the original comments describe it as
    // going past EXPIRY_DURATION.
    let past_expiry = Duration::from_secs(301);
    let mut sequences = ActiveSequences::new(4);

    // Two requests, two blocks each.
    sequences.add_request("r1".to_string(), Some(vec![1, 2]), 8, 0, None);
    sequences.add_request("r2".to_string(), Some(vec![3, 4]), 8, 0, None);
    assert_eq!(sequences.active_blocks(), 4);

    // First cycle: expiry_requests started empty, so this pass only populates
    // it with {r1, r2} and expires nothing.
    tokio::time::advance(past_expiry).await;
    assert!(sequences.force_expiry().is_empty());

    // Second cycle: the timer elapses again, and adding r3 triggers
    // force_expiry, which drains {r1, r2}.
    tokio::time::advance(past_expiry).await;
    let expected = HashSet::from(["r1".to_string(), "r2".to_string()]);
    let expired = sequences.add_request("r3".to_string(), Some(vec![5]), 4, 0, None);
    assert_eq!(expired, expected);

    // Only r3's single block (and its 4 tokens) is left.
    assert_eq!(sequences.active_blocks(), 1);
    assert_eq!(sequences.active_tokens(), 4);
}
}
...@@ -11,7 +11,7 @@ use tokio::sync::Mutex; ...@@ -11,7 +11,7 @@ use tokio::sync::Mutex;
use super::WorkerSelector; use super::WorkerSelector;
use super::protocols::WorkerWithDpRank; use super::protocols::WorkerWithDpRank;
use super::scheduler::{SchedulingRequest, SchedulingResponse}; use super::scheduler::{SchedulingRequest, SchedulingResponse};
use super::sequence::{ActiveSequencesMultiWorker, SequenceRequest}; use super::sequence::{ActiveSequencesMulti, SequenceRequest};
use crate::discovery::RuntimeConfigWatch; use crate::discovery::RuntimeConfigWatch;
/// Large default for max_num_batched_tokens when not configured (effectively disables queueing for that worker) /// Large default for max_num_batched_tokens when not configured (effectively disables queueing for that worker)
...@@ -51,7 +51,7 @@ impl PartialOrd for QueueEntry { ...@@ -51,7 +51,7 @@ impl PartialOrd for QueueEntry {
/// If queueing is disabled (threshold_frac is None), requests are scheduled immediately. /// If queueing is disabled (threshold_frac is None), requests are scheduled immediately.
pub struct SchedulerQueue { pub struct SchedulerQueue {
pending: Mutex<BinaryHeap<QueueEntry>>, pending: Mutex<BinaryHeap<QueueEntry>>,
slots: Arc<ActiveSequencesMultiWorker>, slots: Arc<ActiveSequencesMulti>,
workers_with_configs: RuntimeConfigWatch, workers_with_configs: RuntimeConfigWatch,
/// Cached threshold fraction; None means queueing is disabled. /// Cached threshold fraction; None means queueing is disabled.
threshold_frac: Option<f64>, threshold_frac: Option<f64>,
...@@ -63,7 +63,7 @@ pub struct SchedulerQueue { ...@@ -63,7 +63,7 @@ pub struct SchedulerQueue {
impl SchedulerQueue { impl SchedulerQueue {
pub fn new( pub fn new(
slots: Arc<ActiveSequencesMultiWorker>, slots: Arc<ActiveSequencesMulti>,
workers_with_configs: RuntimeConfigWatch, workers_with_configs: RuntimeConfigWatch,
threshold_frac: Option<f64>, threshold_frac: Option<f64>,
block_size: u32, block_size: u32,
......
...@@ -6,7 +6,9 @@ use super::RouterConfigOverride; ...@@ -6,7 +6,9 @@ use super::RouterConfigOverride;
use super::WorkerSelector; use super::WorkerSelector;
use super::protocols::{DpRank, OverlapScores, WorkerId, WorkerSelectionResult, WorkerWithDpRank}; use super::protocols::{DpRank, OverlapScores, WorkerId, WorkerSelectionResult, WorkerWithDpRank};
use super::queue::SchedulerQueue; use super::queue::SchedulerQueue;
use super::sequence::{ActiveSequencesMultiWorker, SequenceError, SequenceRequest}; use super::sequence::{
ActiveSequencesMulti, SequenceError, SequenceRequest, create_multi_worker_sequences,
};
use crate::discovery::RuntimeConfigWatch; use crate::discovery::RuntimeConfigWatch;
use crate::local_model::runtime_config::ModelRuntimeConfig; use crate::local_model::runtime_config::ModelRuntimeConfig;
use anyhow::Result; use anyhow::Result;
...@@ -82,7 +84,7 @@ impl SchedulingRequest { ...@@ -82,7 +84,7 @@ impl SchedulingRequest {
pub struct KvScheduler { pub struct KvScheduler {
request_tx: tokio::sync::mpsc::Sender<SchedulingRequest>, request_tx: tokio::sync::mpsc::Sender<SchedulingRequest>,
slots: Arc<ActiveSequencesMultiWorker>, slots: Arc<ActiveSequencesMulti>,
queue: Arc<SchedulerQueue>, queue: Arc<SchedulerQueue>,
} }
...@@ -103,18 +105,16 @@ impl KvScheduler { ...@@ -103,18 +105,16 @@ impl KvScheduler {
workers_with_configs.borrow().clone(); workers_with_configs.borrow().clone();
let router_id = component.drt().discovery().instance_id(); let router_id = component.drt().discovery().instance_id();
let slots = Arc::new( let slots = create_multi_worker_sequences(
ActiveSequencesMultiWorker::new( component.clone(),
component.clone(), block_size as usize,
block_size as usize, initial_workers,
initial_workers, kv_router_config.router_replica_sync,
kv_router_config.router_replica_sync, router_id,
router_id, worker_type,
worker_type, )
) .await
.await .map_err(|e| KvSchedulerError::InitFailed(e.to_string()))?;
.map_err(|e| KvSchedulerError::InitFailed(e.to_string()))?,
);
// Spawn background task to sync slots when the watch value changes. // Spawn background task to sync slots when the watch value changes.
let slots_monitor = slots.clone(); let slots_monitor = slots.clone();
...@@ -141,7 +141,11 @@ impl KvScheduler { ...@@ -141,7 +141,11 @@ impl KvScheduler {
let current_workers = monitor_rx.borrow_and_update().clone(); let current_workers = monitor_rx.borrow_and_update().clone();
if current_workers != last_workers { if current_workers != last_workers {
slots_monitor.update_workers(current_workers.clone()); let dp_sizes: HashMap<u64, u32> = current_workers
.iter()
.map(|(&id, c)| (id, c.data_parallel_size))
.collect();
slots_monitor.update_workers(dp_sizes);
last_workers = current_workers; last_workers = current_workers;
} }
} }
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment