Unverified Commit 008683d6 authored by Ryan Olson's avatar Ryan Olson Committed by GitHub
Browse files

feat: adding kvbm-engine (#6773)


Signed-off-by: default avatarRyan Olson <rolson@nvidia.com>
parent cf79c4fc
......@@ -247,7 +247,7 @@ dependencies = [
"thiserror 1.0.69",
"time",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.4",
"tokio-stream",
"tokio-util",
"tokio-websockets",
......@@ -383,6 +383,48 @@ dependencies = [
"arrayvec",
]
[[package]]
name = "aws-config"
version = "1.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-sdk-sso",
"aws-sdk-ssooidc",
"aws-sdk-sts",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"hex",
"http 1.4.0",
"ring",
"time",
"tokio",
"tracing",
"url",
"zeroize",
]
[[package]]
name = "aws-credential-types"
version = "1.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
"aws-smithy-types",
"zeroize",
]
[[package]]
name = "aws-lc-rs"
version = "1.16.2"
......@@ -405,6 +447,384 @@ dependencies = [
"fs_extra",
]
[[package]]
name = "aws-runtime"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12"
dependencies = [
"aws-credential-types",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-eventstream",
"aws-smithy-http",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"bytes-utils",
"fastrand",
"http 0.2.12",
"http 1.4.0",
"http-body 0.4.6",
"http-body 1.0.1",
"percent-encoding",
"pin-project-lite",
"tracing",
"uuid",
]
[[package]]
name = "aws-sdk-s3"
version = "1.123.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c018f22146966fdd493a664f62ee2483dff256b42a08c125ab6a084bde7b77fe"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-checksums",
"aws-smithy-eventstream",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-observability",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-smithy-xml",
"aws-types",
"bytes",
"fastrand",
"hex",
"hmac",
"http 0.2.12",
"http 1.4.0",
"http-body 1.0.1",
"lru 0.16.3",
"percent-encoding",
"regex-lite",
"sha2",
"tracing",
"url",
]
[[package]]
name = "aws-sdk-sso"
version = "1.94.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-observability",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"http 1.4.0",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-ssooidc"
version = "1.96.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-observability",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"http 1.4.0",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-sts"
version = "1.98.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-observability",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-smithy-xml",
"aws-types",
"fastrand",
"http 0.2.12",
"http 1.4.0",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sigv4"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e"
dependencies = [
"aws-credential-types",
"aws-smithy-eventstream",
"aws-smithy-http",
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"crypto-bigint 0.5.5",
"form_urlencoded",
"hex",
"hmac",
"http 0.2.12",
"http 1.4.0",
"p256",
"percent-encoding",
"ring",
"sha2",
"subtle",
"time",
"tracing",
"zeroize",
]
[[package]]
name = "aws-smithy-async"
version = "1.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70"
dependencies = [
"futures-util",
"pin-project-lite",
"tokio",
]
[[package]]
name = "aws-smithy-checksums"
version = "0.64.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a764fa7222922f6c0af8eea478b0ef1ba5ce1222af97e01f33ca5e957bd7f3b9"
dependencies = [
"aws-smithy-http",
"aws-smithy-types",
"bytes",
"crc-fast",
"hex",
"http 1.4.0",
"http-body 1.0.1",
"http-body-util",
"md-5",
"pin-project-lite",
"sha1",
"sha2",
"tracing",
]
[[package]]
name = "aws-smithy-eventstream"
version = "0.60.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c0b3e587fbaa5d7f7e870544508af8ce82ea47cd30376e69e1e37c4ac746f79"
dependencies = [
"aws-smithy-types",
"bytes",
"crc32fast",
]
[[package]]
name = "aws-smithy-http"
version = "0.63.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af4a8a5fe3e4ac7ee871237c340bbce13e982d37543b65700f4419e039f5d78e"
dependencies = [
"aws-smithy-eventstream",
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"bytes-utils",
"futures-core",
"futures-util",
"http 1.4.0",
"http-body 1.0.1",
"http-body-util",
"percent-encoding",
"pin-project-lite",
"pin-utils",
"tracing",
]
[[package]]
name = "aws-smithy-http-client"
version = "1.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0709f0083aa19b704132684bc26d3c868e06bd428ccc4373b0b55c3e8748a58b"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
"aws-smithy-types",
"h2 0.3.27",
"h2 0.4.13",
"http 0.2.12",
"http 1.4.0",
"http-body 0.4.6",
"hyper 0.14.32",
"hyper 1.9.0",
"hyper-rustls 0.24.2",
"hyper-rustls 0.27.7",
"hyper-util",
"pin-project-lite",
"rustls 0.21.12",
"rustls 0.23.37",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"tokio",
"tokio-rustls 0.26.4",
"tower 0.5.3",
"tracing",
]
[[package]]
name = "aws-smithy-json"
version = "0.62.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-observability"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b"
dependencies = [
"aws-smithy-runtime-api",
]
[[package]]
name = "aws-smithy-query"
version = "0.60.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0"
dependencies = [
"aws-smithy-types",
"urlencoding",
]
[[package]]
name = "aws-smithy-runtime"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd3dfc18c1ce097cf81fced7192731e63809829c6cbf933c1ec47452d08e1aa"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-http-client",
"aws-smithy-observability",
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"fastrand",
"http 0.2.12",
"http 1.4.0",
"http-body 0.4.6",
"http-body 1.0.1",
"http-body-util",
"pin-project-lite",
"pin-utils",
"tokio",
"tracing",
]
[[package]]
name = "aws-smithy-runtime-api"
version = "1.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c55e0837e9b8526f49e0b9bfa9ee18ddee70e853f5bc09c5d11ebceddcb0fec"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
"bytes",
"http 0.2.12",
"http 1.4.0",
"pin-project-lite",
"tokio",
"tracing",
"zeroize",
]
[[package]]
name = "aws-smithy-types"
version = "1.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "576b0d6991c9c32bc14fc340582ef148311f924d41815f641a308b5d11e8e7cd"
dependencies = [
"base64-simd",
"bytes",
"bytes-utils",
"futures-core",
"http 0.2.12",
"http 1.4.0",
"http-body 0.4.6",
"http-body 1.0.1",
"http-body-util",
"itoa",
"num-integer",
"pin-project-lite",
"pin-utils",
"ryu",
"serde",
"time",
"tokio",
"tokio-util",
]
[[package]]
name = "aws-smithy-xml"
version = "0.60.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3"
dependencies = [
"xmlparser",
]
[[package]]
name = "aws-types"
version = "1.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
"aws-smithy-runtime-api",
"aws-smithy-types",
"rustc_version",
"tracing",
]
[[package]]
name = "axum"
version = "0.7.9"
......@@ -531,11 +951,11 @@ dependencies = [
"hyper 1.9.0",
"hyper-util",
"pin-project-lite",
"rustls",
"rustls 0.23.37",
"rustls-pemfile",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.4",
"tower-service",
]
......@@ -550,6 +970,12 @@ dependencies = [
"tokio",
]
[[package]]
name = "base16ct"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce"
[[package]]
name = "base64"
version = "0.13.1"
......@@ -568,6 +994,16 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "base64-simd"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195"
dependencies = [
"outref",
"vsimd",
]
[[package]]
name = "base64ct"
version = "1.8.3"
......@@ -811,6 +1247,16 @@ dependencies = [
"serde",
]
[[package]]
name = "bytes-utils"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35"
dependencies = [
"bytes",
"either",
]
[[package]]
name = "cast"
version = "0.3.0"
......@@ -1223,6 +1669,33 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc-fast"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d"
dependencies = [
"crc",
"digest",
"rustversion",
"spin 0.10.0",
]
[[package]]
name = "crc32fast"
version = "1.5.0"
......@@ -1368,6 +1841,28 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "crypto-bigint"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef"
dependencies = [
"generic-array",
"rand_core 0.6.4",
"subtle",
"zeroize",
]
[[package]]
name = "crypto-bigint"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
dependencies = [
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "crypto-common"
version = "0.1.7"
......@@ -1564,19 +2059,6 @@ dependencies = [
"serde",
]
[[package]]
name = "dashmap"
version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if",
"hashbrown 0.14.5",
"lock_api",
"once_cell",
"parking_lot_core",
]
[[package]]
name = "dashmap"
version = "6.1.0"
......@@ -1603,6 +2085,16 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafbece59594ed57696a1a69e8bb3ca1683fbc9cdb41d5c02726070b2cd8f19d"
[[package]]
name = "der"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de"
dependencies = [
"const-oid",
"zeroize",
]
[[package]]
name = "der"
version = "0.7.10"
......@@ -1739,6 +2231,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
......@@ -1861,12 +2354,12 @@ dependencies = [
"anyhow",
"async-trait",
"axum 0.8.4",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
"dynamo-tokens",
"flume",
"flume 0.12.0",
"ordered-float 4.6.0",
"parking_lot",
"prometheus",
......@@ -1913,11 +2406,12 @@ dependencies = [
"clap 4.6.0",
"criterion 0.3.6",
"cudarc",
"dashmap 5.5.3",
"dashmap",
"derive-getters",
"derive_builder",
"dialoguer",
"dynamo-bench",
"dynamo-config",
"dynamo-kv-router",
"dynamo-memory",
"dynamo-mocker",
......@@ -1959,7 +2453,7 @@ dependencies = [
"rmp-serde",
"rstest 0.18.2",
"rustc-hash 1.1.0",
"rustls",
"rustls 0.23.37",
"serde",
"serde_json",
"serial_test",
......@@ -1994,6 +2488,7 @@ dependencies = [
"anyhow",
"cudarc",
"libc",
"libloading 0.8.9",
"nix 0.30.1",
"nixl-sys",
"offset-allocator",
......@@ -2009,7 +2504,7 @@ name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
......@@ -2086,7 +2581,7 @@ dependencies = [
"console-subscriber",
"criterion 0.5.1",
"cudarc",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-config",
......@@ -2148,7 +2643,7 @@ version = "1.0.0"
dependencies = [
"bs58",
"bytemuck",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"serde",
"thiserror 2.0.18",
......@@ -2156,13 +2651,25 @@ dependencies = [
"xxhash-rust",
]
[[package]]
name = "ecdsa"
version = "0.14.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c"
dependencies = [
"der 0.6.1",
"elliptic-curve",
"rfc6979",
"signature 1.6.4",
]
[[package]]
name = "ed25519"
version = "2.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
dependencies = [
"signature",
"signature 2.2.0",
]
[[package]]
......@@ -2174,7 +2681,7 @@ dependencies = [
"curve25519-dalek",
"ed25519",
"sha2",
"signature",
"signature 2.2.0",
"subtle",
]
......@@ -2199,6 +2706,26 @@ dependencies = [
"serde",
]
[[package]]
name = "elliptic-curve"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3"
dependencies = [
"base16ct",
"crypto-bigint 0.4.9",
"der 0.6.1",
"digest",
"ff",
"generic-array",
"group",
"pkcs8 0.9.0",
"rand_core 0.6.4",
"sec1",
"subtle",
"zeroize",
]
[[package]]
name = "encode_unicode"
version = "1.0.0"
......@@ -2397,9 +2924,9 @@ dependencies = [
[[package]]
name = "fastrand"
version = "2.4.0"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f"
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
dependencies = [
"getrandom 0.3.4",
]
......@@ -2433,6 +2960,16 @@ dependencies = [
"simd-adler32",
]
[[package]]
name = "ff"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160"
dependencies = [
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "ffmpeg-next"
version = "7.1.0"
......@@ -2515,6 +3052,18 @@ dependencies = [
"zlib-rs",
]
[[package]]
name = "flume"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
dependencies = [
"futures-core",
"futures-sink",
"nanorand",
"spin 0.9.8",
]
[[package]]
name = "flume"
version = "0.12.0"
......@@ -2524,7 +3073,7 @@ dependencies = [
"fastrand",
"futures-core",
"futures-sink",
"spin",
"spin 0.9.8",
]
[[package]]
......@@ -2574,6 +3123,17 @@ dependencies = [
"tokio",
]
[[package]]
name = "fs4"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8640e34b88f7652208ce9e88b1a37a2ae95227d84abec377ccd3c5cfeb141ed4"
dependencies = [
"rustix 1.1.4",
"tokio",
"windows-sys 0.59.0",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
......@@ -2805,6 +3365,17 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "group"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7"
dependencies = [
"ff",
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "h2"
version = "0.3.27"
......@@ -2978,6 +3549,15 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "home"
version = "0.5.12"
......@@ -3117,6 +3697,21 @@ dependencies = [
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
dependencies = [
"futures-util",
"http 0.2.12",
"hyper 0.14.32",
"log",
"rustls 0.21.12",
"tokio",
"tokio-rustls 0.24.1",
]
[[package]]
name = "hyper-rustls"
version = "0.27.7"
......@@ -3127,11 +3722,11 @@ dependencies = [
"hyper 1.9.0",
"hyper-util",
"log",
"rustls",
"rustls 0.23.37",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.4",
"tower-service",
"webpki-roots 1.0.6",
]
......@@ -3777,14 +4372,14 @@ dependencies = [
"http-body 1.0.1",
"http-body-util",
"hyper 1.9.0",
"hyper-rustls",
"hyper-rustls 0.27.7",
"hyper-timeout",
"hyper-util",
"jsonpath-rust",
"k8s-openapi",
"kube-core",
"pem",
"rustls",
"rustls 0.23.37",
"secrecy",
"serde",
"serde_json",
......@@ -3859,15 +4454,75 @@ dependencies = [
[[package]]
name = "kvbm-common"
version = "0.1.0"
version = "1.0.0"
dependencies = [
"dynamo-tokens",
"serde",
]
[[package]]
name = "kvbm-config"
version = "1.0.0"
dependencies = [
"anyhow",
"dynamo-memory",
"figment",
"nix 0.30.1",
"nvtx",
"rayon",
"serde",
"serde_json",
"temp-env",
"thiserror 2.0.18",
"tokio",
"tracing",
"validator",
"velo",
]
[[package]]
name = "kvbm-engine"
version = "1.0.0"
dependencies = [
"anyhow",
"async-nats",
"aws-config",
"aws-sdk-s3",
"bytes",
"chrono",
"clap 4.6.0",
"crossbeam-queue",
"cudarc",
"dashmap",
"derive_builder",
"dynamo-memory",
"figment",
"flume 0.11.1",
"futures",
"kvbm-common",
"kvbm-config",
"kvbm-logical",
"kvbm-physical",
"libc",
"nvtx",
"oneshot",
"parking_lot",
"rayon",
"rmp-serde",
"serde",
"serde_json",
"tokio",
"tokio-rayon",
"tokio-stream",
"tracing",
"tracing-subscriber",
"uuid",
"velo",
]
[[package]]
name = "kvbm-kernels"
version = "0.1.0"
version = "1.0.0"
dependencies = [
"clap 4.6.0",
"cudarc",
......@@ -3878,7 +4533,7 @@ dependencies = [
[[package]]
name = "kvbm-logical"
version = "0.1.0"
version = "1.0.0"
dependencies = [
"anyhow",
"async-stream",
......@@ -3908,7 +4563,7 @@ dependencies = [
[[package]]
name = "kvbm-physical"
version = "0.1.0"
version = "1.0.0"
dependencies = [
"aligned-vec",
"anyhow",
......@@ -3921,7 +4576,6 @@ dependencies = [
"futures",
"kvbm-common",
"kvbm-kernels",
"parking_lot",
"rstest 0.26.1",
"serde",
"serde_json",
......@@ -3930,7 +4584,7 @@ dependencies = [
"tracing",
"uuid",
"validator",
"velo-events",
"velo",
]
[[package]]
......@@ -4270,6 +4924,15 @@ dependencies = [
"autocfg",
]
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "mime"
version = "0.3.17"
......@@ -4464,6 +5127,15 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
[[package]]
name = "nanorand"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3"
dependencies = [
"getrandom 0.2.17",
]
[[package]]
name = "ndarray"
version = "0.16.1"
......@@ -4563,7 +5235,7 @@ dependencies = [
"bitflags 1.3.2",
"cfg-if",
"libc",
"memoffset",
"memoffset 0.7.1",
"pin-utils",
]
......@@ -4577,6 +5249,7 @@ dependencies = [
"cfg-if",
"cfg_aliases",
"libc",
"memoffset 0.9.1",
]
[[package]]
......@@ -4801,6 +5474,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "nvtx"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad2e855e8019f99e4b94ac33670eb4e4f570a2e044f3749a0b2c7f83b841e52c"
dependencies = [
"cc",
]
[[package]]
name = "objc2"
version = "0.6.4"
......@@ -5228,6 +5910,23 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "outref"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
[[package]]
name = "p256"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594"
dependencies = [
"ecdsa",
"elliptic-curve",
"sha2",
]
[[package]]
name = "parking"
version = "2.2.1"
......@@ -5479,14 +6178,24 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkcs8"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba"
dependencies = [
"der 0.6.1",
"spki 0.6.0",
]
[[package]]
name = "pkcs8"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
dependencies = [
"der",
"spki",
"der 0.7.10",
"spki 0.7.3",
]
[[package]]
......@@ -5597,7 +6306,7 @@ version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f"
dependencies = [
"toml_edit 0.25.10+spec-1.1.0",
"toml_edit 0.25.11+spec-1.1.0",
]
[[package]]
......@@ -5904,7 +6613,7 @@ dependencies = [
"quinn-proto",
"quinn-udp",
"rustc-hash 2.1.2",
"rustls",
"rustls 0.23.37",
"socket2 0.6.3",
"thiserror 2.0.18",
"tokio",
......@@ -5924,7 +6633,7 @@ dependencies = [
"rand 0.9.2",
"ring",
"rustc-hash 2.1.2",
"rustls",
"rustls 0.23.37",
"rustls-pki-types",
"slab",
"thiserror 2.0.18",
......@@ -6205,6 +6914,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-lite"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
[[package]]
name = "regex-syntax"
version = "0.8.10"
......@@ -6270,7 +6985,7 @@ dependencies = [
"http-body 1.0.1",
"http-body-util",
"hyper 1.9.0",
"hyper-rustls",
"hyper-rustls 0.27.7",
"hyper-util",
"js-sys",
"log",
......@@ -6279,7 +6994,7 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls 0.23.37",
"rustls-native-certs 0.8.3",
"rustls-pki-types",
"serde",
......@@ -6287,7 +7002,7 @@ dependencies = [
"serde_urlencoded",
"sync_wrapper 1.0.2",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.4",
"tokio-util",
"tower 0.5.3",
"tower-http",
......@@ -6300,6 +7015,17 @@ dependencies = [
"webpki-roots 1.0.6",
]
[[package]]
name = "rfc6979"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb"
dependencies = [
"crypto-bigint 0.4.9",
"hmac",
"zeroize",
]
[[package]]
name = "rgb"
version = "0.8.53"
......@@ -6573,6 +7299,18 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "rustls"
version = "0.21.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
dependencies = [
"log",
"ring",
"rustls-webpki 0.101.7",
"sct",
]
[[package]]
name = "rustls"
version = "0.23.37"
......@@ -6633,6 +7371,16 @@ dependencies = [
"zeroize",
]
[[package]]
name = "rustls-webpki"
version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "rustls-webpki"
version = "0.102.8"
......@@ -6806,12 +7554,36 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sct"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "sdd"
version = "3.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
[[package]]
name = "sec1"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928"
dependencies = [
"base16ct",
"der 0.6.1",
"generic-array",
"pkcs8 0.9.0",
"subtle",
"zeroize",
]
[[package]]
name = "secrecy"
version = "0.10.3"
......@@ -7150,12 +7922,22 @@ version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31"
dependencies = [
"pkcs8",
"pkcs8 0.10.2",
"rand_core 0.6.4",
"signature",
"signature 2.2.0",
"zeroize",
]
[[package]]
name = "signature"
version = "1.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c"
dependencies = [
"digest",
"rand_core 0.6.4",
]
[[package]]
name = "signature"
version = "2.2.0"
......@@ -7254,6 +8036,22 @@ dependencies = [
"lock_api",
]
[[package]]
name = "spin"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591"
[[package]]
name = "spki"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b"
dependencies = [
"base64ct",
"der 0.6.1",
]
[[package]]
name = "spki"
version = "0.7.3"
......@@ -7261,7 +8059,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
dependencies = [
"base64ct",
"der",
"der 0.7.10",
]
[[package]]
......@@ -7722,13 +8520,23 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.12",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
dependencies = [
"rustls",
"rustls 0.23.37",
"tokio",
]
......@@ -7799,7 +8607,7 @@ dependencies = [
"ring",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.4",
"tokio-util",
"webpki-roots 0.26.11",
]
......@@ -7863,9 +8671,9 @@ dependencies = [
[[package]]
name = "toml_edit"
version = "0.25.10+spec-1.1.0"
version = "0.25.11+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a82418ca169e235e6c399a84e395ab6debeb3bc90edc959bf0f48647c6a32d1b"
checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b"
dependencies = [
"indexmap 2.13.1",
"toml_datetime 1.1.1+spec-1.1.0",
......@@ -7969,7 +8777,7 @@ dependencies = [
"socket2 0.6.3",
"sync_wrapper 1.0.2",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.4",
"tokio-stream",
"tower 0.5.3",
"tower-layer",
......@@ -8414,7 +9222,7 @@ dependencies = [
"flate2",
"log",
"once_cell",
"rustls",
"rustls 0.23.37",
"rustls-pki-types",
"serde",
"serde_json",
......@@ -8436,6 +9244,12 @@ dependencies = [
"serde_derive",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf16_iter"
version = "1.0.5"
......@@ -8561,9 +9375,33 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "velo"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e0cc874c11ea3d03afd7adf90529bcf3df374039deff65bc02a26e71bb5814b"
dependencies = [
"anyhow",
"bytes",
"serde",
"tokio",
"tokio-util",
"velo-common",
"velo-discovery",
"velo-events",
"velo-messenger",
"velo-observability",
"velo-queue",
"velo-rendezvous",
"velo-streaming",
"velo-transports",
]
[[package]]
name = "velo-common"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc2824d7667c4ee992dfc58c30799518f504d39c91f1e83962d55cb8c44cfb67"
dependencies = [
"bytes",
"rmp-serde",
......@@ -8575,12 +9413,36 @@ dependencies = [
"xxhash-rust",
]
[[package]]
name = "velo-discovery"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41107947fe3d15972c56815e42fd2485ca4ba58a5a69ec1a6fd61c00966f8734"
dependencies = [
"anyhow",
"async-stream",
"bytes",
"fs4",
"futures",
"parking_lot",
"rmp-serde",
"serde",
"serde_json",
"tokio",
"tokio-util",
"tracing",
"uuid",
"velo-common",
]
[[package]]
name = "velo-events"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2504e857f1ed52ad96ce73792a724be880ba75f3579a27050395e81d38cb42d0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"dashmap",
"futures",
"parking_lot",
"serde",
......@@ -8591,17 +9453,123 @@ dependencies = [
"xxhash-rust",
]
[[package]]
name = "velo-messenger"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ada8ea47ab39a82ef9101024452a3c605bcf5c0acad82c76afcf48a17c0450cf"
dependencies = [
"anyhow",
"bs58",
"bytes",
"dashmap",
"derive-getters",
"derive_builder",
"flume 0.12.0",
"futures",
"lru 0.12.5",
"parking_lot",
"rmp-serde",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tokio-util",
"tracing",
"uuid",
"velo-common",
"velo-discovery",
"velo-events",
"velo-observability",
"velo-transports",
]
[[package]]
name = "velo-observability"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57813414b19b0f845744fcf158ed4b87afe4f5c59ee99d58872c8e5d5534d61b"
dependencies = [
"prometheus",
"tracing",
]
[[package]]
name = "velo-queue"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7bcce33f5710d87e33e1eacc5472300cbe6acdfb5f2cd6929c12f2e4908ef38"
dependencies = [
"bytes",
"dashmap",
"flume 0.12.0",
"futures",
"rmp-serde",
"serde",
"thiserror 2.0.18",
"tokio",
"tracing",
]
[[package]]
name = "velo-rendezvous"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1eac77f329b0c3dba1b36114c969e0a7d680bb4a6436720754bef14a24fedde"
dependencies = [
"anyhow",
"bytes",
"dashmap",
"futures",
"serde",
"serde_json",
"tracing",
"velo-common",
"velo-messenger",
"velo-observability",
]
[[package]]
name = "velo-streaming"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd6dc0d71f73c6f369302bb94b05341a807a5693f6c321fc208d9c9a974d918a"
dependencies = [
"anyhow",
"bytes",
"dashmap",
"derive_builder",
"flume 0.12.0",
"futures",
"rmp-serde",
"serde",
"serde_json",
"socket2 0.6.3",
"thiserror 2.0.18",
"tokio",
"tokio-util",
"tonic-build 0.13.1",
"tracing",
"uuid",
"velo-common",
"velo-messenger",
"velo-observability",
"velo-transports",
]
[[package]]
name = "velo-transports"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb5a348cc2fe13fc560824f170d3bdc3d10ec3a11802bb32489a59f49eb12409"
dependencies = [
"anyhow",
"async-nats",
"axum 0.8.4",
"bs58",
"bytes",
"dashmap 6.1.0",
"flume",
"dashmap",
"flume 0.12.0",
"futures",
"http 1.4.0",
"http-body 1.0.1",
......@@ -8611,8 +9579,8 @@ dependencies = [
"nix 0.30.1",
"parking_lot",
"prost 0.13.5",
"reqwest 0.12.28",
"rmp-serde",
"serde",
"serde_json",
"socket2 0.6.3",
"thiserror 2.0.18",
......@@ -8623,8 +9591,8 @@ dependencies = [
"tonic-build 0.13.1",
"tower 0.5.3",
"tracing",
"tracing-subscriber",
"velo-common",
"velo-observability",
]
[[package]]
......@@ -8656,6 +9624,12 @@ version = "0.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
[[package]]
name = "vsimd"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
[[package]]
name = "wait-timeout"
version = "0.2.1"
......@@ -9314,6 +10288,12 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
[[package]]
name = "xmlparser"
version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
[[package]]
name = "xxhash-rust"
version = "0.8.15"
......
......@@ -11,6 +11,8 @@ members = [
"lib/kv-router",
"lib/memory",
"lib/kvbm-common",
"lib/kvbm-config",
"lib/kvbm-engine",
"lib/kvbm-kernels",
"lib/kvbm-logical",
"lib/kvbm-physical",
......@@ -19,9 +21,6 @@ members = [
"lib/bench",
"lib/bindings/c",
"lib/bindings/python/codegen",
"lib/velo-common",
"lib/velo-transports",
"lib/velo-events",
]
resolver = "3"
......@@ -48,16 +47,17 @@ dynamo-protocols = { path = "lib/protocols", version = "1.0.0" }
dynamo-parsers = { path = "lib/parsers", version = "1.0.0" }
fastokens = { version = "0.1.0" }
# kvbm
kvbm-common = { path = "lib/kvbm-common", version = "0.1.0" }
kvbm-kernels = { path = "lib/kvbm-kernels", version = "0.1.0" }
kvbm-logical = { path = "lib/kvbm-logical", version = "0.1.0" }
kvbm-physical = { path = "lib/kvbm-physical", version = "0.1.0" }
kvbm-common = { path = "lib/kvbm-common", version = "1.0.0" }
kvbm-config = { path = "lib/kvbm-config", version = "1.0.0" }
kvbm-engine = { path = "lib/kvbm-engine", version = "1.0.0" }
kvbm-kernels = { path = "lib/kvbm-kernels", version = "1.0.0" }
kvbm-logical = { path = "lib/kvbm-logical", version = "1.0.0" }
kvbm-physical = { path = "lib/kvbm-physical", version = "1.0.0" }
# velo
velo-common = { path = "lib/velo-common", version = "0.1.0" }
velo-transports = { path = "lib/velo-transports", version = "0.1.0" }
velo-events = { path = "lib/velo-events", version = "0.1.0" }
velo = { version = "0.1.0" }
# External dependencies
anyhow = { version = "1" }
......
......@@ -1249,19 +1249,6 @@ dependencies = [
"serde",
]
[[package]]
name = "dashmap"
version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if",
"hashbrown 0.14.5",
"lock_api",
"once_cell",
"parking_lot_core",
]
[[package]]
name = "dashmap"
version = "6.1.0"
......@@ -1523,7 +1510,7 @@ version = "1.0.0"
dependencies = [
"anyhow",
"async-trait",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
......@@ -1566,10 +1553,11 @@ dependencies = [
"bytes",
"chrono",
"cudarc",
"dashmap 5.5.3",
"dashmap",
"derive-getters",
"derive_builder",
"dialoguer",
"dynamo-config",
"dynamo-kv-router",
"dynamo-memory",
"dynamo-mocker",
......@@ -1636,6 +1624,7 @@ dependencies = [
"anyhow",
"cudarc",
"libc",
"libloading 0.8.9",
"nix 0.30.1",
"nixl-sys",
"offset-allocator",
......@@ -1649,7 +1638,7 @@ name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
......@@ -1718,7 +1707,7 @@ dependencies = [
"blake3",
"bytes",
"chrono",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-config",
......@@ -1775,7 +1764,7 @@ version = "1.0.0"
dependencies = [
"bs58",
"bytemuck",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"serde",
"thiserror 2.0.18",
......@@ -2014,9 +2003,9 @@ dependencies = [
[[package]]
name = "fastrand"
version = "2.4.0"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f"
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
dependencies = [
"getrandom 0.3.4",
]
......
......@@ -1267,19 +1267,6 @@ dependencies = [
"serde",
]
[[package]]
name = "dashmap"
version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if",
"hashbrown 0.14.5",
"lock_api",
"once_cell",
"parking_lot_core",
]
[[package]]
name = "dashmap"
version = "6.1.0"
......@@ -1532,7 +1519,7 @@ dependencies = [
"anyhow",
"async-trait",
"axum",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-runtime",
......@@ -1578,10 +1565,11 @@ dependencies = [
"bytes",
"chrono",
"cudarc",
"dashmap 5.5.3",
"dashmap",
"derive-getters",
"derive_builder",
"dialoguer",
"dynamo-config",
"dynamo-kv-router",
"dynamo-memory",
"dynamo-mocker",
......@@ -1651,6 +1639,7 @@ dependencies = [
"anyhow",
"cudarc",
"libc",
"libloading 0.8.9",
"nix 0.30.1",
"nixl-sys",
"offset-allocator",
......@@ -1664,7 +1653,7 @@ name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
......@@ -1725,7 +1714,7 @@ dependencies = [
"anyhow",
"async-trait",
"clap",
"dashmap 6.1.0",
"dashmap",
"dynamo-kv-router",
"dynamo-llm",
"dynamo-mocker",
......@@ -1765,7 +1754,7 @@ dependencies = [
"bytes",
"chrono",
"cudarc",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"derive_builder",
"dynamo-config",
......@@ -1822,7 +1811,7 @@ version = "1.0.0"
dependencies = [
"bs58",
"bytemuck",
"dashmap 6.1.0",
"dashmap",
"derive-getters",
"serde",
"thiserror 2.0.18",
......@@ -2061,9 +2050,9 @@ dependencies = [
[[package]]
name = "fastrand"
version = "2.4.0"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f"
checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
dependencies = [
"getrandom 0.3.4",
]
......
......@@ -3,7 +3,7 @@
[package]
name = "kvbm-common"
version = "0.1.0"
version = "1.0.0"
edition.workspace = true
description.workspace = true
authors.workspace = true
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Crate manifest for kvbm-config. Everything except the name and description
# is inherited from the workspace manifest.
[package]
name = "kvbm-config"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
description = "KVBM Configuration Library for Tokio, Rayon, and Messenger runtimes"

# No feature is enabled by default. Each named feature only switches on the
# optional dependency of the same name.
[features]
default = []
rayon = ["dep:rayon"]
nvtx = ["dep:nvtx"]

[dependencies]
anyhow = { workspace = true }
figment = { version = "0.10", features = ["env", "toml", "json"] }
nix = { version = "0.30.1", features = ["net"] }
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }

# Optional dependencies (pulled in only through the `rayon` / `nvtx` features)
rayon = { version = "1.10", optional = true }
nvtx = { version = "1.3", optional = true }

# Velo dependencies
velo = { workspace = true }

# Memory dependencies (for NixL)
dynamo-memory = { workspace = true }

[dev-dependencies]
serde_json = { workspace = true }
temp-env = { version = "0.3.6" }
# Same workspace tokio as above, with `rt-multi-thread` and `macros` added for tests.
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Cache tier configuration for KVBM.
//!
//! Defines configuration for G2 (host/pinned memory) and G3 (disk) cache tiers,
//! as well as the parallelism mode for distributed workers.
//!
//! The leader uses this configuration to coordinate cache tier creation on workers.
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Parallelism strategy for KV cache across workers.
///
/// This determines how KV blocks are distributed and transferred across
/// multiple workers in a distributed inference setup.
///
/// # Wire format
///
/// Variants are (de)serialized in `snake_case`, i.e. `"tensor_parallel"` and
/// `"replicated_data"`. The default is [`ParallelismMode::TensorParallel`].
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ParallelismMode {
    /// Tensor parallel: each worker has a shard of each KV block.
    ///
    /// This is the standard approach for tensor-parallel inference where
    /// attention heads are split across workers. Each worker stores and
    /// transfers only its portion of each KV block.
    ///
    /// All workers have G1, G2, and G3 tiers. Operations execute on all
    /// workers simultaneously (SPMD).
    ///
    /// Serialized as `"tensor_parallel"`.
    #[default]
    TensorParallel,

    /// Replicated data: all workers have full KV blocks (MLA scenario).
    ///
    /// In MLA (Multi-head Latent Attention) architectures, KV blocks are
    /// replicated rather than sharded. Only rank 0 has G2/G3 storage;
    /// data is broadcast to other ranks after loading to G1.
    ///
    /// This reduces storage requirements on non-rank-0 workers and is
    /// suitable when the model's KV representation is the same across
    /// all attention heads.
    ///
    /// Serialized as `"replicated_data"`.
    ReplicatedData,
}
/// Host cache configuration (G2 tier - pinned CPU memory).
///
/// The host cache provides a staging area for KV blocks between GPU and disk.
/// Memory is allocated as pinned (page-locked) for efficient DMA transfers.
///
/// Both fields are optional. With neither set, `is_enabled()` returns `false`
/// and `compute_num_blocks()` returns `None`.
///
/// NOTE(review): the struct derives `Validate` but declares no field-level
/// constraints, so a negative or non-finite `cache_size_gb` is not rejected
/// by validation.
#[derive(Debug, Clone, Serialize, Deserialize, Validate, Default)]
pub struct HostCacheConfig {
    /// Cache size in gigabytes.
    /// Used to compute num_blocks if not explicitly set.
    ///
    /// Interpreted as decimal gigabytes: `compute_num_blocks` multiplies this
    /// value by 1_000_000_000 to obtain bytes.
    pub cache_size_gb: Option<f64>,

    /// Explicit number of blocks for the host cache.
    /// Takes priority over cache_size_gb if set.
    pub num_blocks: Option<usize>,
}
impl HostCacheConfig {
    /// Resolve how many blocks the host cache should hold.
    ///
    /// Resolution order:
    /// 1. a zero `bytes_per_block` short-circuits to `None`;
    /// 2. an explicit `num_blocks` is returned unchanged;
    /// 3. otherwise `cache_size_gb` (decimal GB) is converted to bytes and
    ///    divided by the block size.
    ///
    /// # Arguments
    /// * `bytes_per_block` - Size of each block in bytes
    ///
    /// # Returns
    /// The block count, or `None` when the block size is zero or when neither
    /// `num_blocks` nor `cache_size_gb` is configured.
    pub fn compute_num_blocks(&self, bytes_per_block: usize) -> Option<usize> {
        if bytes_per_block == 0 {
            return None;
        }

        if let Some(explicit) = self.num_blocks {
            return Some(explicit);
        }

        // Nothing configured at all -> `None` via `?`.
        let size_gb = self.cache_size_gb?;
        let capacity_bytes = size_gb * 1_000_000_000.0;

        // The float-to-integer cast drops any fractional block.
        Some((capacity_bytes / bytes_per_block as f64) as usize)
    }

    /// Report whether the host cache has any sizing configured.
    ///
    /// Returns `true` as soon as either `num_blocks` or `cache_size_gb` is set.
    pub fn is_enabled(&self) -> bool {
        !(self.num_blocks.is_none() && self.cache_size_gb.is_none())
    }
}
/// Disk cache configuration (G3 tier - persistent storage).
///
/// The disk cache provides extended capacity for KV blocks beyond GPU and host memory.
/// Can use either GPU Direct Storage (GDS) for direct GPU-disk transfers or POSIX
/// for regular file I/O.
///
/// Only `num_blocks` and `cache_size_gb` decide whether the tier counts as
/// enabled (see `is_enabled()`); `use_gds` and `storage_path` alone do not.
#[derive(Debug, Clone, Serialize, Deserialize, Validate, Default)]
pub struct DiskCacheConfig {
    /// Cache size in gigabytes.
    /// Used to compute num_blocks if not explicitly set.
    ///
    /// Interpreted as decimal gigabytes: `compute_num_blocks` multiplies this
    /// value by 1_000_000_000 to obtain bytes.
    pub cache_size_gb: Option<f64>,

    /// Explicit number of blocks for the disk cache.
    /// Takes priority over cache_size_gb if set.
    pub num_blocks: Option<usize>,

    /// Use GPU Direct Storage (GDS) if available.
    /// When true, enables GDS_MT backend for direct GPU-disk transfers.
    /// When false or GDS unavailable, falls back to POSIX backend.
    ///
    /// Defaults to `false` when the key is absent from the input.
    #[serde(default)]
    pub use_gds: bool,

    /// Storage path for disk cache files.
    /// If None, a default path will be used.
    pub storage_path: Option<PathBuf>,
}
impl DiskCacheConfig {
    /// Resolve how many blocks the disk cache should hold.
    ///
    /// Resolution order:
    /// 1. a zero `bytes_per_block` short-circuits to `None`;
    /// 2. an explicit `num_blocks` is returned unchanged;
    /// 3. otherwise `cache_size_gb` (decimal GB) is converted to bytes and
    ///    divided by the block size.
    ///
    /// # Arguments
    /// * `bytes_per_block` - Size of each block in bytes
    ///
    /// # Returns
    /// The block count, or `None` when the block size is zero or when neither
    /// `num_blocks` nor `cache_size_gb` is configured.
    pub fn compute_num_blocks(&self, bytes_per_block: usize) -> Option<usize> {
        if bytes_per_block == 0 {
            return None;
        }

        if let Some(explicit) = self.num_blocks {
            return Some(explicit);
        }

        // Nothing configured at all -> `None` via `?`.
        let size_gb = self.cache_size_gb?;
        let capacity_bytes = size_gb * 1_000_000_000.0;

        // The float-to-integer cast drops any fractional block.
        Some((capacity_bytes / bytes_per_block as f64) as usize)
    }

    /// Report whether the disk cache has any sizing configured.
    ///
    /// Returns `true` as soon as either `num_blocks` or `cache_size_gb` is set.
    pub fn is_enabled(&self) -> bool {
        !(self.num_blocks.is_none() && self.cache_size_gb.is_none())
    }
}
/// Top-level cache configuration.
///
/// Groups host (G2) and disk (G3) cache configurations together,
/// plus the parallelism mode for distributed workers.
///
/// Use Figment profiles to configure different cache settings for leader vs worker.
///
/// `CacheConfig::default()` yields a default (unsized) host cache, no disk
/// cache, and `ParallelismMode::TensorParallel`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct CacheConfig {
    /// Host cache (G2 tier) - pinned CPU memory.
    ///
    /// Falls back to `HostCacheConfig::default()` when omitted.
    #[serde(default)]
    #[validate(nested)]
    pub host: HostCacheConfig,

    /// Disk cache (G3 tier) - persistent storage.
    /// Optional - only configure if disk caching is needed.
    #[validate(nested)]
    pub disk: Option<DiskCacheConfig>,

    /// Parallelism mode for distributed workers.
    ///
    /// - `TensorParallel` (default): Each worker has a shard of each KV block
    /// - `ReplicatedData`: Only rank 0 has G2/G3; data is broadcast on load
    ///
    /// Can be set via env var: `KVBM_CACHE_PARALLELISM=tensor_parallel|replicated_data`
    #[serde(default)]
    pub parallelism: ParallelismMode,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default host cache carries no sizing and reports itself disabled.
    #[test]
    fn test_host_cache_default() {
        let host = HostCacheConfig::default();

        assert!(!host.is_enabled());
        assert_eq!(host.cache_size_gb, None);
        assert_eq!(host.num_blocks, None);
    }

    /// An explicit block count wins over a size expressed in GB.
    #[test]
    fn test_host_cache_explicit_blocks() {
        let host = HostCacheConfig {
            cache_size_gb: Some(10.0), // present, but must lose to num_blocks
            num_blocks: Some(1000),
        };

        // 1 MB blocks
        let resolved = host.compute_num_blocks(1_000_000);

        assert_eq!(resolved, Some(1000));
        assert!(host.is_enabled());
    }

    /// Without an explicit count the size in GB is divided by the block size.
    #[test]
    fn test_host_cache_from_size_gb() {
        let host = HostCacheConfig {
            cache_size_gb: Some(10.0), // 10 GB
            num_blocks: None,
        };

        // 10 GB / 1 MB = 10,000 blocks
        let resolved = host.compute_num_blocks(1_000_000);

        assert_eq!(resolved, Some(10_000));
        assert!(host.is_enabled());
    }

    /// A default disk cache has every field unset and is disabled.
    #[test]
    fn test_disk_cache_default() {
        let disk = DiskCacheConfig::default();
        assert!(!disk.is_enabled());

        let DiskCacheConfig {
            cache_size_gb,
            num_blocks,
            use_gds,
            storage_path,
        } = disk;
        assert_eq!(cache_size_gb, None);
        assert_eq!(num_blocks, None);
        assert!(!use_gds);
        assert_eq!(storage_path, None);
    }

    /// GDS flag and storage path are carried through unchanged.
    #[test]
    fn test_disk_cache_with_gds() {
        let cache_dir = PathBuf::from("/mnt/nvme/kv_cache");
        let disk = DiskCacheConfig {
            cache_size_gb: None,
            num_blocks: Some(5000),
            use_gds: true,
            storage_path: Some(cache_dir.clone()),
        };

        assert!(disk.use_gds);
        assert_eq!(disk.storage_path, Some(cache_dir));
        assert!(disk.is_enabled());
    }

    /// Tensor parallel is the default mode.
    #[test]
    fn test_parallelism_mode_default() {
        assert_eq!(ParallelismMode::default(), ParallelismMode::TensorParallel);
    }

    /// Each mode serializes to its snake_case name and parses back from it.
    #[test]
    fn test_parallelism_mode_serde() {
        let cases = [
            (ParallelismMode::TensorParallel, "\"tensor_parallel\""),
            (ParallelismMode::ReplicatedData, "\"replicated_data\""),
        ];

        // Serialization
        for (mode, wire) in cases {
            let encoded = serde_json::to_string(&mode).unwrap();
            assert_eq!(encoded, wire);
        }

        // Deserialization
        for (mode, wire) in cases {
            let decoded: ParallelismMode = serde_json::from_str(wire).unwrap();
            assert_eq!(decoded, mode);
        }
    }

    /// An explicitly chosen parallelism mode is stored as given.
    #[test]
    fn test_cache_config_with_parallelism() {
        let cache = CacheConfig {
            parallelism: ParallelismMode::ReplicatedData,
            ..CacheConfig::default()
        };

        assert_eq!(cache.parallelism, ParallelismMode::ReplicatedData);
    }

    /// The default cache configuration uses tensor parallelism.
    #[test]
    fn test_cache_config_default_parallelism() {
        let cache = CacheConfig::default();

        assert_eq!(cache.parallelism, ParallelismMode::TensorParallel);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Discovery configuration for Nova peer discovery.
//!
//! Supports three discovery backends:
//! - **Etcd**: Centralized discovery using etcd key-value store
//! - **P2P**: Decentralized discovery using libp2p DHT
//! - **Filesystem**: File-based discovery for development/testing
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Discovery configuration - only one type can be active at a time.
///
/// The enum is internally tagged: the `type` key selects the variant and its
/// value is the lowercase variant name (`"etcd"`, `"p2p"`, `"filesystem"`).
/// The remaining keys are the fields of the selected variant's struct.
///
/// NOTE(review): this enum does not derive `Validate`, so the range
/// constraints declared on the inner structs only run if the caller invokes
/// `validate()` on the inner value.
///
/// # JSON Configuration Examples
///
/// ## Etcd Discovery
/// ```json
/// {
///     "type": "etcd",
///     "cluster_id": "my-cluster",
///     "endpoints": ["http://etcd1:2379", "http://etcd2:2379"],
///     "ttl_secs": 60
/// }
/// ```
///
/// ## P2P Discovery
/// ```json
/// {
///     "type": "p2p",
///     "cluster_id": "my-cluster",
///     "listen_port": 0,
///     "bootstrap_peers": ["192.168.1.10:4001"],
///     "enable_mdns": true
/// }
/// ```
///
/// ## Filesystem Discovery
/// ```json
/// {
///     "type": "filesystem",
///     "path": "/tmp/discovery.json"
/// }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum DiscoveryConfig {
    /// Etcd-based discovery (centralized). Tag value: `"etcd"`.
    Etcd(EtcdDiscoveryConfig),

    /// P2P discovery using libp2p DHT (decentralized). Tag value: `"p2p"`.
    P2p(P2pDiscoveryConfig),

    /// Filesystem-based discovery (for dev/testing). Tag value: `"filesystem"`.
    Filesystem(FilesystemDiscoveryConfig),
}
/// Etcd discovery configuration.
///
/// Only `cluster_id` has no serde default and must be present when
/// deserializing; every other field falls back to the documented default.
///
/// NOTE(review): the `Default` impl sets `cluster_id` to an empty string even
/// though it is described as required, and no validation rule rejects an
/// empty value.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct EtcdDiscoveryConfig {
    /// Cluster ID / key prefix for discovery (required).
    pub cluster_id: String,

    /// Etcd endpoints (default: ["http://localhost:2379"]).
    #[serde(default = "default_etcd_endpoints")]
    pub endpoints: Vec<String>,

    /// Lease TTL in seconds (default: 60, range: 10-600).
    #[serde(default = "default_etcd_ttl")]
    #[validate(range(min = 10, max = 600))]
    pub ttl_secs: u64,

    /// Operation timeout in seconds (default: 30).
    ///
    /// No range validation is declared for this field.
    #[serde(default = "default_operation_timeout")]
    pub operation_timeout_secs: u64,

    /// Max retries for operations (default: 3).
    ///
    /// Validated to be at most 10; the `min = 0` bound is always satisfied
    /// for an unsigned integer.
    #[serde(default = "default_max_retries")]
    #[validate(range(min = 0, max = 10))]
    pub max_retries: u32,
}
/// P2P discovery configuration.
///
/// Only `cluster_id` has no serde default and must be present when
/// deserializing. The struct derives `Validate` but declares no field-level
/// constraints.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct P2pDiscoveryConfig {
    /// Cluster ID / swarm key (required).
    pub cluster_id: String,

    /// Listen port (default: 0 = OS-assigned).
    #[serde(default)]
    pub listen_port: u16,

    /// Bootstrap peer addresses.
    ///
    /// Defaults to an empty list when omitted.
    #[serde(default)]
    pub bootstrap_peers: Vec<String>,

    /// DHT replication factor (default: 3).
    #[serde(default = "default_replication_factor")]
    pub replication_factor: usize,

    /// Enable mDNS for local network discovery (default: false).
    #[serde(default)]
    pub enable_mdns: bool,

    /// Record TTL in seconds (default: 600).
    #[serde(default = "default_record_ttl")]
    pub record_ttl_secs: u64,
}
/// Filesystem discovery configuration.
///
/// Unlike the etcd and P2P variants, this struct has no `Default` impl and
/// does not derive `Validate`; `path` must always be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilesystemDiscoveryConfig {
    /// Path to the discovery JSON file.
    pub path: PathBuf,
}
/// Serde default for `EtcdDiscoveryConfig::endpoints`: a single local etcd endpoint.
fn default_etcd_endpoints() -> Vec<String> {
    vec![String::from("http://localhost:2379")]
}

/// Serde default for `EtcdDiscoveryConfig::ttl_secs`.
fn default_etcd_ttl() -> u64 {
    const TTL_SECS: u64 = 60;
    TTL_SECS
}

/// Serde default for `EtcdDiscoveryConfig::operation_timeout_secs`.
fn default_operation_timeout() -> u64 {
    const TIMEOUT_SECS: u64 = 30;
    TIMEOUT_SECS
}

/// Serde default for `EtcdDiscoveryConfig::max_retries`.
fn default_max_retries() -> u32 {
    const RETRIES: u32 = 3;
    RETRIES
}

/// Serde default for `P2pDiscoveryConfig::replication_factor`.
fn default_replication_factor() -> usize {
    const REPLICAS: usize = 3;
    REPLICAS
}

/// Serde default for `P2pDiscoveryConfig::record_ttl_secs`.
fn default_record_ttl() -> u64 {
    const TTL_SECS: u64 = 600;
    TTL_SECS
}
impl Default for EtcdDiscoveryConfig {
fn default() -> Self {
Self {
cluster_id: String::new(),
endpoints: default_etcd_endpoints(),
ttl_secs: default_etcd_ttl(),
operation_timeout_secs: default_operation_timeout(),
max_retries: default_max_retries(),
}
}
}
impl Default for P2pDiscoveryConfig {
fn default() -> Self {
Self {
cluster_id: String::new(),
listen_port: 0,
bootstrap_peers: Vec::new(),
replication_factor: default_replication_factor(),
enable_mdns: false,
record_ttl_secs: default_record_ttl(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `raw` as a tagged `DiscoveryConfig`, panicking on malformed input.
    fn parse(raw: &str) -> DiscoveryConfig {
        serde_json::from_str(raw).unwrap()
    }

    /// An etcd document selects the `Etcd` variant and fills in omitted defaults.
    #[test]
    fn test_deserialize_etcd_config() {
        let parsed = parse(
            r#"{
            "type": "etcd",
            "cluster_id": "test-cluster",
            "endpoints": ["http://etcd1:2379"],
            "ttl_secs": 120
        }"#,
        );

        let DiscoveryConfig::Etcd(etcd) = parsed else {
            panic!("Expected Etcd config");
        };
        assert_eq!(etcd.cluster_id, "test-cluster");
        assert_eq!(etcd.endpoints, vec!["http://etcd1:2379"]);
        assert_eq!(etcd.ttl_secs, 120);
        assert_eq!(etcd.operation_timeout_secs, 30); // default
        assert_eq!(etcd.max_retries, 3); // default
    }

    /// A p2p document selects the `P2p` variant and fills in omitted defaults.
    #[test]
    fn test_deserialize_p2p_config() {
        let parsed = parse(
            r#"{
            "type": "p2p",
            "cluster_id": "test-cluster",
            "listen_port": 4001,
            "bootstrap_peers": ["192.168.1.10:4001"],
            "enable_mdns": true
        }"#,
        );

        let DiscoveryConfig::P2p(p2p) = parsed else {
            panic!("Expected P2p config");
        };
        assert_eq!(p2p.cluster_id, "test-cluster");
        assert_eq!(p2p.listen_port, 4001);
        assert_eq!(p2p.bootstrap_peers, vec!["192.168.1.10:4001"]);
        assert!(p2p.enable_mdns);
        assert_eq!(p2p.replication_factor, 3); // default
        assert_eq!(p2p.record_ttl_secs, 600); // default
    }

    /// A filesystem document selects the `Filesystem` variant.
    #[test]
    fn test_deserialize_filesystem_config() {
        let parsed = parse(
            r#"{
            "type": "filesystem",
            "path": "/tmp/discovery.json"
        }"#,
        );

        let DiscoveryConfig::Filesystem(fs) = parsed else {
            panic!("Expected Filesystem config");
        };
        assert_eq!(fs.path, PathBuf::from("/tmp/discovery.json"));
    }

    /// Serializing the `Etcd` variant emits the type tag next to the fields.
    #[test]
    fn test_serialize_etcd_config() {
        let etcd = EtcdDiscoveryConfig {
            cluster_id: "my-cluster".to_string(),
            endpoints: vec!["http://localhost:2379".to_string()],
            ttl_secs: 60,
            operation_timeout_secs: 30,
            max_retries: 3,
        };

        let encoded = serde_json::to_string(&DiscoveryConfig::Etcd(etcd)).unwrap();

        for fragment in [r#""type":"etcd""#, r#""cluster_id":"my-cluster""#] {
            assert!(encoded.contains(fragment));
        }
    }

    /// `EtcdDiscoveryConfig::default()` matches the documented defaults.
    #[test]
    fn test_etcd_default() {
        let EtcdDiscoveryConfig {
            cluster_id,
            endpoints,
            ttl_secs,
            operation_timeout_secs,
            max_retries,
        } = EtcdDiscoveryConfig::default();

        assert!(cluster_id.is_empty());
        assert_eq!(endpoints, vec!["http://localhost:2379"]);
        assert_eq!(ttl_secs, 60);
        assert_eq!(operation_timeout_secs, 30);
        assert_eq!(max_retries, 3);
    }

    /// `P2pDiscoveryConfig::default()` matches the documented defaults.
    #[test]
    fn test_p2p_default() {
        let P2pDiscoveryConfig {
            cluster_id,
            listen_port,
            bootstrap_peers,
            replication_factor,
            enable_mdns,
            record_ttl_secs,
        } = P2pDiscoveryConfig::default();

        assert!(cluster_id.is_empty());
        assert_eq!(listen_port, 0);
        assert!(bootstrap_peers.is_empty());
        assert_eq!(replication_factor, 3);
        assert!(!enable_mdns);
        assert_eq!(record_ttl_secs, 600);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Event publishing configuration for KV cache coordination.
//!
//! This module defines the configuration for the event publishing pipeline
//! that broadcasts block registration/removal events to distributed consumers
//! (e.g., KvbmHub for radix tree maintenance).
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Configuration for event publishing.
///
/// Events are broadcast when blocks are registered or removed from the cache.
/// The pipeline batches events for efficient wire transmission.
///
/// Every field carries a serde default, so an empty document (`{}`)
/// deserializes to the same values as `EventsConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct EventsConfig {
    /// Whether event publishing is enabled.
    ///
    /// When disabled, no events are emitted and no publisher is started.
    /// Default: false
    #[serde(default)]
    pub enabled: bool,

    /// Batching configuration for the event pipeline.
    ///
    /// Validated recursively together with this struct.
    #[serde(default)]
    #[validate(nested)]
    pub batching: BatchingConfig,

    /// Broadcast channel capacity for the EventsManager.
    ///
    /// This determines how many events can be buffered before slow
    /// subscribers start lagging. Default: 1024
    ///
    /// Validated range: 16 to 65536 inclusive.
    #[serde(default = "default_channel_capacity")]
    #[validate(range(min = 16, max = 65536))]
    pub channel_capacity: usize,

    /// Subject/topic pattern for publishing events.
    ///
    /// This is the NATS/messaging subject where events are published.
    /// Default: "kvbm.events"
    #[serde(default = "default_subject")]
    pub subject: String,

    /// Event emission policy.
    ///
    /// Determines which blocks trigger events:
    /// - `power_of_two`: Only emit for blocks at power-of-2 positions (default)
    /// - `all`: Emit for all blocks (testing/debugging)
    #[serde(default)]
    pub policy: EventPolicyConfig,
}
impl Default for EventsConfig {
fn default() -> Self {
Self {
enabled: false,
batching: BatchingConfig::default(),
channel_capacity: default_channel_capacity(),
subject: default_subject(),
policy: EventPolicyConfig::default(),
}
}
}
/// Serde default for `EventsConfig::channel_capacity`.
fn default_channel_capacity() -> usize {
    const CAPACITY: usize = 1024;
    CAPACITY
}

/// Serde default for `EventsConfig::subject`.
fn default_subject() -> String {
    String::from("kvbm.events")
}
/// Batching configuration for the event pipeline.
///
/// Events are batched before publishing to reduce wire traffic.
/// Batches are flushed when:
/// - The window duration expires
/// - The max batch size is reached
/// - The event type switches (Create -> Remove or vice versa)
///
/// Both fields carry serde defaults, so a partial or empty document is accepted.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct BatchingConfig {
    /// Maximum time to wait before flushing a batch (in milliseconds).
    ///
    /// Default: 10ms
    ///
    /// Validated range: 1 to 10000 inclusive.
    #[serde(default = "default_window_duration_ms")]
    #[validate(range(min = 1, max = 10000))]
    pub window_duration_ms: u64,

    /// Maximum number of events in a batch before flushing.
    ///
    /// Default: 1024
    ///
    /// Validated range: 1 to 65536 inclusive.
    #[serde(default = "default_max_batch_size")]
    #[validate(range(min = 1, max = 65536))]
    pub max_batch_size: usize,
}
impl Default for BatchingConfig {
fn default() -> Self {
Self {
window_duration_ms: default_window_duration_ms(),
max_batch_size: default_max_batch_size(),
}
}
}
/// Serde default for `BatchingConfig::window_duration_ms` (milliseconds).
fn default_window_duration_ms() -> u64 {
    const WINDOW_MS: u64 = 10;
    WINDOW_MS
}

/// Serde default for `BatchingConfig::max_batch_size`.
fn default_max_batch_size() -> usize {
    const BATCH_SIZE: usize = 1024;
    BATCH_SIZE
}
/// Event emission policy configuration.
///
/// Variants are (de)serialized in `snake_case`: `"power_of_two"` and `"all"`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum EventPolicyConfig {
    /// Emit events only for blocks at power-of-2 positions (default).
    ///
    /// This creates sparse sampling at positions 16, 32, 64, ..., 65536
    /// for efficient radix tree construction without tracking every block.
    ///
    /// Serialized as `"power_of_two"`.
    #[default]
    PowerOfTwo,

    /// Emit events for all blocks.
    ///
    /// Useful for testing or when complete block tracking is needed.
    ///
    /// Serialized as `"all"`.
    All,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `EventsConfig::default()` matches the documented defaults.
    #[test]
    fn test_default_config() {
        let EventsConfig {
            enabled,
            batching,
            channel_capacity,
            subject,
            policy,
        } = EventsConfig::default();

        assert!(!enabled);
        assert_eq!(batching.window_duration_ms, 10);
        assert_eq!(batching.max_batch_size, 1024);
        assert_eq!(channel_capacity, 1024);
        assert_eq!(subject, "kvbm.events");
        assert_eq!(policy, EventPolicyConfig::PowerOfTwo);
    }

    /// A fully specified document parses as written and survives a
    /// serialize/deserialize round trip.
    #[test]
    fn test_serde_roundtrip() {
        let raw = r#"{
            "enabled": true,
            "batching": {
                "window_duration_ms": 50,
                "max_batch_size": 512
            },
            "channel_capacity": 2048,
            "subject": "my.events",
            "policy": "all"
        }"#;

        let parsed: EventsConfig = serde_json::from_str(raw).unwrap();
        assert!(parsed.enabled);
        assert_eq!(parsed.batching.window_duration_ms, 50);
        assert_eq!(parsed.batching.max_batch_size, 512);
        assert_eq!(parsed.channel_capacity, 2048);
        assert_eq!(parsed.subject, "my.events");
        assert_eq!(parsed.policy, EventPolicyConfig::All);

        // Roundtrip
        let encoded = serde_json::to_string(&parsed).unwrap();
        let reparsed: EventsConfig = serde_json::from_str(&encoded).unwrap();
        assert_eq!(reparsed.enabled, parsed.enabled);
        assert_eq!(reparsed.policy, parsed.policy);
    }

    /// An empty object falls back to defaults for every field.
    #[test]
    fn test_empty_json_uses_defaults() {
        let parsed: EventsConfig = serde_json::from_str(r#"{}"#).unwrap();

        assert!(!parsed.enabled);
        assert_eq!(parsed.batching.window_duration_ms, 10);
    }

    /// Overriding only `enabled` leaves the remaining fields at their defaults.
    #[test]
    fn test_partial_config() {
        let parsed: EventsConfig = serde_json::from_str(r#"{"enabled": true}"#).unwrap();

        assert!(parsed.enabled);
        assert_eq!(parsed.batching.window_duration_ms, 10);
        assert_eq!(parsed.channel_capacity, 1024);
    }

    /// A configuration with in-range values passes validation.
    #[test]
    fn test_validation() {
        let batching = BatchingConfig {
            window_duration_ms: 10,
            max_batch_size: 1024,
        };
        let events = EventsConfig {
            enabled: true,
            batching,
            channel_capacity: 1024,
            subject: "test".to_string(),
            policy: EventPolicyConfig::PowerOfTwo,
        };

        assert!(events.validate().is_ok());
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! KVBM Configuration Library
//!
//! Provides centralized configuration for Tokio, Rayon, Messenger, and NixL runtimes.
//! Supports role-specific configuration for leader and worker components.
mod cache;
mod discovery;
mod events;
mod messenger;
mod nixl;
mod object;
mod offload;
mod onboard;
mod rayon;
mod tokio;
pub use cache::{CacheConfig, DiskCacheConfig, HostCacheConfig, ParallelismMode};
pub use discovery::{
DiscoveryConfig, EtcdDiscoveryConfig, FilesystemDiscoveryConfig, P2pDiscoveryConfig,
};
pub use events::{BatchingConfig as EventsBatchingConfig, EventPolicyConfig, EventsConfig};
pub use messenger::{MessengerBackendConfig, MessengerConfig};
pub use nixl::NixlConfig;
pub use object::{NixlObjectConfig, ObjectClientConfig, ObjectConfig, S3ObjectConfig};
pub use offload::{
OffloadConfig, PolicyType, PresenceFilterConfig, PresenceLfuFilterConfig, TierOffloadConfig,
};
pub use onboard::{OnboardConfig, OnboardMode};
pub use rayon::RayonConfig;
pub use tokio::TokioConfig;
use figment::{
Figment, Metadata, Profile, Provider,
providers::{Env, Format, Json, Serialized, Toml},
value::{Dict, Map},
};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use validator::{Validate, ValidationErrors};
/// Configuration errors
///
/// Each variant wraps the underlying error and prefixes its message. The
/// `#[from]` attributes generate `From` conversions from the wrapped types.
#[derive(Debug, Error)]
pub enum ConfigError {
    /// Figment failed to extract a typed configuration.
    ///
    /// The error is boxed, so the generated conversion is from
    /// `Box<figment::Error>`; a bare `figment::Error` has to be boxed first.
    #[error("Failed to extract configuration: {0}")]
    Extraction(#[from] Box<figment::Error>),

    /// The extracted configuration violated one or more `validator` rules.
    #[error("Configuration validation failed: {0}")]
    Validation(#[from] ValidationErrors),

    /// Any other failure, carried as an `anyhow::Error`.
    #[error("Configuration error: {0}")]
    Other(#[from] anyhow::Error),
}
/// Top-level KVBM configuration.
///
/// Use Figment profiles to configure role-specific settings. For example,
/// leader and worker can have different `tokio.worker_threads` values by
/// putting them under `"leader"` and `"worker"` profile keys in JSON.
///
/// Every section carries `#[serde(default)]`, so a document that omits a
/// section deserializes with that section's `Default` value instead of
/// failing with a "missing field" error. This holds whether or not the code
/// defaults have been merged into the Figment beforehand.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct KvbmConfig {
    /// Tokio runtime configuration.
    #[validate(nested)]
    #[serde(default)]
    pub tokio: TokioConfig,

    /// Rayon configuration.
    #[validate(nested)]
    #[serde(default)]
    pub rayon: RayonConfig,

    /// Messenger configuration.
    #[validate(nested)]
    #[serde(default)]
    pub messenger: MessengerConfig,

    /// NixL configuration. None = NixL disabled.
    #[validate(nested)]
    #[serde(default)]
    pub nixl: Option<NixlConfig>,

    /// Cache configuration (host G2 tier and disk G3 tier).
    #[validate(nested)]
    #[serde(default)]
    pub cache: CacheConfig,

    /// Offload policy configuration (G1→G2, G2→G3 transitions).
    #[validate(nested)]
    #[serde(default)]
    pub offload: OffloadConfig,

    /// Onboard configuration (G2→G1 loading strategy).
    ///
    /// This is the only section without `#[validate(nested)]`.
    #[serde(default)]
    pub onboard: OnboardConfig,

    /// Object storage configuration (G4 tier).
    /// None = object storage disabled.
    #[validate(nested)]
    #[serde(default)]
    pub object: Option<ObjectConfig>,

    /// Event publishing configuration for distributed coordination.
    #[validate(nested)]
    #[serde(default)]
    pub events: EventsConfig,
}
impl KvbmConfig {
/// Create a Figment configuration with all sources merged.
///
/// Configuration sources in priority order (lowest to highest):
/// 1. Code defaults
/// 2. System config file at /opt/dynamo/etc/kvbm.toml
/// 3. TOML file from KVBM_CONFIG_PATH environment variable
/// 4. Environment variables (KVBM_* prefixed)
pub fn figment() -> Figment {
let config_path = std::env::var("KVBM_CONFIG_PATH").unwrap_or_default();
Figment::new()
.merge(Serialized::defaults(KvbmConfig::default()))
.merge(Toml::file("/opt/dynamo/etc/kvbm.toml"))
.merge(Toml::file(&config_path))
// Tokio config: KVBM_TOKIO_WORKER_THREADS, KVBM_TOKIO_MAX_BLOCKING_THREADS
.merge(
Env::prefixed("KVBM_TOKIO_")
.map(|k| format!("tokio.{}", k.as_str().to_lowercase()).into()),
)
// Rayon config: KVBM_RAYON_NUM_THREADS
.merge(
Env::prefixed("KVBM_RAYON_")
.map(|k| format!("rayon.{}", k.as_str().to_lowercase()).into()),
)
// Messenger backend config: KVBM_MESSENGER_BACKEND_TCP_ADDR, etc.
.merge(
Env::prefixed("KVBM_MESSENGER_BACKEND_")
.map(|k| format!("messenger.backend.{}", k.as_str().to_lowercase()).into()),
)
// Messenger discovery config: KVBM_MESSENGER_DISCOVERY_CLUSTER_ID, etc.
.merge(
Env::prefixed("KVBM_MESSENGER_DISCOVERY_")
.map(|k| format!("messenger.discovery.{}", k.as_str().to_lowercase()).into()),
)
// NixL config: KVBM_NIXL_BACKENDS (comma-separated list)
.merge(
Env::prefixed("KVBM_NIXL_")
.map(|k| format!("nixl.{}", k.as_str().to_lowercase()).into()),
)
// Cache host config: KVBM_CACHE_HOST_SIZE_GB, KVBM_CACHE_HOST_NUM_BLOCKS
.merge(
Env::prefixed("KVBM_CACHE_HOST_")
.map(|k| format!("cache.host.{}", k.as_str().to_lowercase()).into()),
)
// Cache disk config: KVBM_CACHE_DISK_SIZE_GB, KVBM_CACHE_DISK_NUM_BLOCKS, etc.
.merge(
Env::prefixed("KVBM_CACHE_DISK_")
.map(|k| format!("cache.disk.{}", k.as_str().to_lowercase()).into()),
)
// Cache parallelism mode: KVBM_CACHE_PARALLELISM=tensor_parallel|replicated_data
.merge(Env::prefixed("KVBM_CACHE_PARALLELISM").map(|_| "cache.parallelism".into()))
// Events config: KVBM_EVENTS_ENABLED, KVBM_EVENTS_SUBJECT
.merge(
Env::prefixed("KVBM_EVENTS_")
.map(|k| format!("events.{}", k.as_str().to_lowercase()).into()),
)
// Events batching config: KVBM_EVENTS_BATCHING_WINDOW_DURATION_MS, etc.
.merge(
Env::prefixed("KVBM_EVENTS_BATCHING_")
.map(|k| format!("events.batching.{}", k.as_str().to_lowercase()).into()),
)
}
/// Load configuration from default figment (env and files).
pub fn from_env() -> Result<Self, ConfigError> {
Self::extract_from(Self::figment())
}
/// Extract configuration from any provider.
///
/// Use this to load config from custom sources or to add programmatic overrides.
///
/// # Example
/// ```rust,ignore
/// // Merge tuple pairs for programmatic overrides (figment best practice)
/// let config = KvbmConfig::extract_from(
/// KvbmConfig::figment()
/// .merge(("messenger.backend.tcp_port", 8080u16))
/// .merge(("tokio.worker_threads", 4usize))
/// )?;
/// ```
pub fn extract_from<T: Provider>(provider: T) -> Result<Self, ConfigError> {
let config: Self = Figment::from(provider)
.extract()
.map_err(|e| ConfigError::Extraction(Box::new(e)))?;
config.validate()?;
Ok(config)
}
/// Build a figment from defaults, then merge a custom provider.
///
/// Convenience method for adding programmatic overrides with highest priority.
///
/// # Example
/// ```rust,ignore
/// let figment = KvbmConfig::figment_with(("messenger.backend.tcp_port", 8080u16));
/// let config = KvbmConfig::extract_from(figment)?;
/// ```
pub fn figment_with<T: Provider>(extra: T) -> Figment {
Self::figment().merge(extra)
}
/// Load configuration merging JSON overrides from Python.
///
/// JSON has highest priority - overrides env vars, TOML files, and defaults.
/// This is the primary entrypoint for vLLM's `kv_connector_extra_config` dict.
///
/// # Example
/// ```rust,ignore
/// let json = r#"{"tokio": {"worker_threads": 8}, "messenger": {"backend": {"tcp_port": 9000}}}"#;
/// let config = KvbmConfig::from_figment_with_json(json)?;
/// ```
pub fn from_figment_with_json(json: &str) -> Result<Self, ConfigError> {
Self::extract_from(Self::figment().merge(Json::string(json)))
}
// ==================== Profile-based Configuration ====================
//
// Figment profiles allow role-specific configuration. The `profile` key
// in TOML/JSON is special - values under it are stored in named profiles
// and overlaid when that profile is selected.
//
// Example JSON:
// {
// "tokio": {"worker_threads": 4}, // default profile (all roles)
// "profile": {
// "leader": {"tokio": {"worker_threads": 2}}, // leader-only overlay
// "worker": {"tokio": {"worker_threads": 8}} // worker-only overlay
// }
// }
//
// When `build_leader()` selects "leader" profile:
// - tokio.worker_threads = 2 (from leader profile overlay)
//
// When `build_worker()` selects "worker" profile:
// - tokio.worker_threads = 8 (from worker profile overlay)
/// Figment with leader profile selected.
///
/// This merges `profile.leader.*` values over the defaults.
/// If no `profile.leader` section exists, defaults are used.
pub fn figment_for_leader() -> Figment {
Self::figment().select(Profile::new("leader"))
}
/// Figment with worker profile selected.
///
/// This merges `profile.worker.*` values over the defaults.
/// If no `profile.worker` section exists, defaults are used.
pub fn figment_for_worker() -> Figment {
Self::figment().select(Profile::new("worker"))
}
/// Load leader config from env/files with leader profile selected.
pub fn from_env_for_leader() -> Result<Self, ConfigError> {
Self::extract_from(Self::figment_for_leader())
}
/// Load worker config from env/files with worker profile selected.
pub fn from_env_for_worker() -> Result<Self, ConfigError> {
Self::extract_from(Self::figment_for_worker())
}
/// Load leader config with JSON overrides and leader profile selected.
///
/// JSON top-level keys are treated as profile names when using `.nested()`.
/// Keys under `default` apply to all profiles, keys under `leader` only to leader.
///
/// Example JSON:
/// ```json
/// {
/// "leader": {
/// "cache": {"host": {"cache_size_gb": 1.0}},
/// "tokio": {"worker_threads": 2}
/// }
/// }
/// ```
pub fn from_figment_with_json_for_leader(json: &str) -> Result<Self, ConfigError> {
Self::extract_from(Self::figment_for_leader().merge(Json::string(json).nested()))
}
/// Load worker config with JSON overrides and worker profile selected.
///
/// JSON top-level keys are treated as profile names when using `.nested()`.
/// Keys under `default` apply to all profiles, keys under `worker` only to worker.
///
/// Example JSON:
/// ```json
/// {
/// "worker": {"tokio": {"worker_threads": 8}}
/// }
/// ```
pub fn from_figment_with_json_for_worker(json: &str) -> Result<Self, ConfigError> {
Self::extract_from(Self::figment_for_worker().merge(Json::string(json).nested()))
}
}
/// `Provider` implementation so a `KvbmConfig` value can itself act as a
/// configuration source.
///
/// The value is exposed through `Serialized::defaults`, which lets it be
/// composed with other providers or handed to `Figment::from`.
impl Provider for KvbmConfig {
    /// Names this source "KvbmConfig" in figment metadata.
    fn metadata(&self) -> Metadata {
        Metadata::named("KvbmConfig")
    }

    /// Serializes `self` and returns the resulting profile map.
    fn data(&self) -> Result<Map<Profile, Dict>, figment::Error> {
        let serialized = Serialized::defaults(self);
        serialized.data()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Code defaults: one Tokio worker thread, nothing else pinned.
    #[test]
    fn test_default_config() {
        let defaults = KvbmConfig::default();

        assert_eq!(defaults.tokio.worker_threads, Some(1));
        assert!(defaults.tokio.max_blocking_threads.is_none());
        assert!(defaults.rayon.num_threads.is_none());
    }

    /// With the relevant variables cleared, the figment yields the code defaults.
    #[test]
    fn test_figment_defaults() {
        temp_env::with_vars_unset(
            vec![
                "KVBM_CONFIG_PATH",
                "KVBM_TOKIO_WORKER_THREADS",
                "KVBM_RAYON_NUM_THREADS",
                "KVBM_MESSENGER_BACKEND_TCP_ADDR",
                "KVBM_MESSENGER_DISCOVERY_CLUSTER_ID",
            ],
            || {
                let loaded = KvbmConfig::figment().extract::<KvbmConfig>().unwrap();
                assert_eq!(loaded.tokio.worker_threads, Some(1));
            },
        );
    }

    /// KVBM_TOKIO_* variables override the Tokio section.
    #[test]
    fn test_env_override_tokio() {
        temp_env::with_vars(
            vec![
                ("KVBM_TOKIO_WORKER_THREADS", Some("2")),
                ("KVBM_TOKIO_MAX_BLOCKING_THREADS", Some("32")),
            ],
            || {
                let loaded = KvbmConfig::figment().extract::<KvbmConfig>().unwrap();
                assert_eq!(loaded.tokio.worker_threads, Some(2));
                assert_eq!(loaded.tokio.max_blocking_threads, Some(32));
            },
        );
    }

    /// Tuple pairs merged on top of the figment act as programmatic overrides.
    #[test]
    fn test_extract_from_with_tuple_override() {
        temp_env::with_vars_unset(
            vec![
                "KVBM_CONFIG_PATH",
                "KVBM_TOKIO_WORKER_THREADS",
                "KVBM_MESSENGER_BACKEND_TCP_PORT",
            ],
            || {
                let base = KvbmConfig::figment();
                let with_threads = base.merge(("tokio.worker_threads", 2usize));
                let with_port = with_threads.merge(("messenger.backend.tcp_port", 9090u16));

                let loaded = KvbmConfig::extract_from(with_port).unwrap();
                assert_eq!(loaded.tokio.worker_threads, Some(2));
                assert_eq!(loaded.messenger.backend.tcp_port, 9090);
            },
        );
    }

    /// `figment_with` merges the extra provider over the default sources.
    #[test]
    fn test_figment_with_helper() {
        temp_env::with_vars_unset(vec!["KVBM_CONFIG_PATH", "KVBM_RAYON_NUM_THREADS"], || {
            let loaded =
                KvbmConfig::extract_from(KvbmConfig::figment_with(("rayon.num_threads", 8usize)))
                    .unwrap();
            assert_eq!(loaded.rayon.num_threads, Some(8));
        });
    }

    /// A `KvbmConfig` value round-trips through its own `Provider` impl.
    #[test]
    fn test_config_as_provider() {
        let tokio = TokioConfig {
            worker_threads: Some(4),
            max_blocking_threads: Some(128),
        };
        let original = KvbmConfig {
            tokio,
            ..Default::default()
        };

        let extracted: KvbmConfig = Figment::from(&original).extract().unwrap();

        assert_eq!(extracted.tokio.worker_threads, Some(4));
        assert_eq!(extracted.tokio.max_blocking_threads, Some(128));
    }

    /// Un-nested JSON overrides are applied section by section.
    #[test]
    fn test_from_figment_with_json() {
        temp_env::with_vars_unset(
            vec![
                "KVBM_CONFIG_PATH",
                "KVBM_TOKIO_WORKER_THREADS",
                "KVBM_MESSENGER_BACKEND_TCP_PORT",
            ],
            || {
                let overrides = r#"{"tokio": {"worker_threads": 2}, "messenger": {"backend": {"tcp_port": 9090}}}"#;
                let loaded = KvbmConfig::from_figment_with_json(overrides).unwrap();
                assert_eq!(loaded.tokio.worker_threads, Some(2));
                assert_eq!(loaded.messenger.backend.tcp_port, 9090);
            },
        );
    }

    /// JSON is merged last, so it wins over an environment variable.
    #[test]
    fn test_from_figment_with_json_overrides_env() {
        temp_env::with_vars(vec![("KVBM_TOKIO_WORKER_THREADS", Some("1"))], || {
            let overrides = r#"{"tokio": {"worker_threads": 2}}"#;
            let loaded = KvbmConfig::from_figment_with_json(overrides).unwrap();
            // The env var says 1, the JSON says 2: JSON takes precedence.
            assert_eq!(loaded.tokio.worker_threads, Some(2));
        });
    }

    /// An empty JSON object must load cleanly; the resulting values depend on
    /// the ambient environment, so only success is checked.
    #[test]
    fn test_from_figment_with_empty_json() {
        let outcome = KvbmConfig::from_figment_with_json("{}");
        assert!(outcome.is_ok(), "Empty JSON should not cause errors");
    }

    // ==================== Profile Selection Tests ====================
    //
    // With the `.nested()` JSON provider the top-level keys are profile names;
    // "default" holds values shared by every profile.

    /// Leader and worker each pick up their own profile section.
    #[test]
    fn test_profile_selection_leader_vs_worker() {
        temp_env::with_vars_unset(
            vec!["KVBM_CONFIG_PATH", "KVBM_TOKIO_WORKER_THREADS"],
            || {
                let profiles = r#"{
"default": {"tokio": {"worker_threads": 4}},
"leader": {"tokio": {"worker_threads": 2}},
"worker": {"tokio": {"worker_threads": 8}}
}"#;

                let leader = KvbmConfig::from_figment_with_json_for_leader(profiles).unwrap();
                assert_eq!(
                    leader.tokio.worker_threads,
                    Some(2),
                    "Leader should get leader profile's tokio.worker_threads"
                );

                let worker = KvbmConfig::from_figment_with_json_for_worker(profiles).unwrap();
                assert_eq!(
                    worker.tokio.worker_threads,
                    Some(8),
                    "Worker should get worker profile's tokio.worker_threads"
                );
            },
        );
    }

    /// Without a role-specific section, both roles see the "default" values.
    #[test]
    fn test_profile_no_override_uses_default() {
        temp_env::with_vars_unset(
            vec!["KVBM_CONFIG_PATH", "KVBM_TOKIO_WORKER_THREADS"],
            || {
                let profiles = r#"{"default": {"tokio": {"worker_threads": 4}}}"#;

                let leader = KvbmConfig::from_figment_with_json_for_leader(profiles).unwrap();
                assert_eq!(
                    leader.tokio.worker_threads,
                    Some(4),
                    "Leader should use default when no leader profile exists"
                );

                let worker = KvbmConfig::from_figment_with_json_for_worker(profiles).unwrap();
                assert_eq!(
                    worker.tokio.worker_threads,
                    Some(4),
                    "Worker should use default when no worker profile exists"
                );
            },
        );
    }

    /// "default" values reach every role; a role overlay only affects that role.
    #[test]
    fn test_profile_with_defaults_and_overlay() {
        temp_env::with_vars_unset(
            vec!["KVBM_CONFIG_PATH", "KVBM_TOKIO_WORKER_THREADS"],
            || {
                // cache.host is shared; only the leader overrides tokio.
                let profiles = r#"{
"default": {"cache": {"host": {"cache_size_gb": 2.0}}},
"leader": {"tokio": {"worker_threads": 2}}
}"#;

                let leader = KvbmConfig::from_figment_with_json_for_leader(profiles).unwrap();
                assert_eq!(leader.cache.host.cache_size_gb, Some(2.0));
                assert_eq!(leader.tokio.worker_threads, Some(2));

                let worker = KvbmConfig::from_figment_with_json_for_worker(profiles).unwrap();
                assert_eq!(worker.cache.host.cache_size_gb, Some(2.0));
                // The worker keeps the code default (1), not the leader's 2.
                assert_eq!(
                    worker.tokio.worker_threads,
                    Some(1),
                    "Worker should get default tokio, not leader's override"
                );
            },
        );
    }

    /// The env-only loaders succeed for both roles with default values.
    #[test]
    fn test_from_env_for_leader_and_worker() {
        temp_env::with_vars_unset(
            vec!["KVBM_CONFIG_PATH", "KVBM_TOKIO_WORKER_THREADS"],
            || {
                let leader = KvbmConfig::from_env_for_leader();
                assert!(leader.is_ok(), "from_env_for_leader should succeed");

                let worker = KvbmConfig::from_env_for_worker();
                assert!(worker.is_ok(), "from_env_for_worker should succeed");
            },
        );
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Messenger transport and discovery configuration.
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use anyhow::{Context, Result, bail};
use serde::{Deserialize, Serialize};
use validator::Validate;
use crate::discovery::DiscoveryConfig;
/// Messenger configuration combining backend and discovery settings.
///
/// `backend` is validated recursively through `#[validate(nested)]`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct MessengerConfig {
/// TCP transport settings (bind address / interface and port).
#[validate(nested)]
pub backend: MessengerBackendConfig,
/// Discovery configuration. None = discovery disabled.
// NOTE(review): no `#[validate(nested)]` here, so `validate()` does not
// descend into the discovery settings.
#[serde(default)]
pub discovery: Option<DiscoveryConfig>,
}
impl MessengerConfig {
    /// Build a Messenger instance from this configuration.
    ///
    /// Steps, in order:
    /// 1. Bind a TCP listener on the resolved address and wrap it in a transport
    /// 2. Attach a discovery backend if one is configured
    /// 3. Build the Messenger from the resulting builder
    ///
    /// # Errors
    /// Fails when the bind address cannot be resolved or bound, when the
    /// transport or filesystem discovery cannot be built, when Etcd or P2P
    /// discovery is requested (not supported), or when the final build fails.
    pub async fn build_messenger(&self) -> Result<std::sync::Arc<velo::Messenger>> {
        use std::net::TcpListener;
        use std::sync::Arc;
        use velo::Messenger;
        use velo::backend::tcp::TcpTransportBuilder;

        // Step 1: bind first so that port 0 is replaced by the OS-assigned port.
        let bind_addr = self.backend.resolve_bind_addr()?;
        let listener = TcpListener::bind(bind_addr)
            .with_context(|| format!("Failed to bind TCP listener to {}", bind_addr))?;
        let actual_addr = listener
            .local_addr()
            .context("Failed to get local address from listener")?;
        tracing::info!("Built TCP transport bound to {}", actual_addr);

        // The transport takes over the already-bound listener.
        let transport = Arc::new(
            TcpTransportBuilder::new()
                .from_listener(listener)?
                .build()
                .context("Failed to build TCP transport")?,
        );

        // Step 2: optional discovery backend.
        let base = Messenger::builder().add_transport(transport);
        let builder = match &self.discovery {
            None => base,
            Some(DiscoveryConfig::Etcd(_cfg)) => {
                bail!("Etcd discovery not yet supported in velo");
            }
            Some(DiscoveryConfig::P2p(_cfg)) => {
                bail!("P2P discovery not yet supported in velo");
            }
            Some(DiscoveryConfig::Filesystem(cfg)) => {
                use velo::discovery::FilesystemPeerDiscovery;

                let peer_discovery = FilesystemPeerDiscovery::new(&cfg.path)
                    .context("Failed to build filesystem discovery")?;
                let with_discovery = base.discovery(Arc::new(peer_discovery));
                tracing::info!("Built filesystem discovery from: {:?}", cfg.path);
                with_discovery
            }
        };

        // Step 3: assemble the Messenger.
        let messenger = builder.build().await.context("Failed to build Messenger")?;
        Ok(messenger)
    }
}
/// Messenger backend (transport) configuration.
///
/// With neither `tcp_addr` nor `tcp_interface` set, `resolve_bind_addr`
/// falls back to the unspecified IPv4 address. Setting both is rejected by
/// `resolve_bind_addr`, not by `validate()`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct MessengerBackendConfig {
/// IP address to bind (mutually exclusive with tcp_interface).
/// e.g., "0.0.0.0" or "192.168.1.100"
pub tcp_addr: Option<String>,
/// Network interface to bind (mutually exclusive with tcp_addr).
/// e.g., "eth0", "ens192"
pub tcp_interface: Option<String>,
/// TCP port to bind. 0 means OS-assigned (ephemeral port).
#[serde(default)]
pub tcp_port: u16,
}
impl MessengerBackendConfig {
    /// Work out the socket address to bind.
    ///
    /// The IP comes from `tcp_addr` (parsed as an IP address), from the
    /// address of `tcp_interface`, or defaults to the unspecified IPv4
    /// address when neither is set. The port is always `tcp_port`.
    ///
    /// # Errors
    /// Fails when both `tcp_addr` and `tcp_interface` are set, when
    /// `tcp_addr` does not parse, or when the interface lookup fails.
    pub fn resolve_bind_addr(&self) -> Result<SocketAddr> {
        // Guard: the two selectors may not be combined.
        if self.tcp_addr.is_some() && self.tcp_interface.is_some() {
            bail!("tcp_addr and tcp_interface are mutually exclusive")
        }

        let ip = if let Some(addr) = &self.tcp_addr {
            addr.parse::<IpAddr>()
                .with_context(|| format!("Invalid IP address: {}", addr))?
        } else if let Some(iface) = &self.tcp_interface {
            get_interface_ip(iface)
                .with_context(|| format!("Failed to get IP for interface: {}", iface))?
        } else {
            IpAddr::V4(Ipv4Addr::UNSPECIFIED)
        };

        Ok(SocketAddr::new(ip, self.tcp_port))
    }
}
/// Get the IP address for a network interface.
///
/// Scans every address reported for `interface_name` and returns the first
/// IPv4 address found. If the interface has no IPv4 address, the first IPv6
/// address seen is returned instead.
///
/// # Errors
/// Fails when the interface list cannot be read or when the interface has
/// neither an IPv4 nor an IPv6 address.
fn get_interface_ip(interface_name: &str) -> Result<IpAddr> {
    use nix::ifaddrs::getifaddrs;

    let addrs = getifaddrs().context("Failed to get interface addresses")?;

    // An IPv6 entry may be listed before the IPv4 one, so an IPv6 hit is only
    // remembered as a fallback and the scan keeps looking for IPv4.
    let mut ipv6_fallback: Option<IpAddr> = None;

    for ifaddr in addrs {
        if ifaddr.interface_name != interface_name {
            continue;
        }
        let Some(addr) = ifaddr.address else {
            continue;
        };

        // Prefer IPv4 addresses
        if let Some(sockaddr) = addr.as_sockaddr_in() {
            return Ok(IpAddr::V4(sockaddr.ip()));
        }

        // Fall back to IPv6 if no IPv4
        if ipv6_fallback.is_none() {
            if let Some(sockaddr) = addr.as_sockaddr_in6() {
                ipv6_fallback = Some(IpAddr::V6(sockaddr.ip()));
            }
        }
    }

    match ipv6_fallback {
        Some(ip) => Ok(ip),
        None => bail!("No IP address found for interface: {}", interface_name),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults leave both selectors unset and the port at 0.
    #[test]
    fn test_default_backend_config() {
        let backend = MessengerBackendConfig::default();

        assert!(backend.tcp_addr.is_none());
        assert!(backend.tcp_interface.is_none());
        assert_eq!(backend.tcp_port, 0);
    }

    /// The default config resolves to the unspecified IPv4 address, port 0.
    #[test]
    fn test_resolve_bind_addr_default() {
        let resolved = MessengerBackendConfig::default()
            .resolve_bind_addr()
            .unwrap();

        assert_eq!(resolved.ip(), IpAddr::V4(Ipv4Addr::UNSPECIFIED));
        assert_eq!(resolved.port(), 0);
    }

    /// An explicit address and port are passed through unchanged.
    #[test]
    fn test_resolve_bind_addr_explicit() {
        let backend = MessengerBackendConfig {
            tcp_addr: Some("192.168.1.100".to_string()),
            tcp_interface: None,
            tcp_port: 8080,
        };

        let resolved = backend.resolve_bind_addr().unwrap();

        assert_eq!(resolved.ip(), IpAddr::V4(Ipv4Addr::new(192, 168, 1, 100)));
        assert_eq!(resolved.port(), 8080);
    }

    /// Setting both selectors is rejected with a descriptive error.
    #[test]
    fn test_resolve_bind_addr_mutual_exclusivity() {
        let backend = MessengerBackendConfig {
            tcp_addr: Some("0.0.0.0".to_string()),
            tcp_interface: Some("eth0".to_string()),
            tcp_port: 0,
        };

        let outcome = backend.resolve_bind_addr();

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("mutually exclusive"));
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! NixL backend configuration.
//!
//! Configures which NixL backends (UCX, GDS, etc.) are enabled for RDMA transfers,
//! along with optional parameters for each backend.
use dynamo_memory::nixl::NixlBackendConfig;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use validator::Validate;
/// NixL backend configuration.
///
/// Controls which NixL backends are enabled for RDMA memory transfers
/// and their optional parameters.
///
/// # Backends
///
/// Common backends include:
/// - `UCX` - Unified Communication X (default)
/// - `GDS` - GPUDirect Storage
/// - `GDS_MT` - GPUDirect Storage (multi-threaded)
///
/// The builder methods store backend names in uppercase, and the lookup
/// helpers (`has_backend`, `backend_params`) uppercase the queried name.
/// Keys read through the derived `Deserialize` are stored as written, so
/// configuration files should spell backend names in uppercase.
///
/// # Configuration
///
/// Each backend can have optional parameters as key-value pairs.
/// If a backend has no parameters, use an empty map.
///
/// ## TOML Example
///
/// ```toml
/// [nixl.backends.UCX]
/// # UCX with default params (empty map)
///
/// [nixl.backends.GDS]
/// threads = "4"
/// buffer_size = "1048576"
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct NixlConfig {
/// Map of backend name (uppercase) -> optional parameters.
///
/// If a backend is present in the map, it's enabled.
/// The inner HashMap contains optional override parameters.
/// An empty inner map means use default parameters.
///
/// When the field is absent from the input, `default_backends` supplies
/// the value (UCX and POSIX, each with an empty parameter map).
#[serde(default = "default_backends")]
pub backends: HashMap<String, HashMap<String, String>>,
}
/// Backends enabled when none are configured: `UCX` and `POSIX`, each with an
/// empty parameter map.
fn default_backends() -> HashMap<String, HashMap<String, String>> {
    HashMap::from([
        ("UCX".to_string(), HashMap::new()),
        ("POSIX".to_string(), HashMap::new()),
    ])
}
impl Default for NixlConfig {
    /// Default configuration: the backend set returned by `default_backends`.
    fn default() -> Self {
        let backends = default_backends();
        Self { backends }
    }
}
impl NixlConfig {
    /// Create a configuration from an explicit backend map.
    ///
    /// Backend names are normalized to uppercase so that the entries stay
    /// reachable through `has_backend` / `backend_params`, which uppercase the
    /// queried name. If two names differ only by case, their parameter maps
    /// are merged under the uppercase name.
    pub fn new(backends: HashMap<String, HashMap<String, String>>) -> Self {
        Self {
            backends: Self::normalize_backend_names(backends),
        }
    }

    /// Create a configuration with no backends enabled.
    pub fn empty() -> Self {
        Self {
            backends: HashMap::new(),
        }
    }

    /// Build a configuration from a `NixlBackendConfig`, copying every
    /// backend and its parameters. Backend names are normalized to uppercase.
    pub fn from_nixl_backend_config(config: NixlBackendConfig) -> Self {
        let entries = config
            .iter()
            .map(|(backend, params)| (backend.to_string(), params.clone()));
        Self {
            backends: Self::normalize_backend_names(entries),
        }
    }

    /// Uppercase every backend name, merging parameter maps whose names
    /// collide after normalization.
    fn normalize_backend_names(
        backends: impl IntoIterator<Item = (String, HashMap<String, String>)>,
    ) -> HashMap<String, HashMap<String, String>> {
        let mut normalized: HashMap<String, HashMap<String, String>> = HashMap::new();
        for (name, params) in backends {
            normalized
                .entry(name.to_uppercase())
                .or_default()
                .extend(params);
        }
        normalized
    }

    /// Add a backend with default parameters.
    /// Backend name is normalized to uppercase.
    /// An existing entry for the same backend is replaced by an empty map.
    pub fn with_backend(mut self, name: impl Into<String>) -> Self {
        self.backends
            .insert(name.into().to_uppercase(), HashMap::new());
        self
    }

    /// Add a backend with custom parameters.
    /// Backend name is normalized to uppercase.
    /// An existing entry for the same backend is replaced.
    pub fn with_backend_params(
        mut self,
        name: impl Into<String>,
        params: HashMap<String, String>,
    ) -> Self {
        self.backends.insert(name.into().to_uppercase(), params);
        self
    }

    /// Get the list of enabled backend names (uppercase).
    /// The order is unspecified because the names are the keys of a `HashMap`.
    pub fn enabled_backends(&self) -> Vec<&String> {
        self.backends.keys().collect()
    }

    /// Check if a specific backend is enabled.
    /// Backend name is normalized to uppercase for lookup.
    pub fn has_backend(&self, backend: &str) -> bool {
        self.backends.contains_key(&backend.to_uppercase())
    }

    /// Get parameters for a specific backend.
    /// Backend name is normalized to uppercase for lookup.
    ///
    /// Returns None if the backend is not enabled.
    pub fn backend_params(&self, backend: &str) -> Option<&HashMap<String, String>> {
        self.backends.get(&backend.to_uppercase())
    }

    /// Iterate over all enabled backends and their parameters.
    pub fn iter(&self) -> impl Iterator<Item = (&String, &HashMap<String, String>)> {
        self.backends.iter()
    }
}
impl From<NixlConfig> for NixlBackendConfig {
fn from(config: NixlConfig) -> Self {
NixlBackendConfig::new(config.backends)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parameter map from string pairs.
    fn params_of(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect()
    }

    /// The default configuration enables UCX but not GDS.
    #[test]
    fn test_default_config() {
        let defaults = NixlConfig::default();

        assert!(defaults.has_backend("UCX"));
        assert!(!defaults.has_backend("GDS"));
    }

    /// The default configuration enables UCX and POSIX.
    #[test]
    fn test_new_default() {
        let defaults = NixlConfig::default();

        assert!(defaults.has_backend("UCX"));
        assert!(defaults.has_backend("POSIX"));
        assert!(!defaults.enabled_backends().is_empty());
    }

    /// `with_backend` stores the name in uppercase.
    #[test]
    fn test_with_backend() {
        let cfg = NixlConfig::empty().with_backend("ucx").with_backend("gds");

        assert!(cfg.has_backend("UCX"));
        assert!(cfg.has_backend("GDS"));
        assert!(!cfg.has_backend("POSIX"));
        // The raw map holds uppercase keys.
        assert!(cfg.backends.contains_key("UCX"));
        assert!(cfg.backends.contains_key("GDS"));
    }

    /// Parameters are kept per backend; a plain backend has none.
    #[test]
    fn test_with_backend_params() {
        let gds = params_of(&[("threads", "4"), ("buffer_size", "1048576")]);
        let cfg = NixlConfig::empty()
            .with_backend("UCX")
            .with_backend_params("GDS", gds);

        assert!(cfg.backend_params("UCX").unwrap().is_empty());

        let stored = cfg.backend_params("GDS").unwrap();
        assert_eq!(stored.get("threads"), Some(&"4".to_string()));
        assert_eq!(stored.get("buffer_size"), Some(&"1048576".to_string()));
    }

    /// Lookups accept any casing of the backend name.
    #[test]
    fn test_lookup_normalizes_to_uppercase() {
        let cfg = NixlConfig::empty().with_backend("ucx");

        assert!(cfg.has_backend("ucx"));
        assert!(cfg.has_backend("UCX"));
        assert!(cfg.has_backend("Ucx"));
        assert!(cfg.backend_params("ucx").is_some());
        assert!(cfg.backend_params("UCX").is_some());
    }

    /// `enabled_backends` lists every enabled backend once.
    #[test]
    fn test_enabled_backends() {
        let cfg = NixlConfig::empty().with_backend("ucx").with_backend("gds");

        let names = cfg.enabled_backends();

        assert_eq!(names.len(), 2);
        assert!(names.iter().any(|name| name.as_str() == "UCX"));
        assert!(names.iter().any(|name| name.as_str() == "GDS"));
    }

    /// `iter` yields one item per enabled backend.
    #[test]
    fn test_iter() {
        let cfg = NixlConfig::empty()
            .with_backend("UCX")
            .with_backend_params("GDS", params_of(&[("key", "value")]));

        assert_eq!(cfg.iter().count(), 2);
    }

    /// A JSON round trip preserves backends and their parameters.
    #[test]
    fn test_serde_roundtrip() {
        let cfg = NixlConfig::empty()
            .with_backend("UCX")
            .with_backend_params("GDS", params_of(&[("threads", "4")]));

        let encoded = serde_json::to_string(&cfg).unwrap();
        let decoded: NixlConfig = serde_json::from_str(&encoded).unwrap();

        assert!(decoded.has_backend("UCX"));
        assert!(decoded.has_backend("GDS"));
        assert_eq!(
            decoded.backend_params("GDS").unwrap().get("threads"),
            Some(&"4".to_string())
        );
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Object storage configuration for KVBM.
//!
//! Defines configuration for object storage backends (S3, NIXL) used for
//! the G4 tier (object storage) in the cache hierarchy.
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Top-level object storage configuration.
///
/// When present, enables object storage operations on workers.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct ObjectConfig {
/// Which object client implementation to use.
// NOTE(review): the field has no `#[validate(nested)]`, so `validate()` on
// this struct does not reach the selected client configuration.
pub client: ObjectClientConfig,
}
/// Object client implementation selector.
///
/// Determines whether to use direct S3 client or NIXL agent for object storage.
///
/// Serialized as an internally tagged enum: the `type` field holds the
/// lowercase variant name (`"s3"` or `"nixl"`) and the variant's own fields
/// sit beside it in the same object.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ObjectClientConfig {
/// Direct S3/MinIO client using AWS SDK.
S3(S3ObjectConfig),
/// NIXL agent with object storage backend.
Nixl(NixlObjectConfig),
}
/// S3-compatible object storage configuration.
///
/// Used for both direct S3 access and as a backend for NIXL.
/// Compatible with AWS S3 and S3-compatible services like MinIO.
///
/// Only `bucket` is required when deserializing; every other field has a
/// serde default.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct S3ObjectConfig {
/// Custom endpoint URL for S3-compatible services (e.g., MinIO).
/// If None, uses the default AWS S3 endpoint.
#[serde(default)]
pub endpoint_url: Option<String>,
/// S3 bucket name for storing blocks.
pub bucket: String,
/// AWS region. Defaults to the value of `default_region`.
#[serde(default = "default_region")]
pub region: String,
/// Use path-style URLs instead of virtual-hosted-style.
/// Required for MinIO and some S3-compatible services.
/// Defaults to `false`.
#[serde(default)]
pub force_path_style: bool,
/// Maximum number of concurrent S3 requests.
/// Defaults to the value of `default_max_concurrent`.
#[serde(default = "default_max_concurrent")]
pub max_concurrent_requests: usize,
}
/// Region used when none is configured.
fn default_region() -> String {
    String::from("us-east-1")
}
/// Concurrent-request limit used when none is configured.
fn default_max_concurrent() -> usize {
    const DEFAULT_MAX_CONCURRENT_REQUESTS: usize = 16;
    DEFAULT_MAX_CONCURRENT_REQUESTS
}
impl Default for S3ObjectConfig {
    /// Defaults: bucket `kvbm-blocks`, no custom endpoint, virtual-hosted
    /// style URLs, and the default region and concurrency limit.
    fn default() -> Self {
        let bucket = String::from("kvbm-blocks");
        let region = default_region();
        let max_concurrent_requests = default_max_concurrent();

        Self {
            endpoint_url: None,
            bucket,
            region,
            force_path_style: false,
            max_concurrent_requests,
        }
    }
}
impl S3ObjectConfig {
    /// Configuration for AWS S3: no custom endpoint, virtual-hosted style
    /// URLs, and the default concurrency limit.
    pub fn aws(bucket: String, region: String) -> Self {
        let max_concurrent_requests = default_max_concurrent();

        Self {
            bucket,
            region,
            endpoint_url: None,
            force_path_style: false,
            max_concurrent_requests,
        }
    }

    /// Configuration for MinIO or another S3-compatible service: the given
    /// endpoint, path-style URLs, the default region and concurrency limit.
    pub fn minio(endpoint_url: String, bucket: String) -> Self {
        // Start from the AWS shape and override the two fields that differ.
        Self {
            endpoint_url: Some(endpoint_url),
            force_path_style: true,
            ..Self::aws(bucket, default_region())
        }
    }
}
/// NIXL object storage backend configuration.
///
/// NIXL can use various object storage backends. Each variant
/// specifies the backend type and its configuration.
///
/// Serialized as an internally tagged enum: the `backend` field holds the
/// lowercase variant name (currently only `"s3"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "backend", rename_all = "lowercase")]
pub enum NixlObjectConfig {
/// S3-compatible backend via NIXL.
S3(S3ObjectConfig),
// Future backends can be added here:
// Gcs(GcsObjectConfig),
// Azure(AzureObjectConfig),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` targets the `kvbm-blocks` bucket in `us-east-1`.
    #[test]
    fn test_s3_config_default() {
        let s3 = S3ObjectConfig::default();

        assert!(s3.endpoint_url.is_none());
        assert_eq!(s3.bucket, "kvbm-blocks");
        assert_eq!(s3.region, "us-east-1");
        assert!(!s3.force_path_style);
        assert_eq!(s3.max_concurrent_requests, 16);
    }

    /// The AWS constructor keeps the given bucket/region and no endpoint.
    #[test]
    fn test_s3_config_aws() {
        let s3 = S3ObjectConfig::aws("my-bucket".into(), "us-west-2".into());

        assert!(s3.endpoint_url.is_none());
        assert_eq!(s3.bucket, "my-bucket");
        assert_eq!(s3.region, "us-west-2");
        assert!(!s3.force_path_style);
    }

    /// The MinIO constructor sets the endpoint and path-style URLs.
    #[test]
    fn test_s3_config_minio() {
        let s3 = S3ObjectConfig::minio("http://localhost:9000".into(), "test".into());

        assert_eq!(s3.endpoint_url, Some("http://localhost:9000".into()));
        assert_eq!(s3.bucket, "test");
        assert!(s3.force_path_style);
    }

    /// `"type": "s3"` selects the direct S3 client.
    #[test]
    fn test_object_config_serde_s3() {
        let input = r#"{
"client": {
"type": "s3",
"bucket": "my-bucket",
"region": "us-west-2"
}
}"#;

        let parsed: ObjectConfig = serde_json::from_str(input).unwrap();

        let ObjectClientConfig::S3(s3) = parsed.client else {
            panic!("Expected S3 config")
        };
        assert_eq!(s3.bucket, "my-bucket");
        assert_eq!(s3.region, "us-west-2");
    }

    /// `"type": "nixl"` with `"backend": "s3"` selects the NIXL S3 backend.
    #[test]
    fn test_object_config_serde_nixl_s3() {
        let input = r#"{
"client": {
"type": "nixl",
"backend": "s3",
"bucket": "nixl-bucket",
"endpoint_url": "http://minio:9000",
"force_path_style": true
}
}"#;

        let parsed: ObjectConfig = serde_json::from_str(input).unwrap();

        let ObjectClientConfig::Nixl(NixlObjectConfig::S3(s3)) = parsed.client else {
            panic!("Expected Nixl S3 config")
        };
        assert_eq!(s3.bucket, "nixl-bucket");
        assert_eq!(s3.endpoint_url, Some("http://minio:9000".into()));
        assert!(s3.force_path_style);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Offload policy configuration for KVBM.
//!
//! Defines configuration for offload policies that control which blocks
//! are transferred between storage tiers (G1→G2, G2→G3).
//!
//! # Policy Types
//!
//! - `pass_all`: No filtering, all blocks pass
//! - `presence`: Skip blocks already present in destination tier
//! - `presence_lfu`: Presence check + LFU count threshold
//!
//! # Configuration
//!
//! Policies are configured per tier transition. Multiple policies in the
//! `policies` list are applied in order with implicit AND logic (all must pass).
//!
//! ## JSON Example
//!
//! ```json
//! {
//! "offload": {
//! "g1_to_g2": {
//! "policies": ["presence"],
//! "presence": {}
//! },
//! "g2_to_g3": {
//! "policies": ["presence_lfu"],
//! "presence_lfu": { "min_lfu_count": 8 }
//! }
//! }
//! }
//! ```
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Identifies an offload policy in serialized configuration.
///
/// Variants are written in `snake_case` (`pass_all`, `presence`,
/// `presence_lfu`); each one names a policy implementation in the kvbm crate.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PolicyType {
    /// No filtering: every block passes (PassAllPolicy).
    PassAll,
    /// Skip blocks that are already in the destination tier (PresenceFilter).
    Presence,
    /// Presence check combined with an LFU threshold (PresenceAndLFUFilter).
    PresenceLfu,
}
/// Settings for the presence filter.
///
/// The struct is intentionally empty today. It exists so the presence policy
/// is configured the same way as the other policies and can gain fields later.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct PresenceFilterConfig {}
/// LFU count threshold used when the configuration does not set one.
fn default_min_lfu_count() -> u32 {
    const DEFAULT_MIN_LFU_COUNT: u32 = 8;
    DEFAULT_MIN_LFU_COUNT
}
/// Settings for the combined presence + LFU filter.
///
/// In addition to the presence check, blocks are filtered by their LFU
/// (Least Frequently Used) access count so that rarely-used blocks are not
/// offloaded.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct PresenceLfuFilterConfig {
    /// LFU count threshold a block has to reach before it is offloaded.
    ///
    /// Validation requires a value of at least 1. When the field is omitted
    /// during deserialization it defaults to 8.
    ///
    /// NOTE(review): whether the comparison against this threshold is strict
    /// (`>`) or inclusive (`>=`) is decided by the filter implementation,
    /// which is not part of this module - confirm before relying on it.
    #[serde(default = "default_min_lfu_count")]
    #[validate(range(min = 1))]
    pub min_lfu_count: u32,
}
impl Default for PresenceLfuFilterConfig {
    /// Uses the same threshold that serde applies when the field is missing.
    fn default() -> Self {
        let min_lfu_count = default_min_lfu_count();
        Self { min_lfu_count }
    }
}
/// Offload settings for one tier transition (e.g., G1→G2 or G2→G3).
///
/// Lists the policies applied when blocks move between the two tiers and
/// carries the per-policy settings. Policies combine with implicit AND: a
/// block is transferred only if every listed policy accepts it.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct TierOffloadConfig {
    /// Policies to apply, in evaluation order (implicit AND).
    ///
    /// An empty list means the engine falls back to its tier-specific
    /// defaults.
    #[serde(default)]
    pub policies: Vec<PolicyType>,

    /// Settings for the presence filter.
    ///
    /// Consulted when `presence` appears in `policies`.
    #[serde(default)]
    #[validate(nested)]
    pub presence: PresenceFilterConfig,

    /// Settings for the presence + LFU filter.
    ///
    /// Consulted when `presence_lfu` appears in `policies`.
    #[serde(default)]
    #[validate(nested)]
    pub presence_lfu: PresenceLfuFilterConfig,
}
/// Root of the offload configuration.
///
/// Holds one [`TierOffloadConfig`] per tier transition. Both are optional in
/// serialized form and are validated recursively.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct OffloadConfig {
    /// Policies for offloading from G1 (GPU) to G2 (Host).
    #[serde(default)]
    #[validate(nested)]
    pub g1_to_g2: TierOffloadConfig,

    /// Policies for offloading from G2 (Host) to G3 (Disk).
    #[serde(default)]
    #[validate(nested)]
    pub g2_to_g3: TierOffloadConfig,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults: no policies listed and the LFU threshold at its default.
    #[test]
    fn test_default_config() {
        let config = OffloadConfig::default();
        // Empty policies - engine applies tier-specific defaults
        assert!(config.g1_to_g2.policies.is_empty());
        assert!(config.g2_to_g3.policies.is_empty());
        assert_eq!(config.g2_to_g3.presence_lfu.min_lfu_count, 8);
    }

    /// Policy names deserialize from snake_case and survive a roundtrip.
    #[test]
    fn test_policy_type_serde() {
        let json = r#"["pass_all", "presence", "presence_lfu"]"#;
        let policies: Vec<PolicyType> = serde_json::from_str(json).unwrap();
        assert_eq!(policies.len(), 3);
        assert_eq!(policies[0], PolicyType::PassAll);
        assert_eq!(policies[1], PolicyType::Presence);
        assert_eq!(policies[2], PolicyType::PresenceLfu);
        // Roundtrip through the parsed values rather than comparing strings,
        // so the check does not depend on serde_json's output formatting.
        let serialized = serde_json::to_string(&policies).unwrap();
        let roundtrip: Vec<PolicyType> = serde_json::from_str(&serialized).unwrap();
        assert_eq!(policies, roundtrip);
    }

    /// A single tier config picks up both the policy list and its settings.
    #[test]
    fn test_tier_config_serde() {
        let json = r#"{
"policies": ["presence_lfu"],
"presence_lfu": { "min_lfu_count": 16 }
}"#;
        let config: TierOffloadConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.policies.len(), 1);
        assert_eq!(config.policies[0], PolicyType::PresenceLfu);
        assert_eq!(config.presence_lfu.min_lfu_count, 16);
    }

    /// The top-level config deserializes each tier transition independently.
    #[test]
    fn test_offload_config_serde() {
        let json = r#"{
"g1_to_g2": {
"policies": ["presence"]
},
"g2_to_g3": {
"policies": ["presence_lfu"],
"presence_lfu": { "min_lfu_count": 4 }
}
}"#;
        let config: OffloadConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.g1_to_g2.policies, vec![PolicyType::Presence]);
        assert_eq!(config.g2_to_g3.policies, vec![PolicyType::PresenceLfu]);
        assert_eq!(config.g2_to_g3.presence_lfu.min_lfu_count, 4);
    }

    /// Omitting `presence_lfu` entirely falls back to the default threshold.
    #[test]
    fn test_default_lfu_threshold() {
        let json = r#"{"policies": ["presence_lfu"]}"#;
        let config: TierOffloadConfig = serde_json::from_str(json).unwrap();
        // Should use default of 8
        assert_eq!(config.presence_lfu.min_lfu_count, 8);
    }

    /// Valid configurations pass validation.
    #[test]
    fn test_validation() {
        let config = OffloadConfig::default();
        assert!(config.validate().is_ok());
        let config_with_lfu = OffloadConfig {
            g2_to_g3: TierOffloadConfig {
                policies: vec![PolicyType::PresenceLfu],
                presence_lfu: PresenceLfuFilterConfig { min_lfu_count: 1 },
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(config_with_lfu.validate().is_ok());
    }

    /// A zero threshold violates `range(min = 1)`; the error has to surface
    /// through both levels of nested validation.
    #[test]
    fn test_validation_rejects_zero_lfu_count() {
        let filter = PresenceLfuFilterConfig { min_lfu_count: 0 };
        assert!(filter.validate().is_err());

        let config = OffloadConfig {
            g2_to_g3: TierOffloadConfig {
                policies: vec![PolicyType::PresenceLfu],
                presence_lfu: filter,
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Onboard configuration for KV cache loading strategies.
//!
//! This module defines the configuration for how external KV cache blocks
//! are loaded (onboarded) from G2 (host memory) to G1 (GPU memory).
use serde::{Deserialize, Serialize};
/// Selects how external KV cache blocks are onboarded.
///
/// Onboarding loads external KV cache blocks from G2 (host memory) into
/// G1 (GPU memory) so they can be used during inference.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OnboardConfig {
    /// Onboarding mode; falls back to the default mode when omitted.
    ///
    /// - `inter`: Async out-of-band loading via Nova messages (default)
    /// - `intra`: Synchronous layer-wise loading during forward pass
    #[serde(default)]
    pub mode: OnboardMode,
}
/// When and how G2→G1 transfers of external KV cache blocks happen.
///
/// Serialized in `snake_case` (`inter`, `intra`).
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum OnboardMode {
    /// Inter-pass onboarding (the default).
    ///
    /// Blocks are loaded asynchronously between scheduler passes, using Nova
    /// active messages to the workers. `get_num_new_matched_tokens` returns
    /// `(Some(n), true)` to signal that an async load is in flight.
    ///
    /// Pros: Overlaps transfer with computation
    /// Cons: Adds latency before first token if transfer not complete
    #[default]
    Inter,

    /// Intra-pass onboarding.
    ///
    /// Blocks are loaded synchronously, layer by layer, during the forward
    /// pass. `get_num_new_matched_tokens` returns `(Some(n), false)` and the
    /// G2/G1 block pairs reach the workers through `KvConnectorMetadata`.
    ///
    /// Pros: Guaranteed data availability before each layer
    /// Cons: Serializes transfer with computation per layer
    Intra,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `OnboardConfig::default()` selects inter-pass onboarding.
    #[test]
    fn test_default_mode_is_inter() {
        let config = OnboardConfig::default();
        assert_eq!(config.mode, OnboardMode::Inter);
    }

    /// Both modes deserialize from their snake_case names and survive a
    /// serialize → deserialize roundtrip.
    #[test]
    fn test_mode_serde_roundtrip() {
        // Test inter mode
        let json = r#"{"mode": "inter"}"#;
        let config: OnboardConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.mode, OnboardMode::Inter);
        // Test intra mode
        let json = r#"{"mode": "intra"}"#;
        let config: OnboardConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.mode, OnboardMode::Intra);
        // Serialize each mode and read it back, so the test name holds.
        for mode in [OnboardMode::Inter, OnboardMode::Intra] {
            let serialized = serde_json::to_string(&OnboardConfig { mode }).unwrap();
            let restored: OnboardConfig = serde_json::from_str(&serialized).unwrap();
            assert_eq!(restored.mode, mode);
        }
    }

    /// An empty object falls back to the default mode.
    #[test]
    fn test_empty_json_uses_default() {
        let json = r#"{}"#;
        let config: OnboardConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.mode, OnboardMode::Inter);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Rayon thread pool configuration.
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Settings for the Rayon thread pool.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
pub struct RayonConfig {
    /// Thread count for the Rayon pool; validation requires at least 1 when
    /// a value is given.
    ///
    /// If None, uses the number of logical CPUs.
    #[validate(range(min = 1))]
    pub num_threads: Option<usize>,
}
#[cfg(feature = "rayon")]
impl RayonConfig {
    /// Create a Rayon thread pool as described by this configuration.
    ///
    /// `num_threads` is forwarded to the builder only when it is set;
    /// otherwise the builder's own default applies.
    pub fn build_pool(&self) -> Result<::rayon::ThreadPool, ::rayon::ThreadPoolBuildError> {
        let pool_builder = match self.num_threads {
            Some(count) => ::rayon::ThreadPoolBuilder::new().num_threads(count),
            None => ::rayon::ThreadPoolBuilder::new(),
        };
        pool_builder.build()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration leaves the thread count unset.
    #[test]
    fn test_default_config() {
        let cfg = RayonConfig::default();
        assert_eq!(cfg.num_threads, None);
    }

    /// An explicit thread count is honoured by the built pool.
    #[cfg(feature = "rayon")]
    #[test]
    fn test_build_pool() {
        let requested = 2;
        let cfg = RayonConfig {
            num_threads: Some(requested),
        };
        let pool = cfg.build_pool().expect("Failed to build pool");
        assert_eq!(pool.current_num_threads(), requested);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Tokio runtime configuration.
use std::sync::atomic::{AtomicUsize, Ordering};
use serde::{Deserialize, Serialize};
use validator::Validate;
/// Atomic counter for assigning unique thread ranks.
///
/// `TokioConfig::build_runtime` increments it once for every thread the
/// runtime starts. It is a single static, so ranks keep growing across all
/// runtimes built in the process instead of restarting at zero per runtime.
static THREAD_RANK: AtomicUsize = AtomicUsize::new(0);
/// Settings for the Tokio runtime.
///
/// Note that `TokioConfig::default()` pins `worker_threads` to `Some(1)`
/// rather than leaving it unset.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct TokioConfig {
    /// Count of async worker threads.
    ///
    /// When set, validation requires a value between 1 and
    /// `default_max_cpus()` inclusive.
    /// If None, uses the number of logical CPUs.
    #[validate(range(min = 1, max = default_max_cpus()))]
    pub worker_threads: Option<usize>,

    /// Upper bound on blocking threads; at least 1 when set.
    ///
    /// If None, uses Tokio's default (512).
    #[validate(range(min = 1))]
    pub max_blocking_threads: Option<usize>,
}
impl TokioConfig {
    /// Create a multi-threaded Tokio runtime as described by this configuration.
    ///
    /// Thread limits are applied only when they are set. Every thread the
    /// runtime starts takes the next value of `THREAD_RANK`; with the `nvtx`
    /// feature enabled that rank is used to name the thread.
    pub fn build_runtime(&self) -> std::io::Result<::tokio::runtime::Runtime> {
        let mut rt_builder = ::tokio::runtime::Builder::new_multi_thread();

        if let Some(workers) = self.worker_threads {
            rt_builder.worker_threads(workers);
        }
        if let Some(max_blocking) = self.max_blocking_threads {
            rt_builder.max_blocking_threads(max_blocking);
        }

        rt_builder.on_thread_start(|| {
            let thread_rank = THREAD_RANK.fetch_add(1, Ordering::Relaxed);
            #[cfg(feature = "nvtx")]
            nvtx::name_thread!("kvbm-tokio:{}", thread_rank);
            // Without nvtx the rank is unused; consume it to avoid a warning.
            #[cfg(not(feature = "nvtx"))]
            let _ = thread_rank;
        });
        rt_builder.enable_all();
        rt_builder.build()
    }
}
impl Default for TokioConfig {
    /// One worker thread, and no explicit cap on blocking threads.
    fn default() -> Self {
        let worker_threads = Some(1);
        let max_blocking_threads = None;
        Self {
            worker_threads,
            max_blocking_threads,
        }
    }
}
/// Upper bound for `worker_threads`: the parallelism reported by the
/// standard library, or 4 when it cannot be determined.
fn default_max_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(cpus) => cpus.get(),
        Err(_) => 4,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults: a single worker thread and no blocking-thread cap.
    #[test]
    fn test_default_config() {
        let cfg = TokioConfig::default();
        // Default uses 1 worker thread to minimize resource usage
        assert_eq!(cfg.worker_threads, Some(1));
        assert_eq!(cfg.max_blocking_threads, None);
    }

    /// A runtime can be built from the default configuration.
    #[test]
    fn test_build_runtime_with_defaults() {
        let _runtime = TokioConfig::default()
            .build_runtime()
            .expect("Failed to build runtime");
    }

    /// A runtime can be built with explicit thread limits.
    #[test]
    fn test_build_runtime_with_custom_threads() {
        let cfg = TokioConfig {
            worker_threads: Some(2),
            max_blocking_threads: Some(4),
        };
        let _runtime = cfg.build_runtime().expect("Failed to build runtime");
    }
}
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Build & Test
This is a Rust crate (`kvbm-engine`) in the dynamo workspace. Rust edition 2024, requires rustc 1.93.1+.
```bash
# Build
cargo build -p kvbm-engine
cargo build -p kvbm-engine --features s3,testing,nats
# Test (most tests require the `testing` feature)
cargo test -p kvbm-engine --features testing
cargo test -p kvbm-engine --features testing -- test_name # single test
# Lint
cargo clippy -p kvbm-engine --all-features
cargo fmt
cargo machete
```
## Feature Flags
| Flag | Purpose |
|------|---------|
| `s3` (default) | S3/MinIO object storage (G4 tier) |
| `testing` | Test utilities, mock infrastructure, fixtures |
| `nats` | NATS-based pub/sub transport |
| `collectives` | NIXL + NCCL multi-GPU collectives |
| `nccl` | NCCL via cudarc |
| `nvtx` | NVIDIA Tools Extension profiling markers |
## Architecture
kvbm-engine implements distributed coordination for KV cache block management across a tiered storage hierarchy:
- **G1** (GPU HBM) → **G2** (Pinned DRAM) → **G3** (NVMe/SSD) → **G4** (S3/MinIO)
Leaders own block metadata and make placement decisions. Workers execute data transfers (RDMA, NVMe, object storage). Sessions coordinate multi-instance block transfers between leaders and workers.
### Key Modules
- **`leader/`** — `InstanceLeader` coordinates block lookups (`find_matches`), holds blocks via RAII `BlockHolder`, and manages distributed sessions. The `Leader` trait is the core coordination interface.
- **`leader/session/`** — Distributed session protocol: `InitiatorSession` (requester), `ResponderSession` (provider), `ServerSession` (server-side block exposure with optional G3→G2 staging). Sessions track onboarding state: Searching → Holding → Staging → Ready → Complete.
- **`worker/`** — `PhysicalWorker` owns a `TransferManager` and layout handles for actual transfers. `CoordinatedWorker` wraps any `Worker` with the leader's coordination state. The `Worker` and `WorkerTransfers` traits define the execution contract.
- **`worker/group/`** — `SpmdParallelWorkers` broadcasts operations to all workers in parallel (SPMD model) with event aggregation.
- **`worker/velo/`** — RPC layer (`VeloWorkerService`/`VeloWorkerClient`) for remote worker execution via Velo.
- **`offload/`** — Multi-stage async pipeline for tier demotion: PolicyEvaluator → PreconditionAwaiter → Batcher → TransferExecutor. Supports per-container cancellation tokens. **See `src/offload/AGENTS.md` for governance rules before modifying this module.**
- **`object/`** — `ObjectBlockOps` trait for G4 storage. S3 implementation with concurrent uploads/downloads. `ObjectLockManager` for distributed locking via conditional S3 PUTs.
- **`runtime/`** — `KvbmRuntime` bundles tokio, Velo messenger, NixlAgent (RDMA), and EventManager. Built via `KvbmRuntimeBuilder` or quick constructors (`from_env_leader`, `from_env_worker`).
- **`pubsub/`** — Publisher/Subscriber traits with NATS and in-memory stub implementations.
- **`collectives/`** — `CollectiveOps` trait for multi-GPU sync. NCCL implementation and stub for testing. MLA pattern: only rank 0 needs G2/G3; others receive via broadcast.
- **`testing/`** — Feature-gated test utilities: `TestManagerBuilder`, `MessengerPair`, `TestSession`, `EventsPipelineFixture`, `MultiInstancePopulator`, `TestAgent`.
### Documentation
Module docs live in `docs/` and are included via `#[doc = include_str!("../docs/...")]`. When modifying a module, update the corresponding doc file.
### Key Patterns
- **Trait-based abstraction**: `Leader`, `Worker`, `WorkerTransfers`, `ObjectBlockOps`, `CollectiveOps`, `KeyFormatter` — implementations are swappable (real vs. test stubs).
- **RAII resource management**: `BlockHolder` holds blocks during sessions with automatic release on drop. `TransferHandle` tracks offload operations.
- **Builder pattern**: `InstanceLeaderBuilder`, `PhysicalWorkerBuilder`, `KvbmRuntimeBuilder`, `OffloadEngineBuilder`.
- **Execution vs. coordination state**: `PhysicalWorker` owns execution state; `CoordinatedWorker` adds the leader's coordination view. Same API regardless of worker locality.
### Workspace Dependencies
Internal crates: `kvbm-common`, `kvbm-config`, `kvbm-kernels`, `kvbm-logical`, `kvbm-physical`, `velo`, `dynamo-tokens`, `dynamo-memory`.
## Offload Module Governance
The offload module (`src/offload/`) has explicit policies (P1–P6) documented in its README. Before modifying offload code, read `src/offload/AGENTS.md` and the offload docs (`docs/offload.md`, `docs/offload-developer.md`). Off-policy changes require user approval before implementation.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
[package]
name = "kvbm-engine"
version = "1.0.0"
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
description = "Distributed coordination primitives for KVBM"
[dependencies]
kvbm-common = { workspace = true }
kvbm-config = { workspace = true }
kvbm-logical = { workspace = true }
kvbm-physical = { workspace = true }
velo = { workspace = true }
# Workspace deps
dynamo-memory = { workspace = true }
anyhow = { workspace = true }
dashmap = { workspace = true }
derive_builder = { workspace = true }
futures = { workspace = true }
parking_lot = { workspace = true }
rmp-serde = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
# Non-workspace
bytes = "1.10"
crossbeam-queue = "0.3"
oneshot = "0.1.11"
# Optional
# Each of these is only compiled when a feature in [features] names it via `dep:`:
#   nccl  -> cudarc
#   s3    -> aws-sdk-s3, aws-config, rayon, tokio-rayon, chrono
#   nats  -> async-nats, flume
#   bench -> clap, figment, libc, tracing-subscriber, chrono
#   nvtx  -> nvtx
cudarc = { workspace = true, optional = true }
aws-sdk-s3 = { version = "1.120.0", optional = true }
aws-config = { version = "1.8.11", optional = true }
rayon = { version = "1", optional = true }
tokio-rayon = { version = "2", optional = true }
chrono = { version = "0.4", optional = true }
async-nats = { workspace = true, optional = true }
flume = { version = "0.11", optional = true }
clap = { version = "4", features = ["derive"], optional = true }
figment = { version = "0.10", features = ["env", "toml"], optional = true }
libc = { version = "0.2", optional = true }
tracing-subscriber = { workspace = true, optional = true }
nvtx = { version = "1.3", optional = true }
[features]
# S3 support is on unless default features are disabled.
default = ["s3"]
s3 = ["dep:aws-sdk-s3", "dep:aws-config", "dep:rayon", "dep:tokio-rayon", "dep:chrono"]
# `collectives` currently enables nothing beyond `nccl`.
collectives = ["nccl"]
nccl = ["dep:cudarc"]
# NOTE(review): unlike `testing-s3`, this does not enable `testing` - confirm that is intended.
testing-nccl = ["collectives"]
nats = ["dep:async-nats", "dep:flume"]
# Forwards to the `testing` features of kvbm-logical and kvbm-physical.
testing = ["kvbm-logical/testing", "kvbm-physical/testing"]
testing-s3 = ["s3", "testing"]
nvtx = ["kvbm-config/nvtx", "dep:nvtx"]
# CLI/benchmark dependencies; also turns on `testing`.
bench = ["dep:clap", "dep:figment", "dep:libc", "dep:tracing-subscriber", "dep:chrono", "testing"]
# The benchmark binary is only built when the `bench` feature is enabled.
[[bin]]
name = "bench_engine"
path = "bin/bench_engine.rs"
required-features = ["bench"]
# Keeps cargo-machete from reporting `rayon` as unused.
# NOTE(review): presumably rayon is only needed indirectly (e.g. by tokio-rayon) - confirm.
[package.metadata.cargo-machete]
ignored = ["rayon"]
# kvbm-engine
Distributed coordination primitives for KV cache block management (KVBM).
This crate implements the leader/worker architecture for managing KV cache blocks across a tiered storage hierarchy:
**G1** (GPU HBM) → **G2** (Pinned DRAM) → **G3** (NVMe/SSD) → **G4** (S3/MinIO)
Leaders own block metadata and make placement decisions. Workers execute data transfers (RDMA, NVMe, object storage). Sessions coordinate multi-instance block transfers.
## Feature Flags
| Flag | Purpose |
| -------------- | ---------------------------------------- |
| `s3` (default) | S3/MinIO object storage (G4 tier) |
| `testing` | Test utilities and mock infrastructure |
| `nats` | NATS-based pub/sub transport |
| `collectives` | NIXL + NCCL multi-GPU collectives |
| `nccl` | NCCL via cudarc |
| `nvtx` | NVIDIA Tools Extension profiling markers |
## Documentation
Detailed module documentation lives in [`docs/`](docs/):
- [Architecture](docs/architecture.md) — Overall system design
- [Leader](docs/leader.md) — Block coordination and metadata management
- [Session](docs/session.md) — Distributed onboarding protocol
- [Worker](docs/worker.md) — Transfer execution
- [Worker Group](docs/worker-group.md) — SPMD parallel workers
- [Offload](docs/offload.md) — Async tier-demotion pipeline
- [Offload Developer Guide](docs/offload-developer.md) — Contributing to the offload module
- [Object Storage](docs/object.md) — S3/MinIO integration
- [Runtime](docs/runtime.md) — Runtime bundle (tokio, Velo, NIXL)
- [Testing](docs/testing.md) — Test utilities and fixtures
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment