"lib/vscode:/vscode.git/clone" did not exist on "de27efe6e6972f6c21539e257f9109c8fa0dc065"
Unverified commit f849e1a6, authored by Yan Ru Pei and committed by GitHub
Browse files

fix(kv-router): migrate raw zmq paths to libzmq and refresh lockfiles (#7871)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent bfc59cd2
......@@ -14,7 +14,7 @@ version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"getrandom 0.3.4",
"once_cell",
"serde",
......@@ -306,33 +306,6 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "async_zmq"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "499c7104563d51146553fb0963f00210d8825833789e0ed270dd96aeeff6ac93"
dependencies = [
"futures",
"mio 0.6.23",
"once_cell",
"slab",
"thiserror 1.0.69",
"zmq",
]
[[package]]
name = "asynchronous-codec"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a860072022177f903e59730004fb5dc13db9275b79bb2aef7ba8ce831956c233"
dependencies = [
"bytes",
"futures-sink",
"futures-util",
"memchr",
"pin-project-lite",
]
[[package]]
name = "atomic"
version = "0.6.1"
......@@ -356,7 +329,7 @@ checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi 0.1.19",
"libc",
"winapi 0.3.9",
"winapi",
]
[[package]]
......@@ -749,7 +722,7 @@ dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if 1.0.4",
"cfg-if",
"constant_time_eq",
"cpufeatures",
"memmap2",
......@@ -915,12 +888,6 @@ dependencies = [
"target-lexicon",
]
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cfg-if"
version = "1.0.4"
......@@ -1063,7 +1030,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -1073,7 +1040,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
"castaway",
"cfg-if 1.0.4",
"cfg-if",
"itoa",
"rustversion",
"ryu",
......@@ -1265,7 +1232,7 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
]
[[package]]
......@@ -1461,7 +1428,7 @@ version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"cpufeatures",
"curve25519-dalek-derive",
"digest",
......@@ -1606,7 +1573,7 @@ version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"hashbrown 0.14.5",
"lock_api",
"once_cell",
......@@ -1619,7 +1586,7 @@ version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"crossbeam-utils",
"hashbrown 0.14.5",
"lock_api",
......@@ -1897,7 +1864,6 @@ dependencies = [
"anyhow",
"async-trait",
"axum 0.8.4",
"bytes",
"dashmap 6.1.0",
"derive-getters",
"derive_builder",
......@@ -1922,7 +1888,7 @@ dependencies = [
"uuid",
"validator",
"xxhash-rust",
"zeromq",
"zmq",
]
[[package]]
......@@ -2023,7 +1989,6 @@ dependencies = [
"validator",
"video-rs",
"xxhash-rust",
"zeromq",
]
[[package]]
......@@ -2129,7 +2094,6 @@ dependencies = [
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.4",
"bincode 1.3.3",
"blake3",
......@@ -2179,6 +2143,7 @@ dependencies = [
"temp-env",
"tempfile",
"thiserror 2.0.18",
"tmq",
"tokio",
"tokio-rayon",
"tokio-stream",
......@@ -2191,7 +2156,6 @@ dependencies = [
"uuid",
"validator",
"xxhash-rust",
"zmq",
]
[[package]]
......@@ -2263,7 +2227,7 @@ version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
]
[[package]]
......@@ -2550,7 +2514,7 @@ version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"libc",
"libredox",
]
......@@ -2652,22 +2616,6 @@ dependencies = [
"libc",
]
[[package]]
name = "fuchsia-zircon"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
dependencies = [
"bitflags 1.3.2",
"fuchsia-zircon-sys",
]
[[package]]
name = "fuchsia-zircon-sys"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
[[package]]
name = "futures"
version = "0.3.32"
......@@ -2797,7 +2745,7 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"js-sys",
"libc",
"wasi",
......@@ -2810,7 +2758,7 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"js-sys",
"libc",
"r-efi 5.3.0",
......@@ -2824,7 +2772,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"libc",
"r-efi 6.0.0",
"wasip2",
......@@ -2934,7 +2882,7 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"crunchy",
"num-traits",
"rand 0.9.2",
......@@ -3072,7 +3020,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"libc",
"windows-link",
]
......@@ -3535,7 +3483,7 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
]
[[package]]
......@@ -3549,15 +3497,6 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "iovec"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
dependencies = [
"libc",
]
[[package]]
name = "ipnet"
version = "2.12.0"
......@@ -3823,16 +3762,6 @@ dependencies = [
"serde_json",
]
[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
dependencies = [
"winapi 0.2.8",
"winapi-build",
]
[[package]]
name = "kqueue"
version = "1.1.1"
......@@ -4102,7 +4031,7 @@ version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"windows-link",
]
......@@ -4112,7 +4041,7 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"windows-link",
]
......@@ -4322,7 +4251,7 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"rayon",
]
......@@ -4332,7 +4261,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"digest",
]
......@@ -4438,25 +4367,6 @@ dependencies = [
"web-time",
]
[[package]]
name = "mio"
version = "0.6.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4"
dependencies = [
"cfg-if 0.1.10",
"fuchsia-zircon",
"fuchsia-zircon-sys",
"iovec",
"kernel32-sys",
"libc",
"log",
"miow",
"net2",
"slab",
"winapi 0.2.8",
]
[[package]]
name = "mio"
version = "0.8.11"
......@@ -4480,18 +4390,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "miow"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d"
dependencies = [
"kernel32-sys",
"net2",
"winapi 0.2.8",
"ws2_32-sys",
]
[[package]]
name = "mockito"
version = "1.7.2"
......@@ -4685,17 +4583,6 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "net2"
version = "0.2.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b13b648036a2339d06de780866fbdfda0dde886de7b3af2ddeba8b14f4ee34ac"
dependencies = [
"cfg-if 0.1.10",
"libc",
"winapi 0.3.9",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
......@@ -4709,7 +4596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [
"bitflags 1.3.2",
"cfg-if 1.0.4",
"cfg-if",
"libc",
"memoffset",
"pin-utils",
......@@ -4722,7 +4609,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags 2.11.0",
"cfg-if 1.0.4",
"cfg-if",
"cfg_aliases",
"libc",
]
......@@ -5398,7 +5285,7 @@ version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"libc",
"redox_syscall 0.5.18",
"smallvec",
......@@ -5817,7 +5704,7 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"fnv",
"lazy_static",
"memchr",
......@@ -6208,7 +6095,7 @@ dependencies = [
"av1-grain",
"bitstream-io",
"built",
"cfg-if 1.0.4",
"cfg-if",
"interpolate_name",
"itertools 0.14.0",
"libc",
......@@ -6477,7 +6364,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if 1.0.4",
"cfg-if",
"getrandom 0.2.17",
"libc",
"untrusted",
......@@ -6570,7 +6457,7 @@ version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"glob",
"proc-macro2",
"quote",
......@@ -6587,7 +6474,7 @@ version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"glob",
"proc-macro-crate",
"proc-macro2",
......@@ -6605,7 +6492,7 @@ version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"glob",
"proc-macro-crate",
"proc-macro2",
......@@ -6623,7 +6510,7 @@ version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"glob",
"proc-macro-crate",
"proc-macro2",
......@@ -6686,7 +6573,7 @@ version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"ordered-multimap",
]
......@@ -7268,7 +7155,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"cpufeatures",
"digest",
]
......@@ -7279,7 +7166,7 @@ version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"cpufeatures",
"digest",
]
......@@ -7413,7 +7300,7 @@ checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
dependencies = [
"byteorder",
"libc",
"winapi 0.3.9",
"winapi",
]
[[package]]
......@@ -7466,7 +7353,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cffa8a2e517b4e9f270c47e1c4120df90506d9451c1efa67e3698d66446d30ce"
dependencies = [
"libc",
"winapi 0.3.9",
"winapi",
]
[[package]]
......@@ -7689,7 +7576,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
]
[[package]]
......@@ -7959,7 +7846,6 @@ checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
dependencies = [
"bytes",
"futures-core",
"futures-io",
"futures-sink",
"futures-util",
"pin-project-lite",
......@@ -8919,7 +8805,7 @@ version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
......@@ -8932,7 +8818,7 @@ version = "0.4.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"futures-util",
"js-sys",
"once_cell",
......@@ -9063,12 +8949,6 @@ version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"
[[package]]
name = "winapi"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
[[package]]
name = "winapi"
version = "0.3.9"
......@@ -9079,12 +8959,6 @@ dependencies = [
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
......@@ -9097,7 +8971,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -9422,7 +9296,7 @@ version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [
"cfg-if 1.0.4",
"cfg-if",
"windows-sys 0.48.0",
]
......@@ -9526,16 +9400,6 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
[[package]]
name = "ws2_32-sys"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e"
dependencies = [
"winapi 0.2.8",
"winapi-build",
]
[[package]]
name = "xxhash-rust"
version = "0.8.15"
......@@ -9635,33 +9499,6 @@ version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
[[package]]
name = "zeromq"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a4528179201f6eecf211961a7d3276faa61554c82651ecc66387f68fc3004bd"
dependencies = [
"async-trait",
"asynchronous-codec",
"bytes",
"crossbeam-queue",
"dashmap 5.5.3",
"futures-channel",
"futures-io",
"futures-task",
"futures-util",
"log",
"num-traits",
"once_cell",
"parking_lot",
"rand 0.8.5",
"regex",
"thiserror 1.0.69",
"tokio",
"tokio-util",
"uuid",
]
[[package]]
name = "zeromq-src"
version = "0.2.6+4.3.4"
......
......@@ -64,7 +64,6 @@ anyhow = { version = "1" }
async-nats = { version = "0.45.0", features = ["service"] }
async-stream = { version = "0.3" }
async-trait = { version = "0.1" }
async_zmq = { version = "0.4.0" }
blake3 = { version = "1" }
bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = [
......@@ -74,7 +73,7 @@ chrono = { version = "0.4", default-features = false, features = [
"now",
"serde",
] }
cudarc = { version = "0.19.2", features = ["cuda-version-from-build-system", "fallback-latest"] }
cudarc = { version = "=0.19.3", features = ["cuda-version-from-build-system", "fallback-latest"] }
dashmap = { version = "6.1" }
derive_builder = { version = "0.20" }
derive-getters = { version = "0.5" }
......@@ -115,7 +114,6 @@ strum = { version = "0.27", features = ["derive"] }
tempfile = "3"
thiserror = { version = "2.0.17" }
tmq = { version = "0.5.0" }
zmq = { version = "0.10" }
tokio = { version = "=1.48.0", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7.17", features = ["codec", "net", "rt", "io-util"] }
......
This diff is collapsed.
......@@ -56,6 +56,6 @@ pyo3-async-runtimes = { version = "0.23.0", default-features = false, features =
] }
dlpark = { version = "0.5", features = ["pyo3", "half"], optional = true }
cudarc = { version = "0.19.2", features = ["cuda-version-from-build-system", "fallback-latest"], optional = true }
cudarc = { version = "=0.19.3", features = ["cuda-version-from-build-system", "fallback-latest"], optional = true }
[dev-dependencies]
This diff is collapsed.
......@@ -17,7 +17,7 @@ default = []
metrics = ["dep:prometheus"]
runtime-protocols = ["dep:dynamo-runtime"]
bench = []
standalone-indexer = ["dep:axum", "dep:bytes", "dep:zeromq", "dep:serde_json", "dep:reqwest"]
standalone-indexer = ["dep:axum", "dep:serde_json", "dep:reqwest", "dep:zmq"]
indexer-runtime = ["metrics", "runtime-protocols", "standalone-indexer"]
[dependencies]
......@@ -40,6 +40,7 @@ thiserror = { workspace = true }
tokio = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
zmq = { version = "0.10", optional = true }
uuid = { workspace = true }
validator = { workspace = true }
xxhash-rust = { workspace = true }
......@@ -53,9 +54,7 @@ rustc-hash = "2.1.1"
# standalone-indexer (optional)
axum = { workspace = true, optional = true }
bytes = { workspace = true, optional = true }
reqwest = { workspace = true, optional = true }
zeromq = { version = "0.4.1", optional = true }
[dev-dependencies]
rstest = "0.18.2"
......
......@@ -3,13 +3,10 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::time::Duration;
use bytes::Bytes;
use rmp_serde as rmps;
use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
use zeromq::{DealerSocket, Socket, SocketRecv, SocketSend, SubSocket};
use crate::protocols::{WorkerId, WorkerWithDpRank};
use crate::recovery::{CursorObservation, CursorState};
......@@ -17,18 +14,10 @@ use crate::zmq_wire::{KvEventBatch, convert_event};
use super::indexer::Indexer;
use super::registry::ListenerRecord;
const INITIAL_BACKOFF_MS: u64 = 10;
const MAX_BACKOFF_MS: u64 = 5000;
const MAX_CONSECUTIVE_ERRORS: u32 = 10;
const MAX_BACKOFF_EXPONENT: u32 = 8;
fn calculate_backoff_ms(consecutive_errors: u32) -> u64 {
std::cmp::min(
INITIAL_BACKOFF_MS * 2_u64.pow(consecutive_errors.min(MAX_BACKOFF_EXPONENT)),
MAX_BACKOFF_MS,
)
}
use super::zmq::{
MultipartMessage, SharedSocket, connect_dealer_socket, connect_sub_socket, recv_multipart,
send_multipart,
};
const WATERMARK_UNSET: u64 = u64::MAX;
......@@ -46,11 +35,10 @@ struct ListenerLoop {
block_size: u32,
indexer: Indexer,
cancel: CancellationToken,
socket: SubSocket,
replay_socket: Option<DealerSocket>,
live_socket: SharedSocket,
replay_socket: Option<SharedSocket>,
watermark: Arc<AtomicU64>,
warning_count: Arc<AtomicU32>,
consecutive_errors: u32,
messages_processed: u64,
}
......@@ -62,8 +50,8 @@ impl ListenerLoop {
block_size: u32,
indexer: Indexer,
cancel: CancellationToken,
socket: SubSocket,
replay_socket: Option<DealerSocket>,
live_socket: SharedSocket,
replay_socket: Option<SharedSocket>,
watermark: Arc<AtomicU64>,
) -> Self {
Self {
......@@ -72,11 +60,10 @@ impl ListenerLoop {
block_size,
indexer,
cancel,
socket,
live_socket,
replay_socket,
watermark,
warning_count: Arc::new(AtomicU32::new(0)),
consecutive_errors: 0,
messages_processed: 0,
}
}
......@@ -94,7 +81,7 @@ impl ListenerLoop {
"Requesting replay from engine"
);
let Some(replay_socket) = self.replay_socket.as_mut() else {
let Some(replay_socket) = self.replay_socket.as_ref() else {
tracing::warn!(
self.worker_id,
self.dp_rank,
......@@ -111,21 +98,25 @@ impl ListenerLoop {
let warning_count = &self.warning_count;
let watermark = &self.watermark;
let req_frames = vec![Bytes::new(), Bytes::from(start_seq.to_be_bytes().to_vec())];
let Ok(req_msg) = zeromq::ZmqMessage::try_from(req_frames) else {
tracing::error!(worker_id, dp_rank, "Failed to build replay request");
return 0;
};
if let Err(error) = replay_socket.send(req_msg).await {
let req_frames = vec![Vec::new(), start_seq.to_be_bytes().to_vec()];
if let Err(error) = send_multipart(replay_socket, req_frames).await {
tracing::error!(worker_id, dp_rank, error = %error, "Failed to send replay request");
return 0;
}
let mut replayed = 0u64;
loop {
let Ok(msg) = replay_socket.recv().await else {
tracing::error!(worker_id, dp_rank, "Replay recv error");
break;
let msg = tokio::select! {
_ = self.cancel.cancelled() => break,
result = recv_multipart(replay_socket) => {
match result {
Ok(msg) => msg,
Err(error) => {
tracing::error!(worker_id, dp_rank, error = %error, "Replay recv error");
break;
}
}
}
};
if msg.len() < 3 {
tracing::warn!(
......@@ -251,7 +242,7 @@ impl ListenerLoop {
self.watermark.store(seq, Ordering::Release);
}
async fn handle_message(&mut self, msg: zeromq::ZmqMessage) {
async fn handle_message(&mut self, msg: MultipartMessage) {
if msg.len() != 3 {
tracing::warn!(
self.worker_id,
......@@ -299,34 +290,15 @@ impl ListenerLoop {
return Ok(());
}
msg_result = self.socket.recv() => {
match msg_result {
Ok(msg) => {
self.consecutive_errors = 0;
msg
}
result = recv_multipart(&self.live_socket) => {
match result {
Ok(msg) => msg,
Err(error) => {
self.consecutive_errors += 1;
if self.consecutive_errors >= MAX_CONSECUTIVE_ERRORS {
return Err(format!(
"too many consecutive ZMQ recv errors for worker {} dp_rank {}: {error}",
self.worker_id,
self.dp_rank,
));
}
let backoff_ms = calculate_backoff_ms(self.consecutive_errors);
tracing::warn!(
error = %error,
consecutive_errors = self.consecutive_errors,
backoff_ms,
worker_id = self.worker_id,
dp_rank = self.dp_rank,
"ZMQ recv error, backing off"
);
tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
continue;
return Err(format!(
"ZMQ recv failed for worker {} dp_rank {}: {error}",
self.worker_id,
self.dp_rank,
));
}
}
}
......@@ -382,18 +354,8 @@ async fn run_listener(
return Ok(());
}
let mut socket = SubSocket::new();
socket
.subscribe("")
.await
.map_err(|e| format!("failed to subscribe on ZMQ socket: {e}"))?;
tokio::select! {
_ = cancel.cancelled() => return Ok(()),
result = socket.connect(&endpoint) => {
result.map_err(|e| format!("failed to connect ZMQ SUB socket to {endpoint}: {e}"))?;
}
}
let socket = connect_sub_socket(&endpoint)
.map_err(|e| format!("failed to connect ZMQ SUB socket to {endpoint}: {e}"))?;
tokio::select! {
_ = cancel.cancelled() => return Ok(()),
......@@ -438,33 +400,31 @@ async fn connect_replay_socket(
dp_rank: u32,
replay_endpoint: Option<&str>,
cancel: &CancellationToken,
) -> Option<DealerSocket> {
) -> Option<SharedSocket> {
let endpoint = replay_endpoint?;
let mut socket = DealerSocket::new();
tokio::select! {
_ = cancel.cancelled() => None,
result = socket.connect(endpoint) => {
match result {
Ok(()) => {
tracing::info!(
worker_id,
dp_rank,
replay_endpoint = endpoint,
"Replay socket connected"
);
Some(socket)
}
Err(e) => {
tracing::error!(
worker_id,
dp_rank,
error = %e,
"Failed to connect replay socket to {endpoint}"
);
None
}
}
if cancel.is_cancelled() {
return None;
}
match connect_dealer_socket(endpoint) {
Ok(socket) => {
tracing::info!(
worker_id,
dp_rank,
replay_endpoint = endpoint,
"Replay socket connected"
);
Some(socket)
}
Err(error) => {
tracing::error!(
worker_id,
dp_rank,
error = %error,
"Failed to connect replay socket to {endpoint}"
);
None
}
}
}
......@@ -473,7 +433,9 @@ async fn connect_replay_socket(
mod tests {
use super::{WATERMARK_UNSET, cursor_from_watermark};
use crate::recovery::CursorObservation;
use zeromq::{PubSocket, Socket, SocketRecv, SocketSend, SubSocket};
use crate::standalone_indexer::zmq::{
SharedSocket, bind_pub_socket, connect_sub_socket, recv_multipart, send_multipart,
};
#[test]
fn initial_gap_replays_from_zero_and_replayed_seq_becomes_stale() {
......@@ -494,23 +456,27 @@ mod tests {
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn zmq_buffers_messages_during_brief_delay() {
let mut pub_socket = PubSocket::new();
let bound_endpoint = pub_socket.bind("tcp://127.0.0.1:0").await.unwrap();
let mut sub_socket = SubSocket::new();
sub_socket.subscribe("").await.unwrap();
sub_socket
.connect(&bound_endpoint.to_string())
.await
.unwrap();
let reserved_listener = reserve_open_port();
let endpoint = format!(
"tcp://127.0.0.1:{}",
reserved_listener
.local_addr()
.expect("failed to read reserved listener address")
.port()
);
drop(reserved_listener);
let pub_socket = bind_pub_socket(&endpoint).unwrap();
let mut sub_socket = connect_sub_socket(&endpoint).unwrap();
let (tx, mut rx) = tokio::sync::mpsc::channel::<SubSocket>(1);
let (tx, mut rx) = tokio::sync::mpsc::channel::<SharedSocket>(1);
tokio::spawn(async move {
let _ = sub_socket.recv().await.unwrap();
let _ = recv_multipart(&sub_socket).await.unwrap();
let _ = tx.send(sub_socket).await;
});
loop {
pub_socket.send("probe".into()).await.unwrap();
send_multipart(&pub_socket, vec![b"probe".to_vec()])
.await
.unwrap();
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
if let Ok(sub) = rx.try_recv() {
sub_socket = sub;
......@@ -521,8 +487,7 @@ mod tests {
let num_messages = 10u64;
for i in 0..num_messages {
pub_socket
.send(i.to_le_bytes().to_vec().into())
send_multipart(&pub_socket, vec![i.to_le_bytes().to_vec()])
.await
.unwrap();
}
......@@ -530,14 +495,55 @@ mod tests {
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
for i in 0u64..num_messages {
let msg = tokio::time::timeout(std::time::Duration::from_secs(5), sub_socket.recv())
.await
.expect("timed out waiting for ZMQ message")
.expect("ZMQ recv error");
let msg = tokio::time::timeout(
std::time::Duration::from_secs(5),
recv_multipart(&sub_socket),
)
.await
.expect("timed out waiting for ZMQ message")
.unwrap();
let payload = msg.get(0).unwrap();
let payload = msg.first().unwrap();
let received = u64::from_le_bytes(payload[..8].try_into().unwrap());
assert_eq!(received, i, "message {i} arrived out of order");
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn zmq_subscriber_connects_before_publisher_bind() {
    use std::time::Duration;

    // Grab a free loopback port, then release the probe listener so the
    // publisher can bind the same port later in the test.
    let port = {
        let reserved_listener = reserve_open_port();
        reserved_listener
            .local_addr()
            .expect("failed to read reserved listener address")
            .port()
    };
    let endpoint = format!("tcp://127.0.0.1:{port}");

    // The subscriber connects while nothing is bound on the endpoint yet.
    let sub_socket = connect_sub_socket(&endpoint).unwrap();
    tokio::time::sleep(Duration::from_millis(100)).await;

    // Only now does the publisher appear; send a few probes spaced apart.
    let pub_socket = bind_pub_socket(&endpoint).unwrap();
    for _attempt in 0..5 {
        let probe = vec![b"probe".to_vec()];
        send_multipart(&pub_socket, probe).await.unwrap();
        tokio::time::sleep(Duration::from_millis(50)).await;
    }

    // At least one probe must reach the subscriber within the deadline.
    let recv_outcome =
        tokio::time::timeout(Duration::from_secs(5), recv_multipart(&sub_socket)).await;
    let msg = recv_outcome
        .expect("timed out waiting for ZMQ message")
        .unwrap();
    assert_eq!(msg, vec![b"probe".to_vec()]);
}
/// Binds a TCP listener on the loopback interface with port 0 and returns it,
/// so the caller can read back the port number the OS assigned.
///
/// Panics if the bind fails.
fn reserve_open_port() -> std::net::TcpListener {
    let loopback_any_port = "127.0.0.1:0";
    match std::net::TcpListener::bind(loopback_any_port) {
        Ok(listener) => listener,
        Err(error) => panic!("failed to bind probe listener: {error:?}"),
    }
}
}
......@@ -9,6 +9,7 @@ pub mod registry;
#[cfg(feature = "indexer-runtime")]
pub mod runtime;
pub mod server;
mod zmq;
use std::sync::Arc;
use std::time::Duration;
......@@ -38,10 +39,39 @@ pub struct RuntimeConfig {
}
pub(super) fn validate_zmq_endpoint(endpoint: &str) -> anyhow::Result<()> {
endpoint
.parse::<zeromq::Endpoint>()
.map(|_| ())
.map_err(|error| anyhow::anyhow!("invalid ZMQ endpoint `{endpoint}`: {error}"))
let (scheme, address) = endpoint
.split_once("://")
.ok_or_else(|| anyhow::anyhow!("invalid ZMQ endpoint `{endpoint}`: missing scheme"))?;
if address.is_empty() {
anyhow::bail!("invalid ZMQ endpoint `{endpoint}`: missing address");
}
match scheme {
"tcp" => {
let (host, port) = address.rsplit_once(':').ok_or_else(|| {
anyhow::anyhow!("invalid ZMQ endpoint `{endpoint}`: missing TCP port")
})?;
if host.is_empty() {
anyhow::bail!("invalid ZMQ endpoint `{endpoint}`: missing TCP host");
}
if host.starts_with('[') {
if !host.ends_with(']') {
anyhow::bail!("invalid ZMQ endpoint `{endpoint}`: missing closing `]`");
}
} else if host.contains(':') {
anyhow::bail!("invalid ZMQ endpoint `{endpoint}`: missing TCP port");
}
port.parse::<u16>().map_err(|error| {
anyhow::anyhow!("invalid ZMQ endpoint `{endpoint}`: invalid TCP port: {error}")
})?;
Ok(())
}
"ipc" | "inproc" => Ok(()),
other => Err(anyhow::anyhow!(
"invalid ZMQ endpoint `{endpoint}`: unsupported scheme `{other}`"
)),
}
}
pub(super) fn validate_listener_endpoints(
......@@ -326,4 +356,20 @@ mod tests {
let error = parse_workers("1").unwrap_err().to_string();
assert!(error.contains("invalid worker entry"));
}
#[test]
fn test_validate_zmq_endpoint_allows_wildcard_tcp_bind() {
    // Each of these must pass validation: wildcard TCP host, TCP with port 0,
    // and the two non-TCP schemes the validator accepts.
    let accepted = [
        "tcp://*:5558",
        "tcp://127.0.0.1:0",
        "inproc://listener",
        "ipc:///tmp/dynamo.sock",
    ];
    for endpoint in accepted {
        validate_zmq_endpoint(endpoint).unwrap();
    }
}
#[test]
fn test_validate_zmq_endpoint_rejects_invalid_values() {
    // In order: TCP without a port, TCP without a host, an unsupported
    // scheme, and a string with no `://` separator at all.
    let rejected = [
        "tcp://host",
        "tcp://:5558",
        "udp://host:5558",
        "not-an-endpoint",
    ];
    for endpoint in rejected {
        assert!(
            validate_zmq_endpoint(endpoint).is_err(),
            "endpoint `{endpoint}` should have been rejected"
        );
    }
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::collections::VecDeque;
use std::future::poll_fn;
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::Arc;
use std::task::{Context, Poll, ready};
use anyhow::{Result, anyhow};
use tokio::{io::unix::AsyncFd, sync::Mutex};
/// A multipart ZMQ message: one `Vec<u8>` per frame, in the order the frames
/// were received.
pub(super) type MultipartMessage = Vec<Vec<u8>>;
/// A `ZmqSocket` shared through an `Arc` and guarded by tokio's async `Mutex`.
pub(super) type SharedSocket = Arc<Mutex<ZmqSocket>>;
// Tuning values for the ZMQ sockets created by this module. Constants with an
// `_MS` suffix are in milliseconds.
// NOTE(review): the names mirror libzmq socket options (ZMQ_RCVTIMEO,
// ZMQ_SNDTIMEO, ZMQ_RECONNECT_IVL, ZMQ_RECONNECT_IVL_MAX, ZMQ_TCP_KEEPALIVE,
// ZMQ_HEARTBEAT_*, ZMQ_LINGER). The code that applies them is not visible in
// this part of the file — confirm each one is actually set on the sockets.
const ZMQ_RCVTIMEOUT_MS: i32 = 100;
// Presumably 0 means a send never blocks inside libzmq — TODO confirm.
const ZMQ_SNDTIMEOUT_MS: i32 = 0;
const ZMQ_RECONNECT_IVL_MS: i32 = 100;
const ZMQ_RECONNECT_IVL_MAX_MS: i32 = 5000;
// Presumably 1 enables TCP keepalive, following libzmq's convention — TODO confirm.
const ZMQ_TCP_KEEPALIVE: i32 = 1;
const ZMQ_HEARTBEAT_IVL_MS: i32 = 5000;
const ZMQ_HEARTBEAT_TIMEOUT_MS: i32 = 15000;
const ZMQ_HEARTBEAT_TTL_MS: i32 = 15000;
// Presumably 0 means unsent messages are dropped when a socket closes — TODO confirm.
const ZMQ_LINGER_MS: i32 = 0;
/// Pairs a `zmq::Socket` with the raw file descriptor obtained from it, so the
/// pair can implement `AsRawFd`.
struct SocketWrapper {
/// The underlying ZMQ socket.
socket: zmq::Socket,
/// Descriptor returned by `socket.get_fd()` when the wrapper was built.
fd: RawFd,
}
impl SocketWrapper {
    /// Wraps `socket`, caching the descriptor reported by `get_fd()`.
    ///
    /// # Errors
    /// Returns the error from `get_fd()` if the descriptor cannot be queried.
    fn new(socket: zmq::Socket) -> Result<Self> {
        let fd = socket.get_fd()?;
        Ok(Self { socket, fd })
    }
}
// Exposes the cached descriptor; `AsyncFd::new` in `ZmqSocket::new` is given a `SocketWrapper`.
impl AsRawFd for SocketWrapper {
// Returns the descriptor captured in `SocketWrapper::new`.
fn as_raw_fd(&self) -> RawFd {
self.fd
}
}
/// A ZMQ socket wrapped in tokio's `AsyncFd`, so that readiness of the socket's
/// descriptor can be awaited from async code.
pub(super) struct ZmqSocket(AsyncFd<SocketWrapper>);
impl ZmqSocket {
    /// Registers `socket`'s descriptor with tokio and returns the async wrapper.
    ///
    /// # Errors
    /// Fails if the descriptor cannot be read from the socket or if
    /// `AsyncFd::new` rejects it.
    fn new(socket: zmq::Socket) -> Result<Self> {
        let wrapper = SocketWrapper::new(socket)?;
        let async_fd = AsyncFd::new(wrapper)?;
        Ok(Self(async_fd))
    }

    /// Borrows the underlying `zmq::Socket`.
    fn socket(&self) -> &zmq::Socket {
        let wrapper = self.0.get_ref();
        &wrapper.socket
    }

    /// Resolves once the socket's event mask (`get_events`) contains `event`.
    ///
    /// When the event is not set, tokio's read readiness is reset through
    /// [`Self::clear_read_ready`] and `Poll::Pending` is returned.
    fn poll_socket_event(
        &mut self,
        cx: &mut Context<'_>,
        event: zmq::PollEvents,
    ) -> Poll<Result<()>> {
        let current = self.socket().get_events()?;
        if !current.contains(event) {
            self.clear_read_ready(cx)?;
            return Poll::Pending;
        }
        Poll::Ready(Ok(()))
    }

    /// Polls tokio's read readiness for the descriptor.
    ///
    /// If the descriptor is reported ready, that readiness is cleared and the
    /// task is woken immediately so that the caller gets polled again and
    /// re-examines the socket state. If it is not ready, `poll_read_ready`
    /// has been handed `cx` and nothing else is done here.
    fn clear_read_ready(&mut self, cx: &mut Context<'_>) -> Result<()> {
        match self.0.poll_read_ready(cx)? {
            Poll::Ready(mut guard) => {
                guard.clear_ready();
                cx.waker().wake_by_ref();
            }
            Poll::Pending => {}
        }
        Ok(())
    }

    /// Receives one complete multipart message without blocking.
    ///
    /// Frames are read with `DONTWAIT` until one reports no further parts.
    /// `EAGAIN` before the first frame yields `Poll::Pending`; `EAGAIN` after
    /// at least one frame is reported as an error, as is any other failure.
    fn poll_recv_multipart(&mut self, cx: &mut Context<'_>) -> Poll<Result<MultipartMessage>> {
        ready!(self.poll_socket_event(cx, zmq::POLLIN))?;

        let mut parts = Vec::new();
        loop {
            let mut part = zmq::Message::new();
            let outcome = self.socket().recv(&mut part, zmq::DONTWAIT);
            match outcome {
                Err(zmq::Error::EAGAIN) => {
                    if parts.is_empty() {
                        // Nothing consumed yet: wait for the next readiness event.
                        self.clear_read_ready(cx)?;
                        return Poll::Pending;
                    }
                    return Poll::Ready(Err(anyhow!(
                        "multipart receive interrupted after {} frames",
                        parts.len()
                    )));
                }
                Err(other) => return Poll::Ready(Err(other.into())),
                Ok(_) => {
                    let has_more = part.get_more();
                    parts.push(part.to_vec());
                    if !has_more {
                        return Poll::Ready(Ok(parts));
                    }
                }
            }
        }
    }

    /// Sends every frame queued in `buffer` as one multipart message.
    ///
    /// Frames are popped from the front; all but the last are flagged
    /// `SNDMORE`. On `EAGAIN` the frame is put back at the front of `buffer`
    /// and `Poll::Pending` is returned, so a later poll resumes from that
    /// frame. An empty `buffer` completes immediately without touching the
    /// socket.
    fn poll_send_multipart(
        &mut self,
        cx: &mut Context<'_>,
        buffer: &mut VecDeque<zmq::Message>,
    ) -> Poll<Result<()>> {
        if !buffer.is_empty() {
            ready!(self.poll_socket_event(cx, zmq::POLLOUT))?;
        }

        while let Some(frame) = buffer.pop_front() {
            let flags = if buffer.is_empty() {
                zmq::DONTWAIT
            } else {
                zmq::DONTWAIT | zmq::SNDMORE
            };
            match self.socket().send(&*frame, flags) {
                Ok(_) => continue,
                Err(zmq::Error::EAGAIN) => {
                    buffer.push_front(frame);
                    self.clear_read_ready(cx)?;
                    return Poll::Pending;
                }
                Err(other) => return Poll::Ready(Err(other.into())),
            }
        }

        Poll::Ready(Ok(()))
    }
}
/// Applies the options shared by every socket built in this module:
/// linger, reconnect intervals, TCP keepalive and heartbeat settings.
///
/// # Errors
/// Stops at, and returns, the first option setter that fails.
fn configure_common_socket(socket: &zmq::Socket) -> Result<()> {
    // Close behaviour.
    socket.set_linger(ZMQ_LINGER_MS)?;

    // Reconnect intervals.
    socket.set_reconnect_ivl(ZMQ_RECONNECT_IVL_MS)?;
    socket.set_reconnect_ivl_max(ZMQ_RECONNECT_IVL_MAX_MS)?;

    // Keepalive and heartbeat settings.
    socket.set_tcp_keepalive(ZMQ_TCP_KEEPALIVE)?;
    socket.set_heartbeat_ivl(ZMQ_HEARTBEAT_IVL_MS)?;
    socket.set_heartbeat_timeout(ZMQ_HEARTBEAT_TIMEOUT_MS)?;
    socket
        .set_heartbeat_ttl(ZMQ_HEARTBEAT_TTL_MS)
        .map_err(Into::into)
}
/// Applies the common options and then the receive timeout.
///
/// # Errors
/// Returns the first failure from `configure_common_socket` or `set_rcvtimeo`.
fn configure_receive_socket(socket: &zmq::Socket) -> Result<()> {
    configure_common_socket(socket)?;
    socket.set_rcvtimeo(ZMQ_RCVTIMEOUT_MS).map_err(Into::into)
}
/// Applies the receive-side configuration and then the send timeout, for
/// sockets that both send and receive.
///
/// # Errors
/// Returns the first failure from `configure_receive_socket` or `set_sndtimeo`.
fn configure_bidirectional_socket(socket: &zmq::Socket) -> Result<()> {
    configure_receive_socket(socket)?;
    socket.set_sndtimeo(ZMQ_SNDTIMEOUT_MS).map_err(Into::into)
}
/// Applies the common options and then the send timeout (test builds only).
///
/// # Errors
/// Returns the first failure from `configure_common_socket` or `set_sndtimeo`.
#[cfg(test)]
fn configure_send_socket(socket: &zmq::Socket) -> Result<()> {
    configure_common_socket(socket)?;
    socket.set_sndtimeo(ZMQ_SNDTIMEOUT_MS).map_err(Into::into)
}
/// Creates a socket of `socket_type`, lets `configure` set it up, and wraps
/// the result in a [`ZmqSocket`].
///
/// `configure` runs before the descriptor is registered with tokio, so it is
/// the place to set options and to connect or bind.
///
/// NOTE(review): a fresh `zmq::Context` is created on every call. libzmq
/// contexts own their own I/O threads and `inproc://` endpoints are only
/// visible within one context, so confirm this is intended.
///
/// # Errors
/// Returns the failure from socket creation, from `configure`, or from
/// `ZmqSocket::new`.
fn build_socket<F>(socket_type: zmq::SocketType, configure: F) -> Result<ZmqSocket>
where
    F: FnOnce(&zmq::Socket) -> Result<()>,
{
    let ctx = zmq::Context::new();
    let raw = ctx.socket(socket_type)?;
    configure(&raw)?;
    ZmqSocket::new(raw)
}
/// Builds a SUB socket with the receive configuration, subscribes with an
/// empty prefix, and connects it to `endpoint`.
///
/// # Errors
/// Returns any failure from configuring, subscribing, connecting, or wrapping
/// the socket.
pub(super) fn connect_sub_socket(endpoint: &str) -> Result<SharedSocket> {
    let subscriber = build_socket(zmq::SUB, |raw| {
        configure_receive_socket(raw)?;
        raw.set_subscribe(b"")?;
        raw.connect(endpoint)?;
        Ok(())
    })?;
    Ok(Arc::new(Mutex::new(subscriber)))
}
/// Builds a DEALER socket with the bidirectional configuration and connects
/// it to `endpoint`.
///
/// # Errors
/// Returns any failure from configuring, connecting, or wrapping the socket.
pub(super) fn connect_dealer_socket(endpoint: &str) -> Result<SharedSocket> {
    let dealer = build_socket(zmq::DEALER, |raw| {
        configure_bidirectional_socket(raw)?;
        raw.connect(endpoint)?;
        Ok(())
    })?;
    Ok(Arc::new(Mutex::new(dealer)))
}
/// Builds a PUB socket with the send configuration and binds it to
/// `endpoint` (test builds only).
///
/// # Errors
/// Returns any failure from configuring, binding, or wrapping the socket.
#[cfg(test)]
pub(super) fn bind_pub_socket(endpoint: &str) -> Result<SharedSocket> {
    let publisher = build_socket(zmq::PUB, |raw| {
        configure_send_socket(raw)?;
        raw.bind(endpoint)?;
        Ok(())
    })?;
    Ok(Arc::new(Mutex::new(publisher)))
}
/// Waits for the next complete multipart message on `socket`.
///
/// The mutex is held for the whole receive, so other users of the same
/// `SharedSocket` wait until this call finishes.
///
/// # Errors
/// Propagates any error from `ZmqSocket::poll_recv_multipart`.
pub(super) async fn recv_multipart(socket: &SharedSocket) -> Result<MultipartMessage> {
    let mut guard = socket.lock().await;
    let receive = poll_fn(|cx| guard.poll_recv_multipart(cx));
    receive.await
}
/// Sends `frames` on `socket` as a single multipart message.
///
/// The mutex is taken first and held until every frame has been handed to
/// the socket. Each frame is converted into a `zmq::Message` and queued; the
/// queue is then drained by `ZmqSocket::poll_send_multipart`.
///
/// # Errors
/// Propagates any error from `ZmqSocket::poll_send_multipart`.
pub(super) async fn send_multipart(socket: &SharedSocket, frames: MultipartMessage) -> Result<()> {
    let mut guard = socket.lock().await;
    let mut pending: VecDeque<zmq::Message> =
        frames.into_iter().map(zmq::Message::from).collect();
    poll_fn(|cx| guard.poll_send_multipart(cx, &mut pending)).await
}
......@@ -167,7 +167,6 @@ tokio-rayon = {version = "2" }
ndarray = { version = "0.16" }
# Publishers
zeromq = "0.4.1"
rmp-serde = "1.3"
[dev-dependencies]
......
......@@ -3,8 +3,8 @@
use super::*;
use super::zmq::*;
use utils::*;
use zmq::*;
use derive_builder::Builder;
use std::sync::Arc;
......
......@@ -3,10 +3,10 @@
use super::*;
use super::zmq::*;
use futures::future::try_join_all;
use nixl_sys::NixlDescriptor;
use utils::*;
use zmq::*;
use BlockTransferPool::*;
......
......@@ -3,10 +3,10 @@
use super::*;
use super::zmq::*;
use async_trait::async_trait;
use transfer::*;
use utils::*;
use zmq::*;
use crate::block_manager::{
BasicMetadata, BlockMetadata, LayoutConfigBuilder, NixlLayout, Storage,
......
......@@ -14,9 +14,9 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use tokio::sync::RwLock;
use tokio::task::JoinHandle;
use zeromq::{PubSocket, Socket, SocketSend};
use super::tracker::{CacheStatusTracker, ConsolidatedEvent};
use crate::utils::zmq::{bind_pub_socket, send_multipart};
/// Event batch structure matching vLLM's format (array_like=True)
/// Format: [timestamp, [events], data_parallel_rank]
......@@ -183,9 +183,7 @@ impl KvEventConsolidatorPublisher {
tracing::info!("Starting consolidated event publisher on {}", endpoint);
// Create ZMQ PUB socket and bind
let mut socket = PubSocket::new();
socket
.bind(&endpoint)
let socket = bind_pub_socket(&endpoint)
.await
.with_context(|| format!("Failed to bind publisher to {}", endpoint))?;
......@@ -258,16 +256,9 @@ impl KvEventConsolidatorPublisher {
Bytes::from(seq_bytes.to_vec()),
Bytes::from(payload),
];
let frames = frames.into_iter().map(|frame| frame.to_vec()).collect();
let msg = match zeromq::ZmqMessage::try_from(frames) {
Ok(m) => m,
Err(e) => {
tracing::error!("Failed to create multipart ZMQ message: {:?}", e);
continue;
}
};
if let Err(e) = socket.send(msg).await {
if let Err(e) = send_multipart(&socket, frames).await {
tracing::error!("Failed to send consolidated events: {}", e);
} else {
tracing::debug!(
......
......@@ -6,17 +6,18 @@
//! This is a simplified subscriber that deserializes raw vLLM/TensorRT-LLM events.
use anyhow::{Context, Result};
use futures::StreamExt;
use rmp_serde::Deserializer;
use serde::Deserialize;
use std::sync::Arc;
use tokio::sync::RwLock;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use zeromq::{Socket, SocketRecv, SubSocket};
use dynamo_kv_router::zmq_wire::RawKvEvent;
use super::tracker::{CacheStatusTracker, EventSource, StorageTier};
use crate::utils::zmq::{connect_sub_socket, multipart_message};
/// Event batch received from vLLM/TensorRT-LLM (array format)
/// Format: [timestamp, [events], data_parallel_rank]
......@@ -73,15 +74,10 @@ async fn run_listener_loop(
endpoint
);
let mut socket = SubSocket::new();
socket
.connect(&endpoint)
let socket = connect_sub_socket(&endpoint, None)
.await
.context("Failed to connect to ZMQ endpoint")?;
socket
.subscribe("")
.await
.context("Failed to subscribe to ZMQ topics")?;
.with_context(|| format!("Failed to connect to ZMQ endpoint {endpoint}"))?;
let mut socket = socket;
tracing::info!(
"KV event consolidator ZMQ listener successfully connected to {}",
......@@ -97,18 +93,19 @@ async fn run_listener_loop(
break;
}
msg_result = socket.recv() => {
let Ok(msg) = msg_result else {
tracing::warn!("Error receiving ZMQ message: {:?}", msg_result.unwrap_err());
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
continue;
msg_result = socket.next() => {
let frames = match msg_result {
Some(Ok(frames)) => multipart_message(frames),
Some(Err(error)) => {
tracing::error!("Error receiving ZMQ message: {error}");
break;
}
None => break,
};
// Parse multipart message: supports both formats
// - 2 frames: [topic, payload]
// - 3 frames: [topic, sequence, payload]
let frames: Vec<Vec<u8>> = msg.into_vec().into_iter().map(|f| f.to_vec()).collect();
let payload = match frames.len() {
2 => &frames[1], // [topic, payload]
3 => &frames[2], // [topic, sequence, payload]
......
......@@ -11,19 +11,17 @@
//! [`crate::kv_router::publisher::KvEventPublisher`], but is much simpler:
//! no event transformation, no batching, no local indexer — just raw byte relay.
use std::time::Duration;
use anyhow::Result;
use futures::StreamExt;
use tokio_util::sync::CancellationToken;
use zeromq::SocketRecv;
use crate::utils::zmq::connect_sub_socket_with_retry;
use dynamo_runtime::component::Component;
use dynamo_runtime::traits::DistributedRuntimeProvider;
use dynamo_runtime::transports::event_plane::EventPublisher;
use crate::utils::zmq::{connect_sub_socket, multipart_message};
const FPM_TOPIC: &str = "forward-pass-metrics";
const MAX_CONSECUTIVE_ERRORS: u32 = 10;
/// A relay that bridges ForwardPassMetrics from a local raw ZMQ PUB socket
/// to the Dynamo event plane.
......@@ -62,15 +60,16 @@ impl FpmEventRelay {
publisher: EventPublisher,
cancel: CancellationToken,
) {
let Some(mut socket) =
connect_sub_socket_with_retry(&zmq_endpoint, None, &cancel, "FPM relay").await
else {
return;
let socket = match connect_sub_socket(&zmq_endpoint, None).await {
Ok(socket) => socket,
Err(error) => {
tracing::error!(endpoint = %zmq_endpoint, error = %error, "FPM relay: failed to connect");
return;
}
};
let mut socket = socket;
tracing::info!("FPM relay: connected to {zmq_endpoint}");
let mut consecutive_errors: u32 = 0;
loop {
tokio::select! {
biased;
......@@ -78,16 +77,11 @@ impl FpmEventRelay {
tracing::info!("FPM relay: shutting down");
break;
}
result = socket.recv() => {
result = socket.next() => {
match result {
Ok(msg) => {
consecutive_errors = 0;
Some(Ok(frames)) => {
let mut frames = multipart_message(frames);
// ZMQ multipart: [topic, seq, payload]
let mut frames: Vec<Vec<u8>> = msg
.into_vec()
.into_iter()
.map(|f| f.to_vec())
.collect();
if frames.len() == 3 {
let payload = frames.swap_remove(2);
if let Err(e) = publisher.publish_bytes(payload).await {
......@@ -97,19 +91,16 @@ impl FpmEventRelay {
tracing::warn!(
"FPM relay: unexpected ZMQ frame count: expected 3, got {}",
frames.len()
);
}
}
Err(e) => {
consecutive_errors += 1;
tracing::warn!(
"FPM relay: ZMQ recv error ({consecutive_errors}/{MAX_CONSECUTIVE_ERRORS}): {e}"
);
if consecutive_errors >= MAX_CONSECUTIVE_ERRORS {
tracing::error!("FPM relay: too many consecutive errors, exiting");
break;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
Some(Err(e)) => {
tracing::error!("FPM relay: ZMQ recv failed: {e}");
break;
}
None => {
tracing::error!("FPM relay: ZMQ stream ended");
break;
}
}
}
......
......@@ -40,11 +40,6 @@ use event_processor::{
};
pub use worker_metrics::WorkerMetricsPublisher;
use zmq_listener::start_zmq_listener;
#[cfg(test)]
use zmq_listener::{
INITIAL_BACKOFF_MS, MAX_BACKOFF_EXPONENT, MAX_BACKOFF_MS, MAX_CONSECUTIVE_ERRORS,
calculate_backoff_ms,
};
const MAX_BATCHING_TIMEOUT_MS: u64 = 15_000;
pub const DEFAULT_BATCHING_TIMEOUT_MS: Option<u64> = None;
......
......@@ -13,8 +13,6 @@ use std::future::Future;
#[allow(unused_imports)]
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::time::Duration;
#[allow(unused_imports)]
use zeromq::{PubSocket, Socket, SocketSend, ZmqMessage};
#[cfg(test)]
mod test_event_processing {
......@@ -430,11 +428,11 @@ mod test_event_processing {
#[cfg(test)]
mod tests_startup_helpers {
use super::*;
use crate::utils::zmq::{bind_pub_socket, send_multipart};
use bytes::Bytes;
use dynamo_kv_router::indexer::{GetWorkersRequest, KvIndexer, KvIndexerInterface};
use dynamo_kv_router::protocols::{ExternalSequenceBlockHash, LocalBlockHash};
use std::sync::{Arc, Mutex};
use zeromq::{PubSocket, Socket, SocketSend, ZmqMessage};
// Type alias to resolve clippy::type_complexity warning
type PublishedEvents = Arc<Mutex<Vec<(String, Vec<u8>)>>>;
......@@ -828,13 +826,20 @@ mod tests_startup_helpers {
// Prepare channel that listener should fill
let (tx, mut rx) = mpsc::unbounded_channel::<PlacementEvent>();
// ZMQ TCP endpoint using localhost with fixed port
let endpoint = "tcp://127.0.0.1:15555";
// ZMQ TCP endpoint using localhost with an ephemeral port
let reserved_listener = reserve_open_port();
let endpoint = format!(
"tcp://127.0.0.1:{}",
reserved_listener
.local_addr()
.expect("failed to read reserved listener address")
.port()
);
drop(reserved_listener);
let topic = "".to_string(); // subscribe to all
// Publisher side - set up first
let mut pub_socket = PubSocket::new();
pub_socket.bind(endpoint).await.unwrap();
let pub_socket = bind_pub_socket(&endpoint).await.unwrap();
// Cancellation token so we can stop the listener
let token = dynamo_runtime::CancellationToken::new();
......@@ -873,16 +878,13 @@ mod tests_startup_helpers {
let payload = Bytes::from(rmps::to_vec(&batch).unwrap());
let frames = vec![
Bytes::from(""),
Bytes::from(seq.to_be_bytes().to_vec()),
payload.clone(),
Bytes::from("").to_vec(),
Bytes::from(seq.to_be_bytes().to_vec()).to_vec(),
payload.clone().to_vec(),
];
// Create a proper multipart message
let msg = ZmqMessage::try_from(frames).expect("Failed to create ZmqMessage");
// Send the multipart message
pub_socket.send(msg).await.unwrap();
send_multipart(&pub_socket, frames).await.unwrap();
// Wait for message to be received
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
......@@ -907,6 +909,75 @@ mod tests_startup_helpers {
let _ = listener_handle.await;
}
// Starts the listener before any publisher is bound to the endpoint, binds the
// publisher afterwards, and checks that a published event still reaches the
// listener's channel.
#[tokio::test]
async fn test_start_zmq_listener_connects_before_publisher_bind() {
let (tx, mut rx) = mpsc::unbounded_channel::<PlacementEvent>();
// Bind a probe listener on port 0 to learn a free port, then release it so
// the publisher can bind the same port later.
let reserved_listener = reserve_open_port();
let endpoint = format!(
"tcp://127.0.0.1:{}",
reserved_listener
.local_addr()
.expect("failed to read reserved listener address")
.port()
);
drop(reserved_listener);
let topic = String::new();
let token = dynamo_runtime::CancellationToken::new();
let next_event_id = Arc::new(AtomicU64::new(0));
// The listener task starts while nothing is bound to `endpoint`.
let listener_handle = tokio::spawn({
let token = token.clone();
let endpoint = endpoint.clone();
start_zmq_listener(endpoint, topic, 1, tx, token, 4, next_event_id)
});
// Give the listener time to start before the publisher appears.
tokio::time::sleep(tokio::time::Duration::from_millis(150)).await;
let pub_socket = bind_pub_socket(&endpoint).await.unwrap();
// A batch holding one BlockStored event whose block hash is 64.
let batch = KvEventBatch {
ts: 0.0,
events: vec![RawKvEvent::BlockStored {
block_hashes: vec![BlockHashValue::Unsigned(64)],
parent_block_hash: None,
token_ids: vec![4, 5, 6, 7],
block_size: 4,
medium: None,
lora_name: None,
block_mm_infos: None,
is_eagle: None,
}],
data_parallel_rank: Some(0),
};
let payload = rmps::to_vec(&batch).unwrap();
// Publish the same three-frame message five times, 50 ms apart.
// NOTE(review): presumably repeated because early messages can be lost
// until the subscriber's connection is established - confirm.
for _ in 0..5 {
send_multipart(
&pub_socket,
vec![Vec::new(), 12u64.to_be_bytes().to_vec(), payload.clone()],
)
.await
.unwrap();
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
}
// At least one event must arrive within five seconds.
let event = tokio::time::timeout(tokio::time::Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for listener event")
.expect("listener channel closed")
.event;
let KvCacheEventData::Stored(KvCacheStoreData { blocks, .. }) = event.data else {
panic!("expected KvCacheStoreData");
};
assert_eq!(blocks[0].block_hash.0, 64);
// Stop the listener and wait for its task to finish.
token.cancel();
let _ = listener_handle.await;
}
/// Binds a TCP listener to `127.0.0.1` on port 0 and returns it.
///
/// The caller can read the assigned port from `local_addr()`; the port stays
/// held until the listener is dropped.
///
/// # Panics
/// Panics if the bind fails.
fn reserve_open_port() -> std::net::TcpListener {
    let probe = std::net::TcpListener::bind("127.0.0.1:0");
    probe.expect("failed to bind probe listener")
}
//--------------------------------------------------------------------
// Test distributed recovery: Router queries worker's LocalKvIndexer after outage
//--------------------------------------------------------------------
......@@ -1120,55 +1191,6 @@ mod tests_startup_helpers {
}
}
// Unit tests for `calculate_backoff_ms` and the backoff constants it uses.
#[cfg(test)]
mod test_exponential_backoff {
use super::*;
// The delay doubles with each consecutive error, starting at 10 ms.
#[test]
fn test_backoff_calculation_progression() {
// Test the exponential progression
assert_eq!(calculate_backoff_ms(0), 10); // 10 * 2^0 = 10
assert_eq!(calculate_backoff_ms(1), 20); // 10 * 2^1 = 20
assert_eq!(calculate_backoff_ms(2), 40); // 10 * 2^2 = 40
assert_eq!(calculate_backoff_ms(3), 80); // 10 * 2^3 = 80
assert_eq!(calculate_backoff_ms(4), 160); // 10 * 2^4 = 160
assert_eq!(calculate_backoff_ms(5), 320); // 10 * 2^5 = 320
assert_eq!(calculate_backoff_ms(6), 640); // 10 * 2^6 = 640
assert_eq!(calculate_backoff_ms(7), 1280); // 10 * 2^7 = 1280
assert_eq!(calculate_backoff_ms(8), 2560); // 10 * 2^8 = 2560
}
// Error counts above the maximum exponent produce the same delay as the maximum.
#[test]
fn test_backoff_caps_at_max_exponent() {
// After MAX_BACKOFF_EXPONENT, should stay at 2^8 = 2560ms
assert_eq!(calculate_backoff_ms(8), 2560);
assert_eq!(calculate_backoff_ms(9), 2560); // Same as 8
assert_eq!(calculate_backoff_ms(100), 2560); // Same as 8
}
// The result is bounded by MAX_BACKOFF_MS for every error count checked.
#[test]
fn test_backoff_never_exceeds_max() {
// Even if we somehow had a huge exponent, never exceed MAX_BACKOFF_MS
for i in 0..20 {
assert!(calculate_backoff_ms(i) <= MAX_BACKOFF_MS);
}
}
// Sanity checks on the constants; clippy's constant-assertion lint is expected here.
#[test]
#[expect(clippy::assertions_on_constants)]
fn test_backoff_constants_are_sane() {
// Verify our constants make sense together
assert!(INITIAL_BACKOFF_MS > 0);
assert!(MAX_BACKOFF_MS > INITIAL_BACKOFF_MS);
assert!(MAX_BACKOFF_EXPONENT <= 10); // Prevent crazy exponents
assert!(MAX_CONSECUTIVE_ERRORS > 0);
// Max calculated value should be less than MAX_BACKOFF_MS
let max_calculated = INITIAL_BACKOFF_MS * 2_u64.pow(MAX_BACKOFF_EXPONENT);
assert!(max_calculated <= MAX_BACKOFF_MS);
}
}
#[cfg(all(test, feature = "integration"))]
mod test_integration_publisher {
use super::*;
......
......@@ -3,28 +3,16 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::time::Duration;
use futures::StreamExt;
use rmp_serde as rmps;
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;
use zeromq::SocketRecv;
use crate::utils::zmq::connect_sub_socket_with_retry;
use dynamo_kv_router::protocols::*;
use dynamo_kv_router::zmq_wire::*;
/// Delay used after the first error, in milliseconds.
pub(super) const INITIAL_BACKOFF_MS: u64 = 10;
/// Upper bound applied to every computed delay, in milliseconds.
pub(super) const MAX_BACKOFF_MS: u64 = 5000;
/// Consecutive error count at which callers give up.
pub(super) const MAX_CONSECUTIVE_ERRORS: u32 = 10;
/// Largest power of two used when scaling the initial delay.
pub(super) const MAX_BACKOFF_EXPONENT: u32 = 8;

/// Returns the backoff delay in milliseconds for `consecutive_errors`.
///
/// The delay is `INITIAL_BACKOFF_MS * 2^n`, where `n` is the error count
/// clamped to `MAX_BACKOFF_EXPONENT`, and the result is capped at
/// `MAX_BACKOFF_MS`.
pub(super) fn calculate_backoff_ms(consecutive_errors: u32) -> u64 {
    let exponent = consecutive_errors.min(MAX_BACKOFF_EXPONENT);
    let uncapped = INITIAL_BACKOFF_MS * 2_u64.pow(exponent);
    uncapped.min(MAX_BACKOFF_MS)
}
use crate::utils::zmq::{connect_sub_socket, multipart_message};
pub(super) async fn start_zmq_listener(
zmq_endpoint: String,
......@@ -42,63 +30,40 @@ pub(super) async fn start_zmq_listener(
);
let warning_count = Arc::new(AtomicU32::new(0));
let Some(mut socket) = connect_sub_socket_with_retry(
&zmq_endpoint,
Some(&zmq_topic),
&cancellation_token,
"ZMQ listener",
)
.await
else {
return;
let socket = match connect_sub_socket(&zmq_endpoint, Some(&zmq_topic)).await {
Ok(socket) => socket,
Err(error) => {
tracing::error!(endpoint = %zmq_endpoint, topic = %zmq_topic, error = %error, "ZMQ listener failed to connect");
return;
}
};
let mut socket = socket;
if cancellation_token.is_cancelled() {
return;
}
let mut consecutive_errors = 0u32;
#[expect(unused_assignments)]
let mut exit_reason = "unknown";
let mut messages_processed = 0u64;
'main: loop {
let exit_reason = 'main: loop {
tokio::select! {
biased;
_ = cancellation_token.cancelled() => {
tracing::debug!("ZMQ listener received cancellation signal");
exit_reason = "cancellation token cancelled";
break 'main;
break 'main String::from("cancellation token cancelled");
}
msg_result = socket.recv() => {
let Ok(msg) = msg_result else {
let e = msg_result.unwrap_err();
consecutive_errors += 1;
if consecutive_errors >= MAX_CONSECUTIVE_ERRORS {
tracing::error!(
error=%e,
consecutive_errors=%consecutive_errors,
"Too many consecutive ZMQ errors, terminating listener"
);
exit_reason = "too many consecutive errors";
break 'main;
msg_result = socket.next() => {
let frames = match msg_result {
Some(Ok(frames)) => multipart_message(frames),
Some(Err(error)) => {
tracing::error!(endpoint = %zmq_endpoint, error = %error, "ZMQ listener recv failed");
break 'main format!("ZMQ recv failed: {error}");
}
let backoff_ms = calculate_backoff_ms(consecutive_errors);
tracing::warn!(
error=%e,
consecutive_errors=%consecutive_errors,
backoff_ms=%backoff_ms,
"Error reading from ZMQ socket, applying exponential backoff"
);
tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
continue;
None => break 'main String::from("ZMQ stream ended"),
};
consecutive_errors = 0;
let mut frames: Vec<Vec<u8>> =
msg.into_vec().into_iter().map(|frame| frame.to_vec()).collect();
let mut frames = frames;
if frames.len() != 3 {
tracing::warn!(
......@@ -144,14 +109,13 @@ pub(super) async fn start_zmq_listener(
convert_event(raw_event, event_id, kv_block_size, worker, &warning_count);
if tx.send(event).is_err() {
tracing::warn!("Failed to send message to channel - receiver dropped");
exit_reason = "channel receiver dropped";
break 'main;
break 'main String::from("channel receiver dropped");
}
messages_processed += 1;
}
}
}
}
};
tracing::debug!(
"ZMQ listener exiting, reason: {}, messages processed: {}",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment