Unverified Commit a13c4cb6 authored by Ryan Olson's avatar Ryan Olson Committed by GitHub
Browse files

feat: add Rayon compute pool for CPU-intensive operations (#2969)


Signed-off-by: default avatarRyan Olson <rolson@nvidia.com>
parent 7ebbd001
......@@ -71,12 +71,6 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
......@@ -86,6 +80,12 @@ dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.20"
......@@ -138,9 +138,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.99"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
dependencies = [
"backtrace",
]
......@@ -386,9 +386,9 @@ dependencies = [
[[package]]
name = "aws-lc-rs"
version = "1.13.3"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
checksum = "94b8ff6c09cd57b16da53641caa860168b88c172a5ee163b0288d3d6eea12786"
dependencies = [
"aws-lc-sys",
"zeroize",
......@@ -396,11 +396,11 @@ dependencies = [
[[package]]
name = "aws-lc-sys"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff"
checksum = "0e44d16778acaf6a9ec9899b92cebd65580b83f685446bf2e1f5d3d732f99dcd"
dependencies = [
"bindgen 0.69.5",
"bindgen 0.72.1",
"cc",
"cmake",
"dunce",
......@@ -604,36 +604,13 @@ dependencies = [
"serde",
]
[[package]]
name = "bindgen"
version = "0.69.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
dependencies = [
"bitflags 2.9.3",
"cexpr",
"clang-sys",
"itertools 0.12.1",
"lazy_static",
"lazycell",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash 1.1.0",
"shlex",
"syn 2.0.106",
"which",
]
[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"cexpr",
"clang-sys",
"itertools 0.13.0",
......@@ -649,11 +626,11 @@ dependencies = [
[[package]]
name = "bindgen"
version = "0.72.0"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f72209734318d0b619a5e0f5129918b848c416e122a3c4ce054e03cb87b726f"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"cexpr",
"clang-sys",
"itertools 0.13.0",
......@@ -732,9 +709,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.9.3"
version = "2.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
dependencies = [
"serde",
]
......@@ -777,9 +754,9 @@ dependencies = [
[[package]]
name = "bm25"
version = "2.3.1"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b84ff0d57042bc263e2ebadb3703424b59b65870902649a2b3d0f4d7ab863244"
checksum = "1cbd8ffdfb7b4c2ff038726178a780a94f90525ed0ad264c0afaa75dd8c18a64"
dependencies = [
"cached",
"deunicode",
......@@ -1010,9 +987,9 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb"
dependencies = [
"clap 4.5.46",
"clap 4.5.48",
"heck 0.4.1",
"indexmap 2.11.0",
"indexmap 2.11.4",
"log",
"proc-macro2",
"quote",
......@@ -1025,10 +1002,11 @@ dependencies = [
[[package]]
name = "cc"
version = "1.2.34"
version = "1.2.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc"
checksum = "80f41ae168f955c12fb8960b057d70d0ca153fb83182b57d86380443527be7e9"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
......@@ -1077,7 +1055,7 @@ version = "0.13.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf"
dependencies = [
"indexmap 2.11.0",
"indexmap 2.11.4",
"lazy_static",
"num-traits",
"regex",
......@@ -1087,17 +1065,43 @@ dependencies = [
[[package]]
name = "chrono"
version = "0.4.41"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-link",
"windows-link 0.2.0",
]
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half 2.6.0",
]
[[package]]
......@@ -1124,9 +1128,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.46"
version = "4.5.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c5e4fcf9c21d2e544ca1ee9d8552de13019a42aa7dbf32747fa7aaf1df76e57"
checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae"
dependencies = [
"clap_builder",
"clap_derive",
......@@ -1134,9 +1138,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.46"
version = "4.5.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fecb53a0e6fcfb055f686001bc2e2592fa527efaf38dbe81a6a9563562e57d41"
checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9"
dependencies = [
"anstream",
"anstyle",
......@@ -1147,9 +1151,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.5.45"
version = "4.5.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6"
checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c"
dependencies = [
"heck 0.5.0",
"proc-macro2",
......@@ -1210,9 +1214,9 @@ dependencies = [
[[package]]
name = "config"
version = "0.15.15"
version = "0.15.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0faa974509d38b33ff89282db9c3295707ccf031727c0de9772038ec526852ba"
checksum = "680d3ac2fe066c43300ec831c978871e50113a708d58ab13d231bd92deca5adb"
dependencies = [
"async-trait",
"convert_case",
......@@ -1220,10 +1224,10 @@ dependencies = [
"pathdiff",
"ron",
"rust-ini",
"serde",
"serde-untagged",
"serde_core",
"serde_json",
"toml 0.9.5",
"toml 0.9.7",
"winnow",
"yaml-rust2",
]
......@@ -1385,7 +1389,7 @@ dependencies = [
"atty",
"cast",
"clap 2.34.0",
"criterion-plot",
"criterion-plot 0.4.5",
"csv",
"itertools 0.10.5",
"lazy_static",
......@@ -1402,6 +1406,34 @@ dependencies = [
"walkdir",
]
[[package]]
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"anes",
"cast",
"ciborium",
"clap 4.5.48",
"criterion-plot 0.5.0",
"futures",
"is-terminal",
"itertools 0.10.5",
"num-traits",
"once_cell",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"tokio",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.4.5"
......@@ -1412,6 +1444,16 @@ dependencies = [
"itertools 0.10.5",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam"
version = "0.8.4"
......@@ -1565,9 +1607,9 @@ dependencies = [
[[package]]
name = "cudarc"
version = "0.17.2"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8147ca46109d41cc513fd629b52bbea9bd09b972034c2f32954ce84a92895a91"
checksum = "72ba848ae5c6f3cb36e71eab5f268763e3fabcabe3f7bc683e16f7fa3d46281e"
dependencies = [
"libloading",
]
......@@ -1618,6 +1660,16 @@ dependencies = [
"darling_macro 0.20.11",
]
[[package]]
name = "darling"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
dependencies = [
"darling_core 0.21.3",
"darling_macro 0.21.3",
]
[[package]]
name = "darling_core"
version = "0.11.0"
......@@ -1646,6 +1698,20 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "darling_core"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim 0.11.1",
"syn 2.0.106",
]
[[package]]
name = "darling_macro"
version = "0.11.0"
......@@ -1668,11 +1734,22 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "darling_macro"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
dependencies = [
"darling_core 0.21.3",
"quote",
"syn 2.0.106",
]
[[package]]
name = "dary_heap"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
dependencies = [
"serde",
]
......@@ -1715,12 +1792,12 @@ dependencies = [
[[package]]
name = "deranged"
version = "0.4.0"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
dependencies = [
"powerfmt",
"serde",
"serde_core",
]
[[package]]
......@@ -1919,7 +1996,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.60.2",
"windows-sys 0.61.0",
]
[[package]]
......@@ -2019,7 +2096,7 @@ dependencies = [
"tokio-util",
"tracing",
"url",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -2045,7 +2122,7 @@ dependencies = [
"dynamo-llm",
"dynamo-runtime",
"either",
"indexmap 2.11.0",
"indexmap 2.11.4",
"mistralrs",
"serde_json",
"tokio",
......@@ -2069,15 +2146,15 @@ dependencies = [
"async_zmq",
"axum 0.8.4",
"axum-server",
"bitflags 2.9.3",
"bitflags 2.9.4",
"blake3",
"bs62",
"bytemuck",
"bytes",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono",
"criterion",
"cudarc 0.17.2",
"criterion 0.3.6",
"cudarc 0.17.3",
"dashmap",
"derive-getters",
"derive_builder",
......@@ -2140,7 +2217,7 @@ dependencies = [
"tracing",
"unicode-segmentation",
"url",
"uuid 1.18.0",
"uuid 1.18.1",
"validator",
"xxhash-rust",
"zeromq",
......@@ -2161,7 +2238,7 @@ dependencies = [
"serde_json",
"tokio",
"tracing",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -2171,7 +2248,7 @@ dependencies = [
"anyhow",
"async-stream",
"async-trait",
"clap 4.5.46",
"clap 4.5.48",
"dynamo-async-openai",
"dynamo-engine-llamacpp",
"dynamo-engine-mistralrs",
......@@ -2189,7 +2266,7 @@ dependencies = [
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.18.0",
"uuid 1.18.1",
"vergen-gitcl",
]
......@@ -2211,6 +2288,7 @@ dependencies = [
"bytes",
"chrono",
"console-subscriber",
"criterion 0.5.1",
"derive-getters",
"derive_builder",
"educe",
......@@ -2229,6 +2307,7 @@ dependencies = [
"once_cell",
"prometheus",
"rand 0.9.2",
"rayon",
"regex",
"reqwest 0.12.23",
"rstest 0.23.0",
......@@ -2240,13 +2319,14 @@ dependencies = [
"tempfile",
"thiserror 2.0.16",
"tokio",
"tokio-rayon",
"tokio-stream",
"tokio-util",
"tower-http",
"tracing",
"tracing-subscriber",
"url",
"uuid 1.18.0",
"uuid 1.18.1",
"validator",
"xxhash-rust",
]
......@@ -2434,22 +2514,23 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "erased-serde"
version = "0.4.6"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e004d887f51fcb9fef17317a2f3525c887d8aa3f4f50fed920816a688284a5b7"
checksum = "259d404d09818dec19332e31d94558aeb442fea04c817006456c24b5460bbd4b"
dependencies = [
"serde",
"serde_core",
"typeid",
]
[[package]]
name = "errno"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.60.2",
"windows-sys 0.61.0",
]
[[package]]
......@@ -2547,6 +2628,26 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fax"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab"
dependencies = [
"fax_derive",
]
[[package]]
name = "fax_derive"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "fdeflate"
version = "0.3.7"
......@@ -2579,6 +2680,12 @@ dependencies = [
"version_check",
]
[[package]]
name = "find-msvc-tools"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
[[package]]
name = "find_cuda_helper"
version = "0.2.0"
......@@ -2690,9 +2797,9 @@ dependencies = [
[[package]]
name = "fs-err"
version = "3.1.1"
version = "3.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88d7be93788013f265201256d58f04936a8079ad5dc898743aa20525f503b683"
checksum = "44f150ffc8782f35521cec2b23727707cb4045706ba3c854e86bef66b3a8cdbd"
dependencies = [
"autocfg",
"tokio",
......@@ -2880,7 +2987,7 @@ dependencies = [
"num-complex",
"num-traits",
"paste",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"seq-macro",
]
......@@ -2910,7 +3017,7 @@ dependencies = [
"num-complex",
"num-traits",
"paste",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"seq-macro",
]
......@@ -2940,7 +3047,7 @@ dependencies = [
"num-complex",
"num-traits",
"paste",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"seq-macro",
]
......@@ -2979,7 +3086,7 @@ dependencies = [
"once_cell",
"paste",
"pulp 0.21.5",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"rayon",
"seq-macro",
"sysctl 0.6.0",
......@@ -3016,7 +3123,7 @@ dependencies = [
"num-complex",
"num-traits",
"paste",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"rayon",
"seq-macro",
]
......@@ -3047,7 +3154,7 @@ dependencies = [
"num-complex",
"num-traits",
"paste",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"seq-macro",
]
......@@ -3077,7 +3184,7 @@ dependencies = [
"num-complex",
"num-traits",
"paste",
"raw-cpuid 11.5.0",
"raw-cpuid 11.6.0",
"seq-macro",
]
......@@ -3093,9 +3200,9 @@ dependencies = [
[[package]]
name = "getopts"
version = "0.2.23"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width 0.2.1",
]
......@@ -3123,7 +3230,7 @@ dependencies = [
"js-sys",
"libc",
"r-efi",
"wasi 0.14.3+wasi-0.2.4",
"wasi 0.14.7+wasi-0.2.4",
"wasm-bindgen",
]
......@@ -3146,7 +3253,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
dependencies = [
"fancy-regex 0.14.0",
"ggml-quants",
"indexmap 2.11.0",
"indexmap 2.11.4",
"log",
"num_enum",
]
......@@ -3198,7 +3305,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 0.2.12",
"indexmap 2.11.0",
"indexmap 2.11.4",
"slab",
"tokio",
"tokio-util",
......@@ -3217,7 +3324,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.3.1",
"indexmap 2.11.0",
"indexmap 2.11.4",
"slab",
"tokio",
"tokio-util",
......@@ -3270,6 +3377,12 @@ dependencies = [
"foldhash",
]
[[package]]
name = "hashbrown"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
[[package]]
name = "hashlink"
version = "0.10.0"
......@@ -3348,15 +3461,6 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "home"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "hound"
version = "3.5.1"
......@@ -3469,9 +3573,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "humantime"
version = "2.2.0"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
[[package]]
name = "hyper"
......@@ -3569,9 +3673,9 @@ dependencies = [
[[package]]
name = "hyper-util"
version = "0.1.16"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8"
dependencies = [
"base64 0.22.1",
"bytes",
......@@ -3595,9 +3699,9 @@ dependencies = [
[[package]]
name = "iana-time-zone"
version = "0.1.63"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
dependencies = [
"android_system_properties",
"core-foundation-sys",
......@@ -3605,7 +3709,7 @@ dependencies = [
"js-sys",
"log",
"wasm-bindgen",
"windows-core 0.61.2",
"windows-core 0.62.0",
]
[[package]]
......@@ -3732,9 +3836,9 @@ dependencies = [
[[package]]
name = "image"
version = "0.25.6"
version = "0.25.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db35664ce6b9810857a38a906215e75a9c879f0696556a39f59c62829710251a"
checksum = "529feb3e6769d234375c4cf1ee2ce713682b8e76538cb13f9fc23e1400a591e7"
dependencies = [
"bytemuck",
"byteorder-lite",
......@@ -3742,6 +3846,7 @@ dependencies = [
"exr",
"gif",
"image-webp",
"moxcms",
"num-traits",
"png",
"qoi",
......@@ -3765,9 +3870,9 @@ dependencies = [
[[package]]
name = "imgref"
version = "1.11.0"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408"
checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8"
[[package]]
name = "indexmap"
......@@ -3782,13 +3887,14 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.11.0"
version = "2.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5"
dependencies = [
"equivalent",
"hashbrown 0.15.5",
"hashbrown 0.16.0",
"serde",
"serde_core",
]
[[package]]
......@@ -3813,9 +3919,9 @@ checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "insta"
version = "1.43.1"
version = "1.43.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "154934ea70c58054b556dd430b99a98c2a7ff5309ac9891597e339b5c28f4371"
checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0"
dependencies = [
"console",
"globset",
......@@ -3867,7 +3973,7 @@ version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"cfg-if 1.0.3",
"libc",
]
......@@ -3909,6 +4015,17 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "is-terminal"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi 0.5.2",
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
......@@ -4026,17 +4143,11 @@ dependencies = [
"libc",
]
[[package]]
name = "jpeg-decoder"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07"
[[package]]
name = "js-sys"
version = "0.3.77"
version = "0.3.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305"
dependencies = [
"once_cell",
"wasm-bindgen",
......@@ -4073,7 +4184,7 @@ dependencies = [
"anyhow",
"base64 0.21.7",
"bytecount",
"clap 4.5.46",
"clap 4.5.48",
"fancy-regex 0.11.0",
"fraction",
"getrandom 0.2.16",
......@@ -4090,7 +4201,7 @@ dependencies = [
"serde_json",
"time",
"url",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -4125,23 +4236,17 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "lebe"
version = "0.5.2"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
[[package]]
name = "libc"
version = "0.2.175"
version = "0.2.176"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
[[package]]
name = "libdynamo_llm"
......@@ -4161,7 +4266,7 @@ dependencies = [
"tokio-stream",
"tracing",
"tracing-subscriber",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -4176,12 +4281,12 @@ dependencies = [
[[package]]
name = "libloading"
version = "0.8.8"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if 1.0.3",
"windows-targets 0.53.3",
"windows-link 0.2.0",
]
[[package]]
......@@ -4192,25 +4297,19 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "libredox"
version = "0.1.9"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"libc",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]]
name = "linux-raw-sys"
version = "0.9.4"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
[[package]]
name = "litemap"
......@@ -4220,9 +4319,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "llama-cpp-2"
version = "0.1.118"
version = "0.1.122"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "108004dde2928f96a3e515c8d8870f4fdef6a283fde6385bf0991993c7dde14b"
checksum = "d574d1f43b31c9e3d0e3bf596a31fa628e4b66894eb5db4dfe78e861c7f74275"
dependencies = [
"enumflags2",
"llama-cpp-sys-2",
......@@ -4233,11 +4332,11 @@ dependencies = [
[[package]]
name = "llama-cpp-sys-2"
version = "0.1.118"
version = "0.1.122"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c070e35e0cb58e7b4dfb7cd04a1cc7686ab0f04355076f2d5cdc284a1d8a0cf"
checksum = "e09bdf53b6f486ecaeb96b08cd8a9d9df162f2aafa37efb5b40cf421a419c755"
dependencies = [
"bindgen 0.72.0",
"bindgen 0.72.1",
"cc",
"cmake",
"find_cuda_helper",
......@@ -4252,7 +4351,7 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d
dependencies = [
"anyhow",
"derivre",
"indexmap 2.11.0",
"indexmap 2.11.4",
"regex-syntax",
"serde",
"serde_json",
......@@ -4283,9 +4382,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.27"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "loop9"
......@@ -4483,9 +4582,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.7.5"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "memmap2"
......@@ -4518,7 +4617,7 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
......@@ -4533,7 +4632,7 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
......@@ -4547,7 +4646,7 @@ name = "metrics"
version = "0.5.1"
dependencies = [
"axum 0.8.4",
"clap 4.5.46",
"clap 4.5.48",
"dynamo-llm",
"dynamo-runtime",
"futures",
......@@ -4677,11 +4776,11 @@ dependencies = [
"anyhow",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-nn",
"clap 4.5.46",
"clap 4.5.48",
"either",
"futures",
"image",
"indexmap 2.11.0",
"indexmap 2.11.4",
"mistralrs-core",
"rand 0.9.2",
"reqwest 0.12.23",
......@@ -4722,7 +4821,7 @@ dependencies = [
"candle-nn",
"cfgrammar",
"chrono",
"clap 4.5.46",
"clap 4.5.48",
"csv",
"derive-new",
"derive_more 2.0.1",
......@@ -4738,7 +4837,7 @@ dependencies = [
"html2text",
"http 1.3.1",
"image",
"indexmap 2.11.0",
"indexmap 2.11.4",
"indicatif",
"interprocess",
"itertools 0.14.0",
......@@ -4793,7 +4892,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"urlencoding",
"uuid 1.18.0",
"uuid 1.18.1",
"variantly",
"vob",
]
......@@ -4815,7 +4914,7 @@ dependencies = [
"tokio-tungstenite 0.24.0",
"tracing",
"utoipa",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -4877,7 +4976,7 @@ version = "0.1.0"
source = "git+https://github.com/ai-dynamo/modelexpress.git?rev=a232220bf268a475d293914d407f4ae186f443e3#a232220bf268a475d293914d407f4ae186f443e3"
dependencies = [
"anyhow",
"clap 4.5.46",
"clap 4.5.48",
"colored",
"futures",
"modelexpress-common",
......@@ -4889,7 +4988,7 @@ dependencies = [
"tonic 0.13.1",
"tracing",
"tracing-subscriber",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -4900,7 +4999,7 @@ dependencies = [
"anyhow",
"async-trait",
"chrono",
"clap 4.5.46",
"clap 4.5.48",
"config",
"hf-hub",
"jiff",
......@@ -4917,25 +5016,36 @@ dependencies = [
[[package]]
name = "monostate"
version = "0.1.14"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafe1be9d0c75642e3e50fedc7ecadf1ef1cbce6eb66462153fc44245343fbee"
checksum = "1a2901b7478a273256ce419c446289eb3c883a790d0bf5ed2f363c0cc3988012"
dependencies = [
"monostate-impl",
"serde",
"serde_core",
]
[[package]]
name = "monostate-impl"
version = "0.1.14"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5"
checksum = "328f7435b7f54322b33832f1e981ff192781f0d12473f12d062705a55015de8d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "moxcms"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd32fa8935aeadb8a8a6b6b351e40225570a37c43de67690383d87ef170cd08"
dependencies = [
"num-traits",
"pxfm",
]
[[package]]
name = "multimap"
version = "0.10.1"
......@@ -5072,7 +5182,7 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"cfg-if 1.0.3",
"cfg_aliases",
"libc",
......@@ -5374,7 +5484,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"libc",
"once_cell",
"onig_sys",
......@@ -5405,7 +5515,7 @@ dependencies = [
"anyhow",
"base64 0.22.1",
"bstr",
"clap 4.5.46",
"clap 4.5.48",
"fancy-regex 0.13.0",
"futures",
"image",
......@@ -5426,7 +5536,7 @@ version = "0.10.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"cfg-if 1.0.3",
"foreign-types 0.3.2",
"libc",
......@@ -5596,9 +5706,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "pest"
version = "2.8.1"
version = "2.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
checksum = "21e0a3a33733faeaf8651dfee72dd0f388f0c8e5ad496a3478fa5a922f49cfa8"
dependencies = [
"memchr",
"thiserror 2.0.16",
......@@ -5607,9 +5717,9 @@ dependencies = [
[[package]]
name = "pest_derive"
version = "2.8.1"
version = "2.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb056d9e8ea77922845ec74a1c4e8fb17e7c218cc4fc11a15c5d25e189aa40bc"
checksum = "bc58706f770acb1dbd0973e6530a3cff4746fb721207feb3a8a6064cd0b6c663"
dependencies = [
"pest",
"pest_generator",
......@@ -5617,9 +5727,9 @@ dependencies = [
[[package]]
name = "pest_generator"
version = "2.8.1"
version = "2.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e404e638f781eb3202dc82db6760c8ae8a1eeef7fb3fa8264b2ef280504966"
checksum = "6d4f36811dfe07f7b8573462465d5cb8965fffc2e71ae377a33aecf14c2c9a2f"
dependencies = [
"pest",
"pest_meta",
......@@ -5630,9 +5740,9 @@ dependencies = [
[[package]]
name = "pest_meta"
version = "2.8.1"
version = "2.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd1101f170f5903fde0914f899bb503d9ff5271d7ba76bbb70bea63690cc0d5"
checksum = "42919b05089acbd0a5dcd5405fb304d17d1053847b81163d09c4ad18ce8e8420"
dependencies = [
"pest",
"sha2",
......@@ -5645,7 +5755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset",
"indexmap 2.11.0",
"indexmap 2.11.4",
]
[[package]]
......@@ -5750,12 +5860,12 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plist"
version = "1.7.4"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
dependencies = [
"base64 0.22.1",
"indexmap 2.11.0",
"indexmap 2.11.4",
"quick-xml",
"serde",
"time",
......@@ -5791,11 +5901,11 @@ dependencies = [
[[package]]
name = "png"
version = "0.17.16"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526"
checksum = "97baced388464909d42d89643fe4361939af9b7ce7a31ee32a168f832a70f2a0"
dependencies = [
"bitflags 1.3.2",
"bitflags 2.9.4",
"crc32fast",
"fdeflate",
"flate2",
......@@ -5868,11 +5978,11 @@ dependencies = [
[[package]]
name = "proc-macro-crate"
version = "3.3.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
dependencies = [
"toml_edit",
"toml_edit 0.23.6",
]
[[package]]
......@@ -5955,13 +6065,13 @@ dependencies = [
[[package]]
name = "proptest"
version = "1.7.0"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce"
dependencies = [
"bit-set 0.8.0",
"bit-vec 0.8.0",
"bitflags 2.9.3",
"bitflags 2.9.4",
"lazy_static",
"num-traits",
"rand 0.9.2",
......@@ -6071,6 +6181,15 @@ dependencies = [
"version_check",
]
[[package]]
name = "pxfm"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83f9b339b02259ada5c0f4a389b7fb472f933aa17ce176fd2ad98f28bb401fde"
dependencies = [
"num-traits",
]
[[package]]
name = "qoi"
version = "0.4.1"
......@@ -6339,11 +6458,11 @@ dependencies = [
[[package]]
name = "raw-cpuid"
version = "11.5.0"
version = "11.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
]
[[package]]
......@@ -6410,7 +6529,7 @@ version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
]
[[package]]
......@@ -6446,9 +6565,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.11.2"
version = "1.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
dependencies = [
"aho-corasick",
"memchr",
......@@ -6458,9 +6577,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.10"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
dependencies = [
"aho-corasick",
"memchr",
......@@ -6630,7 +6749,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
dependencies = [
"base64 0.21.7",
"bitflags 2.9.3",
"bitflags 2.9.4",
"serde",
"serde_derive",
]
......@@ -6776,9 +6895,9 @@ dependencies = [
[[package]]
name = "rustfft"
version = "6.4.0"
version = "6.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6f140db74548f7c9d7cce60912c9ac414e74df5e718dc947d514b051b42f3f4"
checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89"
dependencies = [
"num-complex",
"num-integer",
......@@ -6790,42 +6909,29 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.9.3",
"errno",
"libc",
"linux-raw-sys 0.4.15",
"windows-sys 0.59.0",
]
[[package]]
name = "rustix"
version = "1.0.8"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"errno",
"libc",
"linux-raw-sys 0.9.4",
"windows-sys 0.60.2",
"linux-raw-sys",
"windows-sys 0.61.0",
]
[[package]]
name = "rustls"
version = "0.23.31"
version = "0.23.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
checksum = "cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40"
dependencies = [
"aws-lc-rs",
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.4",
"rustls-webpki 0.103.6",
"subtle",
"zeroize",
]
......@@ -6852,7 +6958,7 @@ dependencies = [
"openssl-probe",
"rustls-pki-types",
"schannel",
"security-framework 3.3.0",
"security-framework 3.5.0",
]
[[package]]
......@@ -6886,9 +6992,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.103.4"
version = "0.103.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb"
dependencies = [
"aws-lc-rs",
"ring",
......@@ -7026,11 +7132,11 @@ dependencies = [
[[package]]
name = "schannel"
version = "0.1.27"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
dependencies = [
"windows-sys 0.59.0",
"windows-sys 0.61.0",
]
[[package]]
......@@ -7124,7 +7230,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
......@@ -7133,11 +7239,11 @@ dependencies = [
[[package]]
name = "security-framework"
version = "3.3.0"
version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c"
checksum = "cc198e42d9b7510827939c9a15f5062a0c913f3371d765977e586d2fe6c16f4a"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"core-foundation 0.10.1",
"core-foundation-sys",
"libc",
......@@ -7146,9 +7252,9 @@ dependencies = [
[[package]]
name = "security-framework-sys"
version = "2.14.0"
version = "2.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0"
dependencies = [
"core-foundation-sys",
"libc",
......@@ -7160,7 +7266,7 @@ version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"cssparser",
"derive_more 0.99.20",
"fxhash",
......@@ -7181,9 +7287,9 @@ checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749"
[[package]]
name = "semver"
version = "1.0.26"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
[[package]]
name = "seq-macro"
......@@ -7193,10 +7299,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
version = "1.0.219"
version = "1.0.226"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
checksum = "0dca6411025b24b60bfa7ec1fe1f8e710ac09782dca409ee8237ba74b51295fd"
dependencies = [
"serde_core",
"serde_derive",
]
......@@ -7211,12 +7318,13 @@ dependencies = [
[[package]]
name = "serde-untagged"
version = "0.1.8"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34836a629bcbc6f1afdf0907a744870039b1e14c0561cb26094fa683b158eff3"
checksum = "f9faf48a4a2d2693be24c6289dbe26552776eb7737074e6722891fadbe6c5058"
dependencies = [
"erased-serde",
"serde",
"serde_core",
"typeid",
]
......@@ -7230,11 +7338,20 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_core"
version = "1.0.226"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba2ba63999edb9dac981fb34b3e5c0d111a69b0924e253ed29d83f7c99e966a4"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
version = "1.0.226"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33"
dependencies = [
"proc-macro2",
"quote",
......@@ -7254,15 +7371,16 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.143"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"indexmap 2.11.0",
"indexmap 2.11.4",
"itoa",
"memchr",
"ryu",
"serde",
"serde_core",
]
[[package]]
......@@ -7276,12 +7394,13 @@ dependencies = [
[[package]]
name = "serde_path_to_error"
version = "0.1.17"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a"
checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
dependencies = [
"itoa",
"serde",
"serde_core",
]
[[package]]
......@@ -7315,11 +7434,11 @@ dependencies = [
[[package]]
name = "serde_spanned"
version = "1.0.0"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83"
checksum = "5417783452c2be558477e104686f7de5dae53dba813c28435e0e70f82d9b04ee"
dependencies = [
"serde",
"serde_core",
]
[[package]]
......@@ -7336,15 +7455,15 @@ dependencies = [
[[package]]
name = "serde_with"
version = "3.14.0"
version = "3.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5"
checksum = "c522100790450cf78eeac1507263d0a350d4d5b30df0c8e1fe051a10c22b376e"
dependencies = [
"base64 0.22.1",
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.11.0",
"indexmap 2.11.4",
"schemars 0.9.0",
"schemars 1.0.4",
"serde",
......@@ -7356,11 +7475,11 @@ dependencies = [
[[package]]
name = "serde_with_macros"
version = "3.14.0"
version = "3.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
checksum = "327ada00f7d64abaac1e55a6911e90cf665aa051b9a561c7006c157f4633135e"
dependencies = [
"darling 0.20.11",
"darling 0.21.3",
"proc-macro2",
"quote",
"syn 2.0.106",
......@@ -7372,7 +7491,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap 2.11.0",
"indexmap 2.11.4",
"itoa",
"ryu",
"serde",
......@@ -7510,9 +7629,9 @@ dependencies = [
[[package]]
name = "simba"
version = "0.9.0"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3a386a501cd104797982c15ae17aafe8b9261315b5d07e3ec803f2ea26be0fa"
checksum = "c99284beb21666094ba2b75bbceda012e610f5479dfcc2d6e2426f53197ffd95"
dependencies = [
"approx",
"num-complex",
......@@ -7661,9 +7780,9 @@ dependencies = [
[[package]]
name = "stop-words"
version = "0.8.1"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6a86be9f7fa4559b7339669e72026eb437f5e9c5a85c207fe1033079033a17"
checksum = "645a3d441ccf4bf47f2e4b7681461986681a6eeea9937d4c3bc9febd61d17c71"
dependencies = [
"serde_json",
]
......@@ -7927,7 +8046,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"byteorder",
"enum-as-inner",
"libc",
......@@ -7941,7 +8060,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"byteorder",
"enum-as-inner",
"libc",
......@@ -7981,7 +8100,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"core-foundation 0.9.4",
"system-configuration-sys 0.6.0",
]
......@@ -8037,15 +8156,15 @@ dependencies = [
[[package]]
name = "tempfile"
version = "3.21.0"
version = "3.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
dependencies = [
"fastrand",
"getrandom 0.3.3",
"once_cell",
"rustix 1.0.8",
"windows-sys 0.60.2",
"rustix",
"windows-sys 0.61.0",
]
[[package]]
......@@ -8065,7 +8184,7 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0"
dependencies = [
"rustix 1.0.8",
"rustix",
"windows-sys 0.60.2",
]
......@@ -8129,20 +8248,23 @@ dependencies = [
[[package]]
name = "tiff"
version = "0.9.1"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e"
checksum = "af9605de7fee8d9551863fd692cce7637f548dbd9db9180fcc07ccc6d26c336f"
dependencies = [
"fax",
"flate2",
"jpeg-decoder",
"half 2.6.0",
"quick-error 2.0.1",
"weezl",
"zune-jpeg",
]
[[package]]
name = "time"
version = "0.3.41"
version = "0.3.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
dependencies = [
"deranged",
"itoa",
......@@ -8157,15 +8279,15 @@ dependencies = [
[[package]]
name = "time-core"
version = "0.1.4"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
[[package]]
name = "time-macros"
version = "0.2.22"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
dependencies = [
"num-conv",
"time-core",
......@@ -8317,9 +8439,9 @@ dependencies = [
[[package]]
name = "tokio-rustls"
version = "0.26.2"
version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd"
dependencies = [
"rustls",
"tokio",
......@@ -8470,18 +8592,18 @@ dependencies = [
"serde",
"serde_spanned 0.6.9",
"toml_datetime 0.6.11",
"toml_edit",
"toml_edit 0.22.27",
]
[[package]]
name = "toml"
version = "0.9.5"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8"
checksum = "00e5e5d9bf2475ac9d4f0d9edab68cc573dc2fd644b0dba36b0c30a92dd9eaa0"
dependencies = [
"serde",
"serde_spanned 1.0.0",
"toml_datetime 0.7.0",
"serde_core",
"serde_spanned 1.0.2",
"toml_datetime 0.7.2",
"toml_parser",
"winnow",
]
......@@ -8497,11 +8619,11 @@ dependencies = [
[[package]]
name = "toml_datetime"
version = "0.7.0"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3"
checksum = "32f1085dec27c2b6632b04c80b3bb1b4300d6495d1e129693bdda7d91e72eec1"
dependencies = [
"serde",
"serde_core",
]
[[package]]
......@@ -8510,7 +8632,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap 2.11.0",
"indexmap 2.11.4",
"serde",
"serde_spanned 0.6.9",
"toml_datetime 0.6.11",
......@@ -8518,6 +8640,18 @@ dependencies = [
"winnow",
]
[[package]]
name = "toml_edit"
version = "0.23.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b"
dependencies = [
"indexmap 2.11.4",
"toml_datetime 0.7.2",
"toml_parser",
"winnow",
]
[[package]]
name = "toml_parser"
version = "1.0.3"
......@@ -8635,7 +8769,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"indexmap 2.11.0",
"indexmap 2.11.4",
"pin-project-lite",
"slab",
"sync_wrapper 1.0.2",
......@@ -8652,7 +8786,7 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [
"bitflags 2.9.3",
"bitflags 2.9.4",
"bytes",
"futures-util",
"http 1.3.1",
......@@ -8968,15 +9102,15 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]]
name = "unicode-general-category"
version = "1.0.0"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24adfe8311434967077a6adff125729161e6e4934d76f6b7c55318ac5c9246d3"
checksum = "0b993bddc193ae5bd0d623b49ec06ac3e9312875fdae725a975c51db1cc1677f"
[[package]]
name = "unicode-ident"
version = "1.0.18"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
[[package]]
name = "unicode-normalization-alignments"
......@@ -9112,7 +9246,7 @@ version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993"
dependencies = [
"indexmap 2.11.0",
"indexmap 2.11.4",
"serde",
"serde_json",
"utoipa-gen",
......@@ -9140,9 +9274,9 @@ dependencies = [
[[package]]
name = "uuid"
version = "1.18.0"
version = "1.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be"
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
dependencies = [
"getrandom 0.3.3",
"js-sys",
......@@ -9312,30 +9446,40 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasi"
version = "0.14.3+wasi-0.2.4"
version = "0.14.7+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a51ae83037bdd272a9e28ce236db8c07016dd0d50c27038b3f407533c030c95"
checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
dependencies = [
"wasip2",
]
[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d"
dependencies = [
"cfg-if 1.0.3",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19"
dependencies = [
"bumpalo",
"log",
......@@ -9347,9 +9491,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
version = "0.4.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c"
dependencies = [
"cfg-if 1.0.3",
"js-sys",
......@@ -9360,9 +9504,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
......@@ -9370,9 +9514,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
dependencies = [
"proc-macro2",
"quote",
......@@ -9383,9 +9527,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
version = "0.2.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1"
dependencies = [
"unicode-ident",
]
......@@ -9405,9 +9549,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.77"
version = "0.3.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120"
dependencies = [
"js-sys",
"wasm-bindgen",
......@@ -9459,18 +9603,6 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3"
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix 0.38.44",
]
[[package]]
name = "wide"
version = "0.7.33"
......@@ -9517,11 +9649,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.10"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.60.2",
"windows-sys 0.61.0",
]
[[package]]
......@@ -9551,15 +9683,15 @@ dependencies = [
[[package]]
name = "windows-core"
version = "0.61.2"
version = "0.62.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
"windows-link 0.2.0",
"windows-result 0.4.0",
"windows-strings 0.5.0",
]
[[package]]
......@@ -9590,15 +9722,21 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
[[package]]
name = "windows-link"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
[[package]]
name = "windows-registry"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
dependencies = [
"windows-link",
"windows-result",
"windows-strings",
"windows-link 0.1.3",
"windows-result 0.3.4",
"windows-strings 0.4.2",
]
[[package]]
......@@ -9607,7 +9745,16 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
dependencies = [
"windows-link",
"windows-link 0.1.3",
]
[[package]]
name = "windows-result"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f"
dependencies = [
"windows-link 0.2.0",
]
[[package]]
......@@ -9616,7 +9763,16 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
dependencies = [
"windows-link",
"windows-link 0.1.3",
]
[[package]]
name = "windows-strings"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda"
dependencies = [
"windows-link 0.2.0",
]
[[package]]
......@@ -9655,6 +9811,15 @@ dependencies = [
"windows-targets 0.53.3",
]
[[package]]
name = "windows-sys"
version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa"
dependencies = [
"windows-link 0.2.0",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
......@@ -9692,7 +9857,7 @@ version = "0.53.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
dependencies = [
"windows-link",
"windows-link 0.1.3",
"windows_aarch64_gnullvm 0.53.0",
"windows_aarch64_msvc 0.53.0",
"windows_i686_gnu 0.53.0",
......@@ -9862,9 +10027,9 @@ dependencies = [
[[package]]
name = "wit-bindgen"
version = "0.45.0"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
[[package]]
name = "writeable"
......@@ -9955,18 +10120,18 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.26"
version = "0.8.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.26"
version = "0.8.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
dependencies = [
"proc-macro2",
"quote",
......@@ -10024,7 +10189,7 @@ dependencies = [
"thiserror 1.0.69",
"tokio",
"tokio-util",
"uuid 1.18.0",
"uuid 1.18.1",
]
[[package]]
......@@ -10080,7 +10245,7 @@ dependencies = [
"crc32fast",
"crossbeam-utils",
"displaydoc",
"indexmap 2.11.0",
"indexmap 2.11.4",
"num_enum",
"thiserror 1.0.69",
]
......@@ -10124,9 +10289,9 @@ dependencies = [
[[package]]
name = "zune-jpeg"
version = "0.4.20"
version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc1f7e205ce79eb2da3cd71c5f55f3589785cb7c79f6a03d1c8d1491bda5d089"
checksum = "29ce2c8a9384ad323cf564b67da86e21d3cfdff87908bc1223ed5c99bc792713"
dependencies = [
"zune-core",
]
......@@ -17,6 +17,7 @@ default = []
integration = []
testing-etcd = [] # Tests that require an active ETCD server
tokio-console = ["dep:console-subscriber", "tokio/tracing"]
compute-validation = [] # Enable validation and timing for compute macros
[dependencies]
# Use workspace dependencies where available
......@@ -63,15 +64,22 @@ nid = { version = "3.0.0", features = ["serde"] }
nix = { version = "0.29", features = ["signal"] }
nuid = { version = "0.5" }
once_cell = { version = "1" }
rayon = { version = "1.10" }
regex = { version = "1" }
socket2 = { version = "0.5.8" }
tokio-rayon = { version = "2.1" }
[dev-dependencies]
assert_matches = { version = "1.5.0" }
criterion = { version = "0.5", features = ["async_tokio"] }
env_logger = { version = "0.11" }
reqwest = { workspace = true }
rstest = { version = "0.23.0" }
temp-env = { version = "0.3.6" , features=["async_closure"] }
stdio-override = {version= "0.2.0"}
jsonschema = {version = "0.17"}
tempfile = { workspace = true }
\ No newline at end of file
tempfile = { workspace = true }
[[bench]]
name = "compute_pool_overhead"
harness = false
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
use dynamo_runtime::compute::ComputePool;
use std::sync::Arc;
/// CPU-bound workload used by the benchmarks: adds up every prime in `2..=n`.
///
/// Returns `0` when `n < 2`, because the candidate range is then empty.
fn compute_primes_sum(n: u64) -> u64 {
    (2..=n).filter(|&candidate| is_prime(candidate)).sum()
}
/// Trial-division primality test.
///
/// Values below 2 are rejected, 2 and 3 are accepted, and multiples of 2 or 3
/// are rejected up front. Every remaining candidate divisor has the form
/// `6k - 1` or `6k + 1`, so the search walks 5, 11, 17, ... up to the
/// truncated square root of `n`, testing each value and the value two above it.
fn is_prime(n: u64) -> bool {
    match n {
        0 | 1 => false,
        2 | 3 => true,
        _ if n.is_multiple_of(2) || n.is_multiple_of(3) => false,
        _ => {
            // Upper bound for divisors, computed via f64 and truncated.
            let limit = (n as f64).sqrt() as u64;
            !(5..=limit)
                .step_by(6)
                .any(|k| n.is_multiple_of(k) || n.is_multiple_of(k + 2))
        }
    }
}
/// Benchmark group `compute_overhead`: runs `compute_primes_sum(n)` for three
/// input sizes and compares three ways of executing it from async code:
/// inline on a Tokio worker, offloaded to the `ComputePool`, and offloaded via
/// `tokio::task::spawn_blocking`.
fn bench_compute_overhead(c: &mut Criterion) {
// Test 3 representative sizes: small, medium, large
let test_sizes = [10, 1_000, 100_000];
let mut group = c.benchmark_group("compute_overhead");
group.sample_size(10); // Reduce sample size for longer benchmarks
// Setup runtimes: a 4-worker runtime for the inline and spawn_blocking
// variants, and a 1-worker runtime that only drives the Rayon offload.
let tokio_4thread = tokio::runtime::Builder::new_multi_thread()
.worker_threads(4)
.max_blocking_threads(1)
.enable_all()
.build()
.unwrap();
let tokio_1thread = tokio::runtime::Builder::new_multi_thread()
.worker_threads(1)
.max_blocking_threads(1)
.enable_all()
.build()
.unwrap();
// Setup compute pool (4 threads, 2 MiB stacks, no pinning)
let compute_config = dynamo_runtime::compute::ComputeConfig {
num_threads: Some(4),
stack_size: Some(2 * 1024 * 1024),
thread_prefix: "bench".to_string(),
pin_threads: false,
};
let compute_pool = Arc::new(ComputePool::new(compute_config).unwrap());
for n in test_sizes {
// Benchmark 1: Direct execution on Tokio (4 threads)
group.bench_with_input(BenchmarkId::new("tokio_direct", n), &n, |b, &n| {
b.to_async(&tokio_4thread)
.iter(|| async move { black_box(compute_primes_sum(black_box(n))) });
});
// Benchmark 2: Rayon offload (1 Tokio thread + 4 Rayon threads)
let pool = compute_pool.clone();
group.bench_with_input(BenchmarkId::new("rayon_offload", n), &n, |b, &n| {
b.to_async(&tokio_1thread).iter(|| {
// The Arc clone happens inside the timed closure, once per iteration.
let pool = pool.clone();
async move {
pool.execute(move || black_box(compute_primes_sum(black_box(n))))
.await
.unwrap()
}
});
});
// Benchmark 3: spawn_blocking (4 Tokio threads)
group.bench_with_input(BenchmarkId::new("spawn_blocking", n), &n, |b, &n| {
b.to_async(&tokio_4thread).iter(|| async move {
tokio::task::spawn_blocking(move || black_box(compute_primes_sum(black_box(n))))
.await
.unwrap()
});
});
}
group.finish();
}
/// Benchmark group `parallel_tasks`: spawns `batch_size` Tokio tasks that each
/// compute `compute_primes_sum(10_000)` and awaits them all, comparing inline
/// execution, `ComputePool::execute`, and `spawn_blocking`.
fn bench_parallel_tasks(c: &mut Criterion) {
let mut group = c.benchmark_group("parallel_tasks");
group.sample_size(10); // Small sample size; each iteration runs a whole batch
// NOTE(review): `max_blocking_threads(1)` presumably forces the
// `spawn_blocking_parallel` variant to run its closures one at a time, while the
// other variants can use 4 threads - confirm this comparison is intended.
let tokio_runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(4)
.max_blocking_threads(1)
.enable_all()
.build()
.unwrap();
let compute_config = dynamo_runtime::compute::ComputeConfig {
num_threads: Some(4),
stack_size: Some(2 * 1024 * 1024),
thread_prefix: "bench".to_string(),
pin_threads: false,
};
let compute_pool = Arc::new(ComputePool::new(compute_config).unwrap());
// Test with different batch sizes
for batch_size in [10, 100] {
let n = 10_000; // Fixed compute size
// Direct parallel execution on Tokio threads
group.bench_with_input(
BenchmarkId::new("tokio_direct_parallel", batch_size),
&batch_size,
|b, &batch_size| {
b.to_async(&tokio_runtime).iter(|| async move {
// Spawn every task first, then await them in spawn order.
let tasks = (0..batch_size)
.map(|_| tokio::spawn(async move { compute_primes_sum(n) }))
.collect::<Vec<_>>();
for task in tasks {
black_box(task.await.unwrap());
}
});
},
);
// Parallel execution with Rayon
let pool = compute_pool.clone();
group.bench_with_input(
BenchmarkId::new("rayon_parallel", batch_size),
&batch_size,
|b, &batch_size| {
b.to_async(&tokio_runtime).iter(|| {
let pool = pool.clone();
async move {
let tasks = (0..batch_size)
.map(|_| {
// Each spawned task needs its own handle to the pool.
let pool = pool.clone();
tokio::spawn(async move {
pool.execute(move || compute_primes_sum(n)).await.unwrap()
})
})
.collect::<Vec<_>>();
for task in tasks {
black_box(task.await.unwrap());
}
}
});
},
);
// Parallel execution with spawn_blocking
group.bench_with_input(
BenchmarkId::new("spawn_blocking_parallel", batch_size),
&batch_size,
|b, &batch_size| {
b.to_async(&tokio_runtime).iter(|| async move {
let tasks = (0..batch_size)
.map(|_| {
tokio::spawn(async move {
tokio::task::spawn_blocking(move || compute_primes_sum(n))
.await
.unwrap()
})
})
.collect::<Vec<_>>();
for task in tasks {
black_box(task.await.unwrap());
}
});
},
);
}
group.finish();
}
/// Benchmark group `block_in_place_overhead`: for three input sizes, compares
/// inline execution, `tokio::task::block_in_place`, `spawn_blocking`, and
/// `ComputePool::execute`, all driven from the same 4-worker Tokio runtime.
fn bench_block_in_place_overhead(c: &mut Criterion) {
// Test block_in_place overhead for small, medium, and large tasks
let test_sizes = [10, 1_000, 100_000];
let mut group = c.benchmark_group("block_in_place_overhead");
group.sample_size(10);
// Setup 4-thread runtime for testing
let tokio_runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(4)
.max_blocking_threads(1)
.enable_all()
.build()
.unwrap();
// Setup compute pool for comparison
let compute_config = dynamo_runtime::compute::ComputeConfig {
num_threads: Some(4),
stack_size: Some(2 * 1024 * 1024),
thread_prefix: "bench".to_string(),
pin_threads: false,
};
let compute_pool = Arc::new(ComputePool::new(compute_config).unwrap());
for n in test_sizes {
// Benchmark 1: Direct execution (baseline)
group.bench_with_input(BenchmarkId::new("direct", n), &n, |b, &n| {
b.to_async(&tokio_runtime)
.iter(|| async move { black_box(compute_primes_sum(black_box(n))) });
});
// Benchmark 2: block_in_place (no semaphore)
group.bench_with_input(BenchmarkId::new("block_in_place", n), &n, |b, &n| {
b.to_async(&tokio_runtime).iter(|| async move {
tokio::task::block_in_place(|| black_box(compute_primes_sum(black_box(n))))
});
});
// Benchmark 3: spawn_blocking
group.bench_with_input(BenchmarkId::new("spawn_blocking", n), &n, |b, &n| {
b.to_async(&tokio_runtime).iter(|| async move {
tokio::task::spawn_blocking(move || black_box(compute_primes_sum(black_box(n))))
.await
.unwrap()
});
});
// Benchmark 4: Rayon offload
let pool = compute_pool.clone();
group.bench_with_input(BenchmarkId::new("rayon_offload", n), &n, |b, &n| {
b.to_async(&tokio_runtime).iter(|| {
// The Arc clone happens inside the timed closure, once per iteration.
let pool = pool.clone();
async move {
pool.execute(move || black_box(compute_primes_sum(black_box(n))))
.await
.unwrap()
}
});
});
}
group.finish();
}
// Register the three benchmark functions under the `benches` group and generate
// the `main` entry point (the bench target is declared with `harness = false`).
criterion_group!(
benches,
bench_compute_overhead,
bench_parallel_tasks,
bench_block_in_place_overhead
);
criterion_main!(benches);
# Rayon-Tokio Integration Strategy
## Overview
This document describes the integration strategy for combining Tokio's asynchronous runtime with Rayon's data-parallel compute capabilities in the Dynamo runtime. The core philosophy is simple:
- **Tokio** handles I/O-bound operations, waiting, and coordination
- **Rayon** handles CPU-bound operations and data parallelism
- Multiple async tasks can concurrently submit different types of work to the shared Rayon thread pool
## Architecture
```text
+---------------------------------------------------------------+
| Tokio Runtime |
| +-----------+ +-----------+ +-----------+ |
| | Async | | Async | | Async | |
| | Task 1 | | Task 2 | | Task 3 | |
| | | | | | | |
| | Receives | | Processes | | Handles | |
| | requests | | streams | | batches | |
| +-----+-----+ +-----+-----+ +-----+-----+ |
| | | | |
| +----------------+----------------+ |
| | |
| tokio_rayon::spawn |
| | |
+-------------------------+------------------------------------+
|
v
+---------------------------------------------------------------+
| Rayon Thread Pool |
| |
| +----------------------------------------------------------+ |
| | Work-Stealing Thread Pool (N threads) | |
| | | |
| | +---------+ +-----------+ +------------------+ | |
| | | scope() | | par_iter()| | join() | | |
| | | tasks | | chunks | | computations | | |
| | +---------+ +-----------+ +------------------+ | |
| | | |
| | All patterns share the same thread pool | |
| +----------------------------------------------------------+ |
+---------------------------------------------------------------+
```
## When to Use Tokio vs Rayon
### Use Tokio (async/await) when
- **Waiting for I/O**: Network requests, file I/O, database queries
- **Coordinating tasks**: Channels, synchronization, signaling
- **Stream processing**: Items arrive over time with delays
- **Resource pooling**: Connection pools, async locks
- **Service orchestration**: Managing component lifecycles
### Use Rayon (compute pool) when
- **Batch processing**: You have all data ready for parallel processing
- **CPU-intensive work**: Computation takes >1ms per item
- **Data transformation**: Tokenization, serialization, compression
- **Parallel algorithms**: Matrix operations, sorting, searching
- **Map-reduce patterns**: Aggregations over large datasets
### Decision Thresholds
- Use Rayon when processing **≥10 items** in parallel
- Move CPU work off the Tokio worker threads (via `spawn_blocking` or the compute pool) once it takes **>1ms**
- Keep Tokio for operations with **>100μs waits** between items
- Use Rayon when you can **saturate multiple CPU cores**
### Overhead Considerations
Based on benchmarks, the async bridge between Tokio and Rayon has:
- **~25μs overhead** for small tasks (due to channel communication)
- **~4% overhead** for tasks taking >2ms
- **Negligible overhead** for tasks taking >10ms
For minimal overhead when using Rayon from async context:
- **Small tasks (<100μs)**: Run directly on Tokio
- **Medium tasks (100μs-1ms)**: Use `spawn_blocking` + `pool.execute_sync()`
- **Large tasks (>1ms)**: Use `pool.execute()` for convenience
## Concurrent Usage Patterns
The key insight is that multiple async tasks can concurrently use the same Rayon thread pool with different parallelization patterns. Rayon's work-stealing scheduler efficiently distributes work regardless of the pattern used.
### Pattern 1: Concurrent Scope and ParIter
```rust,ignore
use std::sync::Arc;
use dynamo_runtime::compute::ComputePool;
async fn concurrent_compute_tasks(pool: Arc<ComputePool>) {
// Task 1: Using scope for dynamic task spawning
let task1 = tokio::spawn({
let pool = pool.clone();
async move {
pool.execute_scoped(|scope| {
// Dynamically spawn tasks based on runtime conditions
for i in 0..num_tasks {
scope.spawn(move |_| {
expensive_computation(i)
});
}
}).await
}
});
// Task 2: Using parallel iterators for batch processing
let task2 = tokio::spawn({
let pool = pool.clone();
async move {
pool.install(|| {
// Process data in parallel chunks
data.par_chunks(100)
.map(|chunk| transform_chunk(chunk))
.collect::<Vec<_>>()
}).await
}
});
// Task 3: Using join for binary parallelism
let task3 = tokio::spawn({
let pool = pool.clone();
async move {
pool.join(
|| compute_left_branch(),
|| compute_right_branch(),
).await
}
});
// All three tasks run concurrently, sharing the Rayon thread pool
let (r1, r2, r3) = tokio::join!(task1, task2, task3);
}
```
### Pattern 2: Stream Processing with Batch Compute
```rust,no_run
# use futures::StreamExt;
# use rayon::prelude::*;
# use std::sync::Arc;
# struct Data;
# fn process_item(_: &Data) -> i32 { 0 }
# async fn send_results(_: Vec<i32>) {}
# use dynamo_runtime::compute::ComputePool;
# use futures::stream::Stream;
/// Example: Process async stream with CPU-intensive batch operations
async fn stream_with_compute(
pool: Arc<ComputePool>,
stream: impl Stream<Item = Vec<Data>>,
) {
// Use for_each_concurrent for proper stream consumption
stream.for_each_concurrent(4, |batch| {
let pool = pool.clone();
async move {
// Process batch using parallel iterators
let result = pool.install(move || {
batch.par_iter()
.map(|item| process_item(item))
.collect::<Vec<_>>()
}).await.unwrap();
// Async I/O to send results
send_results(result).await;
}
}).await;
}
```
### Pattern 3: Mixed Workload Service
```rust,ignore
/// Real-world example: LLM service with mixed workloads
struct LLMService {
runtime: Arc<Runtime>,
tokenizer: Arc<Tokenizer>,
}
impl LLMService {
async fn run(&self) {
let pool = self.runtime.compute_pool()
.expect("Compute pool required");
// Tokenization service - uses parallel iterators
let tokenization_task = {
let pool = pool.clone();
let tokenizer = self.tokenizer.clone();
tokio::spawn(async move {
loop {
// Async I/O: receive batch from network
let texts = receive_tokenization_batch().await;
// CPU-bound: parallel tokenization
let tokens = pool.install(move || {
texts.par_iter()
.map(|text| tokenizer.encode(text))
.collect::<Vec<_>>()
}).await.unwrap();
// Async I/O: send results
send_tokens(tokens).await;
}
})
};
// Embedding service - uses scope for multi-stage computation
let embedding_task = {
let pool = pool.clone();
tokio::spawn(async move {
loop {
// Async I/O: receive request
let request = receive_embedding_request().await;
// CPU-bound: multi-stage parallel computation
let embeddings = pool.execute_scoped(|scope| {
let mut text_emb = None;
let mut context_emb = None;
scope.spawn(|_| {
text_emb = Some(compute_text_embedding(&request.text));
});
scope.spawn(|_| {
context_emb = Some(compute_context_embedding(&request.context));
});
// Scope waits for both to complete
combine_embeddings(text_emb.unwrap(), context_emb.unwrap())
}).await.unwrap();
// Async I/O: send results
send_embeddings(embeddings).await;
}
})
};
// Batch inference service - uses nested parallelism
let inference_task = {
let pool = pool.clone();
tokio::spawn(async move {
loop {
let batch = receive_inference_batch().await;
let results = pool.execute_scoped(|scope| {
let mut results = Vec::with_capacity(batch.len());
// Spawn a task for each item
for item in batch {
scope.spawn(move |s2| {
// Within each task, use parallel iterators
let preprocessed = item.data
.par_chunks(10)
.map(|chunk| preprocess(chunk))
.collect::<Vec<_>>();
// Can spawn more tasks within nested scope
let mut stages = vec![];
for p in preprocessed {
s2.spawn(move |_| {
stages.push(run_inference(p));
});
}
results.push(merge_stages(stages));
});
}
results
}).await.unwrap();
send_inference_results(results).await;
}
})
};
// All services run concurrently, sharing the compute pool
tokio::join!(tokenization_task, embedding_task, inference_task);
}
}
```
## How It Works: Thread Pool Sharing
Rayon's work-stealing scheduler ensures efficient resource utilization even when different async tasks submit different types of work:
1. **Work Queues**: Each Rayon thread has a local deque (double-ended queue)
2. **Local Execution**: Threads prefer executing their own tasks (LIFO for cache locality)
3. **Work Stealing**: Idle threads steal tasks from busy threads (FIFO from the other end)
4. **No Interference**: Different parallelization patterns (scope, par_iter) coexist peacefully
This means:
- A `scope` task spawning many small tasks works alongside `par_chunks` processing large batches
- The thread pool automatically balances load between different types of work
- No manual coordination needed between different async tasks using the pool
## Performance Considerations
### Thread Pool Sizing
```toml
[runtime]
# Tokio threads: optimize for concurrent async tasks
num_worker_threads = 8 # Usually number of cores
# Rayon threads: optimize for CPU saturation
compute_threads = 4 # Often cores/2 to avoid oversubscription
```
### Avoiding Oversubscription
Total threads = Tokio workers + Rayon threads + System threads
**Recommendation**: Keep total ≤ 1.5 × physical cores
### Monitoring Pool Utilization
```rust,ignore
// Check pool metrics
let metrics = pool.metrics();
println!("Active tasks: {}", metrics.tasks_active());
println!("Average duration: {:.2}ms", metrics.avg_task_duration_us() / 1000.0);
println!("Slow tasks (>100ms): {}", metrics.slow_tasks());
// Adjust pool size if consistently over/under utilized
if metrics.tasks_active() > pool.num_threads() * 2 {
// Consider increasing compute_threads
}
```
## Common Patterns and Best Practices
### DO: Batch Collection Before Processing
```rust,ignore
// ✅ Good: Collect async items, then process in parallel
let items = stream.take(100).collect::<Vec<_>>().await;
let processed = pool.install(|| {
items.par_iter().map(|item| process(item)).collect()
}).await?;
```
### DON'T: Mix Async and Compute in Tight Loops
```rust,ignore
// ❌ Bad: Alternating between async and compute
for item in items {
let data = fetch_data(item).await; // Async
let result = pool.execute(|| compute(data)).await?; // Compute
store_result(result).await; // Async
}
// ✅ Good: Batch operations
let all_data = futures::future::join_all(
items.iter().map(|item| fetch_data(item))
).await;
let all_results = pool.install(|| {
all_data.par_iter().map(|data| compute(data)).collect()
}).await?;
futures::future::join_all(
all_results.iter().map(|result| store_result(result))
).await;
```
### DO: Use Scope for Dynamic Parallelism
```rust,ignore
// ✅ Good: When you don't know the parallelism level upfront
pool.execute_scoped(|scope| {
while let Some(work) = find_more_work() {
scope.spawn(move |_| {
process_work(work);
});
}
}).await?;
```
### DO: Use ParIter for Data Parallelism
```rust,ignore
// ✅ Good: When processing collections
pool.install(|| {
data.par_chunks(optimal_chunk_size())
.map(|chunk| process_chunk(chunk))
.reduce(|| initial_value(), |a, b| combine(a, b))
}).await?;
```
## Troubleshooting
### Issue: High Latency Despite Low CPU Usage
**Cause**: Too few Rayon threads for the workload
**Solution**: Increase `compute_threads` configuration
### Issue: System Feels Sluggish
**Cause**: Thread oversubscription
**Solution**: Reduce total thread count (Tokio + Rayon)
### Issue: Uneven Work Distribution
**Cause**: Poor chunk size selection
**Solution**: Use smaller chunks or dynamic scheduling with `scope`
### Issue: Deadlock or Hanging
**Cause**: Nested `install()` calls or blocking in Rayon threads
**Solution**: Use `execute()` instead of `install()` for simple tasks, and keep blocking calls (I/O, lock waits) out of closures that run on Rayon threads
## Configuration Examples
### High-Throughput Service
```toml
# Many concurrent requests, moderate compute per request
[runtime]
num_worker_threads = 16
compute_threads = 8
compute_stack_size = "4MB"
```
### Batch Processing System
```toml
# Few concurrent tasks, heavy compute per batch
[runtime]
num_worker_threads = 4
compute_threads = 12
compute_stack_size = "8MB"
```
### Mixed Workload
```toml
# Balance between async I/O and compute
[runtime]
num_worker_threads = 8
compute_threads = 6
compute_stack_size = "2MB"
```
## Summary
The Rayon-Tokio integration provides a powerful model for handling mixed workloads:
1. **Tokio** manages async I/O and coordination
2. **Rayon** provides a shared compute thread pool
3. Multiple async tasks can concurrently use different Rayon patterns
4. Work-stealing ensures efficient resource utilization
5. Clear separation between I/O-bound and CPU-bound work
This architecture enables building high-performance services that efficiently handle both network I/O and CPU-intensive computations without manual thread management or complex synchronization.
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use anyhow::Result;
use dynamo_runtime::compute::{ComputeConfig, ComputePool};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
use tokio::time::sleep;
/// Demo workload: the sum of all primes in `2..=n` (`0` when `n < 2`).
///
/// Non-primes contribute zero to the running total.
fn compute_primes_sum(n: u64) -> u64 {
    (2..=n)
        .map(|candidate| if is_prime(candidate) { candidate } else { 0 })
        .sum()
}
/// Returns `true` when `n` is prime.
///
/// Handles `n <= 3` directly, rejects multiples of 2 and 3, then tries the
/// divisor pairs `(d, d + 2)` for `d = 5, 11, 17, ...` up to `sqrt(n)`.
fn is_prime(n: u64) -> bool {
    if n <= 3 {
        // 0 and 1 are not prime; 2 and 3 are.
        return n > 1;
    }
    if n.is_multiple_of(2) || n.is_multiple_of(3) {
        return false;
    }

    let limit = (n as f64).sqrt() as u64;
    let mut divisor = 5u64;
    while divisor <= limit {
        if n.is_multiple_of(divisor) || n.is_multiple_of(divisor + 2) {
            return false;
        }
        divisor += 6;
    }
    true
}
/// Simulated async task that does both I/O and compute, with the compute step
/// executed directly on the async worker thread.
///
/// Sleeps for `io_delay`, sums the primes up to `n` inline, sleeps for
/// `io_delay` again, and returns `id` together with the total elapsed time.
async fn async_task_inline(id: usize, n: u64, io_delay: Duration) -> (usize, Duration) {
    let start = Instant::now();
    // Simulate async I/O operation
    sleep(io_delay).await;
    // CPU-intensive work (blocks the async runtime!). The result is otherwise
    // unused, so `black_box` keeps the optimizer from deleting the computation
    // and silently turning this into a no-op in release builds.
    let _result = std::hint::black_box(compute_primes_sum(std::hint::black_box(n)));
    // More async I/O
    sleep(io_delay).await;
    (id, start.elapsed())
}
/// Mixed I/O + compute task whose compute step is submitted to the compute pool.
///
/// Sleeps for `io_delay`, awaits `pool.execute` for the prime sum up to `n`,
/// sleeps for `io_delay` again, and returns `id` with the total elapsed time.
/// Panics if the pool reports an error.
async fn async_task_rayon(
    id: usize,
    n: u64,
    io_delay: Duration,
    pool: Arc<ComputePool>,
) -> (usize, Duration) {
    let began = Instant::now();

    // First simulated I/O wait.
    sleep(io_delay).await;

    // Hand the CPU-bound part to the pool; this task only awaits the result.
    let offloaded = pool.execute(move || compute_primes_sum(n));
    let _ = offloaded.await.unwrap();

    // Second simulated I/O wait.
    sleep(io_delay).await;

    let elapsed = began.elapsed();
    (id, elapsed)
}
/// Mixed I/O + compute task whose compute step runs via `spawn_blocking`.
///
/// Sleeps for `io_delay`, awaits a blocking task computing the prime sum up to
/// `n`, sleeps for `io_delay` again, and returns `id` with the total elapsed
/// time. Panics if the blocking task fails to join.
async fn async_task_spawn_blocking(id: usize, n: u64, io_delay: Duration) -> (usize, Duration) {
    let began = Instant::now();

    // First simulated I/O wait.
    sleep(io_delay).await;

    // CPU-bound part goes to Tokio's blocking pool.
    let blocking = tokio::task::spawn_blocking(move || compute_primes_sum(n));
    let _ = blocking.await.unwrap();

    // Second simulated I/O wait.
    sleep(io_delay).await;

    let elapsed = began.elapsed();
    (id, elapsed)
}
/// Spawns `num_tasks` concurrent tasks in the given `mode` and waits for all of them.
///
/// `mode` selects the task flavour: `"inline"`, `"rayon"` (requires `pool` to be
/// `Some`), or `"spawn_blocking"`; any other value panics inside the task.
/// A `.` is printed for every tenth completed task as a progress indicator.
///
/// Returns the wall-clock time for the whole batch and the per-task latencies
/// in spawn order.
async fn run_throughput_test(
    name: &str,
    num_tasks: usize,
    n: u64,
    io_delay: Duration,
    pool: Option<Arc<ComputePool>>,
    mode: &str,
) -> (Duration, Vec<Duration>) {
    println!("\n Running: {} (n={}, tasks={})", name, n, num_tasks);

    let finished = Arc::new(AtomicU64::new(0));
    let began = Instant::now();

    let mut handles = Vec::new();
    for id in 0..num_tasks {
        // Every task owns its own copies of the shared inputs.
        let task_pool = pool.clone();
        let finished = Arc::clone(&finished);
        let task_mode = mode.to_string();

        handles.push(tokio::spawn(async move {
            let outcome = match task_mode.as_str() {
                "inline" => async_task_inline(id, n, io_delay).await,
                "rayon" => async_task_rayon(id, n, io_delay, task_pool.unwrap()).await,
                "spawn_blocking" => async_task_spawn_blocking(id, n, io_delay).await,
                _ => panic!("Unknown mode"),
            };

            let done_so_far = finished.fetch_add(1, Ordering::Relaxed) + 1;
            if done_so_far.is_multiple_of(10) {
                use std::io::{self, Write};
                print!(".");
                io::stdout().flush().unwrap();
            }
            outcome
        }));
    }

    let mut latencies = Vec::new();
    for handle in handles {
        let (_, latency) = handle.await.unwrap();
        latencies.push(latency);
    }

    let total_time = began.elapsed();
    println!(" Done in {:.2}s", total_time.as_secs_f64());
    (total_time, latencies)
}
/// Sorts `latencies` in place and returns the `(p50, p95, p99)` values.
///
/// Each percentile is the element at index `len * pct / 100` of the sorted
/// slice. An empty slice yields `Duration::ZERO` for all three percentiles
/// instead of panicking on an out-of-bounds index.
fn calculate_percentiles(latencies: &mut [Duration]) -> (Duration, Duration, Duration) {
    if latencies.is_empty() {
        return (Duration::ZERO, Duration::ZERO, Duration::ZERO);
    }
    // Equal durations are indistinguishable, so an unstable sort gives the same result.
    latencies.sort_unstable();
    let len = latencies.len();
    // `pct < 100` and `len >= 1` keep the index strictly below `len`.
    let at = |pct: usize| latencies[len * pct / 100];
    (at(50), at(95), at(99))
}
/// Prints total time, throughput (tasks per second), and the p50/p95/p99
/// latencies in milliseconds for one test run.
///
/// `latencies` is sorted in place as a side effect of the percentile
/// calculation. `_name` is currently unused.
fn print_results(_name: &str, total: Duration, latencies: &mut [Duration]) {
    let millis = |d: Duration| d.as_secs_f64() * 1000.0;
    let total_secs = total.as_secs_f64();

    let (p50, p95, p99) = calculate_percentiles(latencies);
    let throughput = latencies.len() as f64 / total_secs;

    println!(" Total time: {:.2}s", total_secs);
    println!(" Throughput: {:.1} tasks/s", throughput);
    println!(" Latency p50: {:.2}ms", millis(p50));
    println!(" Latency p95: {:.2}ms", millis(p95));
    println!(" Latency p99: {:.2}ms", millis(p99));
}
/// Entry point of the async-throughput demo.
///
/// Builds a 4-thread compute pool, then for each compute load (`n` = 10, 1 000,
/// 100 000) measures the bare compute time and runs 100 concurrent tasks in
/// three modes (inline, Rayon offload, `spawn_blocking`), printing throughput,
/// latency percentiles, and relative speedups. Finishes with the pool metrics.
///
/// # Errors
///
/// Returns an error if the compute pool cannot be created.
#[tokio::main]
async fn main() -> Result<()> {
    println!("Async Throughput Demonstration");
    println!("==================================\n");
    println!("This demo shows how compute-intensive work affects async task throughput.\n");

    // Create compute pool directly
    let compute_config = ComputeConfig {
        num_threads: Some(4),
        stack_size: Some(2 * 1024 * 1024),
        thread_prefix: "demo".to_string(),
        pin_threads: false,
    };
    let pool = Arc::new(ComputePool::new(compute_config)?);
    println!("Configuration:");
    println!(" Rayon threads: {}", pool.num_threads());

    // Test parameters
    let num_tasks = 100;
    let io_delay = Duration::from_millis(10);
    println!("\nTest: {} concurrent async tasks", num_tasks);
    println!("Each task: 10ms I/O → compute → 10ms I/O");
    println!("Expected minimum time: ~20ms (if no blocking)");

    // Test with different compute loads
    for n in [10, 1_000, 100_000] {
        println!("\n{:=<60}", "");
        println!("Compute load: n={} (prime sum)", n);
        println!("{:=<60}", "");

        // Measure compute time alone. The result is discarded, so `black_box`
        // keeps the optimizer from removing the computation being timed.
        let compute_start = Instant::now();
        let _ = std::hint::black_box(compute_primes_sum(std::hint::black_box(n)));
        let compute_time = compute_start.elapsed();
        println!(
            "Pure compute time: {:.2}ms",
            compute_time.as_secs_f64() * 1000.0
        );

        // Test 1: Inline execution (blocks async runtime)
        let (total1, mut latencies1) = run_throughput_test(
            "Inline (blocks runtime)",
            num_tasks,
            n,
            io_delay,
            None,
            "inline",
        )
        .await;
        print_results("Inline", total1, &mut latencies1);

        // Test 2: Rayon offload
        let (total2, mut latencies2) = run_throughput_test(
            "Rayon offload",
            num_tasks,
            n,
            io_delay,
            Some(pool.clone()),
            "rayon",
        )
        .await;
        print_results("Rayon", total2, &mut latencies2);

        // Test 3: spawn_blocking
        let (total3, mut latencies3) = run_throughput_test(
            "spawn_blocking",
            num_tasks,
            n,
            io_delay,
            None,
            "spawn_blocking",
        )
        .await;
        print_results("spawn_blocking", total3, &mut latencies3);

        // Analysis: ratio of total batch times relative to the inline run
        println!("\n Impact Analysis:");
        let speedup_rayon = total1.as_secs_f64() / total2.as_secs_f64();
        let speedup_spawn = total1.as_secs_f64() / total3.as_secs_f64();
        println!(
            " Rayon vs Inline: {:.2}x throughput",
            speedup_rayon
        );
        println!(
            " spawn_blocking vs Inline: {:.2}x throughput",
            speedup_spawn
        );
        // `as_millis` truncates, so this only fires for compute times of 2ms or more.
        if compute_time.as_millis() > 1 {
            let blocking_factor = compute_time.as_secs_f64() / io_delay.as_secs_f64();
            println!(
                "\n Compute time ({:.1}ms) is {:.1}x the I/O time",
                compute_time.as_secs_f64() * 1000.0,
                blocking_factor
            );
            println!(" This severely impacts async concurrency when run inline!");
        }
    }

    // Show pool metrics
    println!("\n Compute Pool Metrics:");
    println!("========================");
    println!("{}", pool.metrics());
    println!("\n Conclusion:");
    println!("==============");
    println!("• Small compute (n=10): Overhead may not justify offloading");
    println!("• Medium compute (n=1000): Offloading preserves async throughput");
    println!("• Large compute (n=100000): Offloading is essential for responsiveness");
    println!("\nKey insight: Even small amounts of blocking compute can destroy");
    println!("async throughput when you have many concurrent tasks!");
    Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Benchmark demonstrating async I/O vs compute workload interaction
//!
//! This example measures how different types of compute workloads interfere with
//! async I/O latency by comparing actual elapsed time vs expected sleep time.
//!
//! Key measurements:
//! - Baseline async overhead with no compute load
//! - Interference from small (<100μs), medium (~500μs), and large (~2-5ms) compute tasks
//! - Comparison between all-async (4 Tokio threads) vs hybrid (2 Tokio + 2 Rayon)
//! - Impact of offloading compute work to dedicated Rayon threads
//!
//! The benchmark spawns many lightweight async tasks doing timed sleeps, then runs
//! a fixed compute workload while measuring how much the compute work delays the
//! async tasks from being revisited after their sleeps complete.
//!
//! Two configurations are tested with EXACTLY 4 total threads:
//! 1. All-Async: 4 Tokio threads (compute runs inline, blocking async work)
//! 2. Hybrid: 2 Tokio threads + 2 Rayon threads (compute offloaded, async stays responsive)
use anyhow::Result;
use dynamo_runtime::{
Runtime,
compute::{ComputeConfig, ComputePool},
compute_large, compute_medium, compute_small,
};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use tokio::sync::Semaphore;
use tokio::time::sleep;
use tokio_util::sync::CancellationToken;
/// One sleep-latency sample recorded by `sleep_worker`.
#[derive(Debug, Clone)]
struct SleepMeasurement {
// Requested sleep duration, in milliseconds.
_expected_ms: f64,
// Time that actually elapsed around the sleep, in milliseconds.
_actual_ms: f64,
// Scheduling overhead in milliseconds: actual - expected.
overhead_ms: f64, // actual - expected
}
/// Summary statistics over the `overhead_ms` values of a set of sleep
/// measurements, as produced by `calculate_stats` (all values in milliseconds).
#[derive(Debug, Clone)]
struct LatencyStats {
// Percentiles taken from the sorted overheads.
p50: f64,
p95: f64,
p99: f64,
// Largest observed overhead.
max: f64,
// Arithmetic mean of the overheads.
mean: f64,
// Number of measurements summarized (0 means all other fields are 0.0).
count: usize,
}
/// Test results for a single configuration.
///
/// NOTE(review): the code that fills this struct is outside the visible part of
/// the file; the field notes below are inferred from names and types - confirm.
#[derive(Debug)]
struct TestResults {
// Presumably the sleep-overhead stats collected with no compute load running.
baseline_overhead: LatencyStats,
// Presumably the sleep-overhead stats while compute runs; `None` without a workload.
compute_overhead: Option<LatencyStats>,
// Presumably the wall-clock time of the compute batch; `None` without a workload.
compute_duration: Option<Duration>,
_total_sleep_measurements: usize,
}
/// Type of workload to run; decides which `TaskType` each task in a batch gets.
#[derive(Debug, Clone, Copy, PartialEq)]
enum WorkloadType {
None, // No compute (baseline); the batch returns immediately
Small, // 100% small tasks
Medium, // 100% medium tasks
Large, // 100% large tasks
Mixed, // Round-robin by task index: small, medium, large
}
/// Size class of an individual compute task, as dispatched by `execute_compute_task`.
#[derive(Debug, Clone, Copy)]
enum TaskType {
// Always runs inline.
Small,
// Offloaded to the compute pool when one is available, otherwise inline.
Medium,
// Offloaded to the compute pool when one is available, otherwise inline.
Large,
}
/// Tunable CPU workload: accumulates every prime in `2..=n`.
///
/// The cost grows with `n`, which is how the calibration step hits a target
/// duration. Returns `0` for `n < 2`.
fn compute_primes_sum(n: u64) -> u64 {
    (2..=n).fold(0u64, |total, candidate| {
        if is_prime(candidate) {
            total + candidate
        } else {
            total
        }
    })
}
/// Primality check by trial division over 6k ± 1 candidates.
///
/// Values below 2 are not prime, 2 and 3 are, and anything divisible by 2 or 3
/// is rejected before the divisor scan up to `sqrt(n)`.
fn is_prime(n: u64) -> bool {
    if n < 2 {
        return false;
    }
    if n < 4 {
        return true;
    }
    let divisible_by = |d: u64| n.is_multiple_of(d);
    if divisible_by(2) || divisible_by(3) {
        return false;
    }
    let limit = (n as f64).sqrt() as u64;
    // Prime exactly when no pair (i, i + 2) contains a divisor.
    (5..=limit)
        .step_by(6)
        .all(|i| !divisible_by(i) && !divisible_by(i + 2))
}
// Global tuned values (set during calibration): the upper bound `n` handed to
// `compute_primes_sum` by the small/medium/large compute tasks. The initial
// values are only defaults until `calibrate_compute_functions` overwrites them.
// NOTE(review): `static mut` relies on every write finishing before any other
// thread reads these; `AtomicU64` would remove the `unsafe` at all use sites.
static mut SMALL_N: u64 = 1_500;
static mut MEDIUM_N: u64 = 20_000;
static mut LARGE_N: u64 = 120_000;
/// Small compute task (calibration target ~10μs): prime sum up to `SMALL_N`.
fn small_compute() -> u64 {
    // SAFETY: plain by-value read of a `u64`; assumes `SMALL_N` is only written
    // during calibration, before compute tasks run.
    let n = unsafe { SMALL_N };
    compute_primes_sum(n)
}
/// Medium compute task (calibration target ~500μs): prime sum up to `MEDIUM_N`.
fn medium_compute() -> u64 {
    // SAFETY: plain by-value read of a `u64`; assumes `MEDIUM_N` is only written
    // during calibration, before compute tasks run.
    let n = unsafe { MEDIUM_N };
    compute_primes_sum(n)
}
/// Large compute task (calibration target ~2-5ms): prime sum up to `LARGE_N`.
fn large_compute() -> u64 {
    // SAFETY: plain by-value read of a `u64`; assumes `LARGE_N` is only written
    // during calibration, before compute tasks run.
    let n = unsafe { LARGE_N };
    compute_primes_sum(n)
}
/// Dynamically tunes the input size of `compute_primes_sum` to hit a target time.
///
/// Binary-searches `n` in `10..=1_000_000` for at most 20 steps, timing each
/// probe as the average of 3 runs. Returns as soon as a probe lands within 20%
/// of `target_us`; otherwise returns the probe that came closest.
///
/// * `target_us` - desired execution time in microseconds.
/// * `initial_n` - fallback result if no probe is ever recorded; it is not used
///   as the starting point of the search.
/// * `name` - label used in the printed progress line.
fn tune_compute_n(target_us: f64, initial_n: u64, name: &str) -> u64 {
    const MAX_STEPS: usize = 20;
    const RUNS_PER_PROBE: u32 = 3;

    let mut best_n = initial_n;
    let mut best_diff = f64::MAX;
    // Binary search bounds for the right value
    let mut low = 10u64;
    let mut high = 1_000_000u64;

    for _ in 0..MAX_STEPS {
        let n = (low + high) / 2;

        // Measure this n value (average of several runs for stability).
        // `black_box` prevents the optimizer from deleting the discarded,
        // side-effect-free computation, which would make every probe look free.
        let mut total_time = Duration::ZERO;
        for _ in 0..RUNS_PER_PROBE {
            let start = Instant::now();
            let _ = std::hint::black_box(compute_primes_sum(std::hint::black_box(n)));
            total_time += start.elapsed();
        }
        let elapsed_us = total_time.as_secs_f64() * 1_000_000.0 / f64::from(RUNS_PER_PROBE);

        let diff = (elapsed_us - target_us).abs();
        if diff < best_diff {
            best_diff = diff;
            best_n = n;
        }

        // Check if we're close enough (within 20%)
        if diff / target_us < 0.20 {
            println!(
                " ✓ {} tuned to n={} ({:.1}μs, target {:.0}μs)",
                name, n, elapsed_us, target_us
            );
            return n;
        }

        // Adjust search range; `n >= 10`, so `n - 1` cannot underflow.
        if elapsed_us < target_us {
            low = n + 1;
        } else {
            high = n - 1;
        }
        if low > high {
            break;
        }
    }

    // Use best found value; re-measure it once for the report.
    let start = Instant::now();
    let _ = std::hint::black_box(compute_primes_sum(std::hint::black_box(best_n)));
    let final_time = start.elapsed().as_secs_f64() * 1_000_000.0;
    println!(
        " ✓ {} tuned to n={} ({:.1}μs, target {:.0}μs)",
        name, best_n, final_time, target_us
    );
    best_n
}
/// Calibrates the three workload sizes for the current machine.
///
/// Runs `tune_compute_n` for the small (10μs), medium (500μs) and large (3ms)
/// targets, stores the results in `SMALL_N`, `MEDIUM_N` and `LARGE_N`, and
/// prints the tuned values.
fn calibrate_compute_functions() {
    println!("\n Dynamically calibrating compute functions for this machine...");
    println!("{:-<60}", "");

    // SAFETY: plain by-value reads of `u64` statics; assumes calibration runs
    // before any other thread touches them.
    let (small_seed, medium_seed, large_seed) = unsafe { (SMALL_N, MEDIUM_N, LARGE_N) };

    // Tune each function
    let small = tune_compute_n(10.0, small_seed, "Small");
    let medium = tune_compute_n(500.0, medium_seed, "Medium");
    let large = tune_compute_n(3000.0, large_seed, "Large"); // Target 3ms (middle of 2-5ms range)

    // SAFETY: same single-threaded calibration assumption as the reads above.
    unsafe {
        SMALL_N = small;
        MEDIUM_N = medium;
        LARGE_N = large;
    }

    println!();
    println!("For future runs on this machine, you can use:");
    println!(" SMALL_N = {}", small);
    println!(" MEDIUM_N = {}", medium);
    println!(" LARGE_N = {}", large);
}
/// Sleeps for `sleep_duration` in a loop and records how late each wake-up was.
///
/// After every completed sleep a `SleepMeasurement` is appended to `results`.
/// The loop ends when `cancel` is triggered, either between sleeps or while
/// one is in progress (in which case that sleep is not recorded).
async fn sleep_worker(
    sleep_duration: Duration,
    results: Arc<Mutex<Vec<SleepMeasurement>>>,
    cancel: CancellationToken,
) {
    let expected_secs = sleep_duration.as_secs_f64();

    loop {
        if cancel.is_cancelled() {
            break;
        }

        let began = Instant::now();
        tokio::select! {
            _ = sleep(sleep_duration) => {
                let actual_secs = began.elapsed().as_secs_f64();
                let sample = SleepMeasurement {
                    _expected_ms: expected_secs * 1000.0,
                    _actual_ms: actual_secs * 1000.0,
                    overhead_ms: (actual_secs - expected_secs) * 1000.0,
                };
                // The std mutex guard is dropped within this statement, never held across an await.
                results.lock().unwrap().push(sample);
            }
            _ = cancel.cancelled() => break,
        }
    }
}
/// Execute a single compute task based on its type.
///
/// * `Small` tasks always run inline through `compute_small!`.
/// * `Medium` and `Large` tasks are submitted to `pool` when one is supplied;
///   without a pool they run inline on the calling Tokio thread and block it
///   for the duration of the computation.
///
/// # Errors
///
/// Propagates whatever error `ComputePool::execute` returns for offloaded tasks.
async fn execute_compute_task(task_type: TaskType, pool: Option<Arc<ComputePool>>) -> Result<u64> {
    // `pool` is owned, so it is moved into the match instead of being cloned.
    match (task_type, pool) {
        // Small tasks always run inline
        (TaskType::Small, _) => Ok(compute_small!(small_compute())),
        // Medium tasks: offload if pool available
        (TaskType::Medium, Some(pool)) => pool.execute(medium_compute).await,
        // No pool - run inline on Tokio thread (will block!)
        (TaskType::Medium, None) => Ok(medium_compute()),
        // Large tasks: offload if pool available
        (TaskType::Large, Some(pool)) => pool.execute(large_compute).await,
        // No pool - run inline on Tokio thread (will severely block!)
        (TaskType::Large, None) => Ok(large_compute()),
    }
}
/// Spawns `num_tasks` compute tasks of the given workload and waits for all of them.
///
/// A permit from `concurrency_limit` is acquired before each task is spawned and
/// is held by the task until it finishes, so at most that many tasks are in
/// flight at once. Returns the wall-clock time from the first acquisition to the
/// last completion, or a zero duration for `WorkloadType::None`.
///
/// # Panics
/// Panics if the semaphore is closed, if a spawned task panics, or if a task
/// returns an error.
async fn execute_compute_batch(
    workload_type: WorkloadType,
    num_tasks: usize,
    concurrency_limit: Arc<Semaphore>,
    pool: Option<Arc<ComputePool>>,
) -> Duration {
    if workload_type == WorkloadType::None {
        return Duration::from_secs(0);
    }

    let started = Instant::now();
    let mut in_flight = Vec::new();

    for index in 0..num_tasks {
        let slot = concurrency_limit.clone().acquire_owned().await.unwrap();
        let task_pool = pool.clone();

        // Mixed workloads cycle small -> medium -> large by task index.
        let task_type = match (workload_type, index % 3) {
            (WorkloadType::Small, _) | (WorkloadType::Mixed, 0) => TaskType::Small,
            (WorkloadType::Medium, _) | (WorkloadType::Mixed, 1) => TaskType::Medium,
            (WorkloadType::Large, _) | (WorkloadType::Mixed, _) => TaskType::Large,
            (WorkloadType::None, _) => unreachable!(),
        };

        in_flight.push(tokio::spawn(async move {
            // Keep the permit alive for the whole task.
            let _slot = slot;
            execute_compute_task(task_type, task_pool).await
        }));
    }

    // Drain every task before stopping the clock.
    for task in in_flight {
        task.await.unwrap().unwrap();
    }

    started.elapsed()
}
/// Summarises the `overhead_ms` values of `measurements`.
///
/// Returns an all-zero `LatencyStats` for an empty slice. Otherwise the overheads
/// are sorted ascending and the percentiles are read by index (`len / 2`,
/// `len * 95 / 100`, `len * 99 / 100`), `max` is the last element and `mean` is
/// the arithmetic average.
fn calculate_stats(measurements: &[SleepMeasurement]) -> LatencyStats {
    if measurements.is_empty() {
        return LatencyStats {
            p50: 0.0,
            p95: 0.0,
            p99: 0.0,
            max: 0.0,
            mean: 0.0,
            count: 0,
        };
    }

    let mut overheads: Vec<f64> = measurements.iter().map(|m| m.overhead_ms).collect();
    // `total_cmp` is a total order over f64, so a stray NaN sample cannot panic
    // the sort the way `partial_cmp(..).unwrap()` would.
    overheads.sort_unstable_by(f64::total_cmp);

    let len = overheads.len();
    LatencyStats {
        p50: overheads[len / 2],
        p95: overheads[len * 95 / 100],
        p99: overheads[len * 99 / 100],
        // Non-empty is guaranteed by the guard above.
        max: overheads[len - 1],
        mean: overheads.iter().sum::<f64>() / len as f64,
        count: len,
    }
}
/// Measures async sleep latency before and during one compute workload.
///
/// Sequence:
/// 1. spawn the sleep probes,
/// 2. let them run alone for the baseline window,
/// 3. run the compute batch (or idle for one more second when the workload is
///    `WorkloadType::None`),
/// 4. cancel and join the probes,
/// 5. split the recorded samples at the baseline boundary and summarise both halves.
///
/// `_runtime` is accepted but not used.
async fn run_interference_test(
    _runtime: Arc<Runtime>,
    workload_type: WorkloadType,
    pool: Option<Arc<ComputePool>>,
) -> TestResults {
    const NUM_SLEEP_TASKS: usize = 100;
    const SLEEP_DURATION_MS: u64 = 1;
    const NUM_COMPUTE_TASKS: usize = 2000;
    const CONCURRENCY_LIMIT: usize = 8;
    const BASELINE_DURATION_SECS: u64 = 1;

    // Step 1: background async load.
    let samples = Arc::new(Mutex::new(Vec::new()));
    let stop = CancellationToken::new();
    let probes: Vec<_> = (0..NUM_SLEEP_TASKS)
        .map(|_| {
            tokio::spawn(sleep_worker(
                Duration::from_millis(SLEEP_DURATION_MS),
                samples.clone(),
                stop.clone(),
            ))
        })
        .collect();

    // Step 2: baseline window; remember how many samples belong to it.
    sleep(Duration::from_secs(BASELINE_DURATION_SECS)).await;
    let baseline_count = samples.lock().unwrap().len();

    // Step 3: compute phase, or an idle second for the baseline run.
    let compute_duration = if workload_type == WorkloadType::None {
        sleep(Duration::from_secs(1)).await;
        None
    } else {
        let limiter = Arc::new(Semaphore::new(CONCURRENCY_LIMIT));
        Some(execute_compute_batch(workload_type, NUM_COMPUTE_TASKS, limiter, pool).await)
    };

    // Step 4: stop the probes and wait for them to exit.
    stop.cancel();
    for probe in probes {
        probe.await.unwrap();
    }

    // Step 5: split samples into "before" and "during" and summarise.
    let all_measurements = samples.lock().unwrap().clone();
    let total = all_measurements.len();
    let baseline = &all_measurements[..baseline_count.min(total)];
    let during_compute = (baseline_count < total).then(|| &all_measurements[baseline_count..]);

    TestResults {
        baseline_overhead: calculate_stats(baseline),
        compute_overhead: during_compute.map(calculate_stats),
        compute_duration,
        _total_sleep_measurements: total,
    }
}
/// Runs every workload through `run_interference_test` and prints a report.
///
/// For each workload the baseline latency is printed, followed (when available)
/// by the latency observed during the compute phase, the ratio between the two
/// means, and the compute throughput.
///
/// `pool` is forwarded to each test: `Some` offloads medium/large tasks to the
/// pool, `None` runs them inline on Tokio threads.
///
/// # Errors
/// Returns an error if `Runtime::from_current` fails.
async fn run_all_tests(pool: Option<Arc<ComputePool>>) -> Result<()> {
    // Number of tasks each compute batch submits. This must mirror
    // `NUM_COMPUTE_TASKS` in `run_interference_test`; the throughput line
    // previously divided 1000 by the duration and under-reported by 2x.
    const COMPUTE_TASKS_PER_RUN: f64 = 2000.0;

    let workload_types = [
        ("Baseline (no compute)", WorkloadType::None),
        ("100% Small (~10μs each)", WorkloadType::Small),
        ("100% Medium (~500μs each)", WorkloadType::Medium),
        ("100% Large (~2-5ms each)", WorkloadType::Large),
        ("Mixed 33/33/33", WorkloadType::Mixed),
    ];

    // Create dummy runtime for the test functions
    let runtime = Arc::new(Runtime::from_current()?);

    for (name, workload) in &workload_types {
        println!("\n Workload: {}", name);
        println!("{:-<50}", "");

        let results = run_interference_test(runtime.clone(), *workload, pool.clone()).await;

        // Baseline latency is always reported.
        println!(" Baseline async overhead (first {}s):", 1);
        println!(
            " Mean: {:.3}ms, P50: {:.3}ms, P95: {:.3}ms, P99: {:.3}ms",
            results.baseline_overhead.mean,
            results.baseline_overhead.p50,
            results.baseline_overhead.p95,
            results.baseline_overhead.p99
        );
        println!(" Measurements: {}", results.baseline_overhead.count);

        // Latency while the second phase was running, if any samples were taken.
        if let Some(compute_overhead) = results.compute_overhead {
            println!("\n During compute workload:");
            println!(
                " Mean: {:.3}ms, P50: {:.3}ms, P95: {:.3}ms, P99: {:.3}ms",
                compute_overhead.mean,
                compute_overhead.p50,
                compute_overhead.p95,
                compute_overhead.p99
            );
            println!(
                " Max: {:.3}ms, Measurements: {}",
                compute_overhead.max, compute_overhead.count
            );

            // Ratio of mean overheads; skipped when the baseline mean is not positive.
            if results.baseline_overhead.mean > 0.0 {
                let interference_factor = compute_overhead.mean / results.baseline_overhead.mean;
                println!(
                    "\n Interference factor: {:.1}x slower",
                    interference_factor
                );
                let impact = if interference_factor < 2.0 {
                    "Minimal - async remains responsive"
                } else if interference_factor < 10.0 {
                    "Moderate - noticeable async delays"
                } else {
                    "SEVERE - async tasks are heavily blocked!"
                };
                println!(" Impact: {}", impact);
            }
        }

        // Compute duration and throughput (absent for the baseline workload).
        if let Some(duration) = results.compute_duration {
            println!(
                "\n Compute workload completed in: {:.2}s",
                duration.as_secs_f64()
            );
            println!(
                " Throughput: {:.0} tasks/sec",
                COMPUTE_TASKS_PER_RUN / duration.as_secs_f64()
            );
        }
    }

    Ok(())
}
/// Entry point of the async-vs-compute interference benchmark.
///
/// Runs three phases in order, each on its own freshly built Tokio runtime:
/// 1. all workloads with 4 Tokio workers and no compute pool,
/// 2. all workloads with 2 Tokio workers plus a 2-thread `ComputePool`,
/// 3. a demonstration of the thread-local `compute_*!` macros.
///
/// Returns an error if a runtime or compute pool cannot be built, or if any
/// phase reports an error.
fn main() -> Result<()> {
println!(" Async vs Compute Interaction Benchmark");
println!("==========================================");
println!();
println!("This benchmark measures how compute workloads interfere with async I/O latency.");
println!("We test with EXACTLY 4 total threads in two configurations:");
println!(" 1. All-Async: 4 Tokio threads (compute blocks async work)");
println!(" 2. Hybrid: 2 Tokio + 2 Rayon threads (compute offloaded)");
println!(" 3. Bonus: Thread-local macro demonstration");
println!();
println!("Lower overhead numbers mean better async responsiveness.");
// Calibrate compute functions
calibrate_compute_functions();
// Test 1: All Async (4 Tokio threads, no Rayon)
println!("\n{:=<70}", "");
println!("Configuration 1: All-Async (4 Tokio threads, no Rayon)");
println!("{:=<70}", "");
println!(" Compute tasks run INLINE on Tokio threads, blocking async work!");
let all_async_rt = tokio::runtime::Builder::new_multi_thread()
.worker_threads(4)
.thread_name("tokio-worker")
.enable_all()
.build()?;
all_async_rt.block_on(async {
// No compute pool for all-async mode
// All compute work will run inline on Tokio threads
run_all_tests(None).await
})?;
// Test 2: Hybrid (2 Tokio + 2 Rayon)
println!("\n{:=<70}", "");
println!("Configuration 2: Hybrid (2 Tokio + 2 Rayon threads)");
println!("{:=<70}", "");
println!(" Compute tasks offloaded to Rayon, keeping async threads free!");
let hybrid_rt = tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.thread_name("tokio-worker")
.enable_all()
.build()?;
// Create Rayon pool with 2 threads
let compute_pool = Arc::new(ComputePool::new(ComputeConfig {
num_threads: Some(2),
stack_size: Some(2 * 1024 * 1024),
thread_prefix: "rayon".to_string(),
pin_threads: false,
})?);
hybrid_rt.block_on(async { run_all_tests(Some(compute_pool)).await })?;
// Summary
println!("\n{:=<70}", "");
println!(" Key Takeaway");
println!("{:=<70}", "");
println!();
println!("The benchmark demonstrates that offloading compute work to dedicated");
println!("threads becomes increasingly important as task duration increases.");
println!("Look at the interference factors above to see the actual impact.");
// Test 3: Demonstrate thread-local macros
println!("\n{:=<70}", "");
println!("Configuration 3: Thread-Local Macro Demonstration");
println!("{:=<70}", "");
println!("Testing thread-local compute context initialization...");
// Create a runtime with thread-local setup
let macro_rt = tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.thread_name("macro-demo")
.enable_all()
.build()?;
// Create compute pool for macros
let macro_pool = Arc::new(ComputePool::new(ComputeConfig {
num_threads: Some(2),
stack_size: Some(2 * 1024 * 1024),
thread_prefix: "macro-compute".to_string(),
pin_threads: false,
})?);
macro_rt.block_on(async {
// We can't directly create a Runtime with private fields, so we'll
// initialize thread-local context manually using a barrier approach
// Set up semaphore permits
let permits = Arc::new(tokio::sync::Semaphore::new(1)); // 2 workers - 1
// Detect number of worker threads
use std::collections::HashSet;
use std::sync::Mutex;
let thread_ids = Arc::new(Mutex::new(HashSet::new()));
let mut handles = Vec::new();
// Probe to find worker thread count
// NOTE(review): `spawn_blocking` closures run on Tokio's blocking thread pool,
// so the set below presumably counts blocking-pool threads rather than the
// async worker threads — confirm this is the intended measurement.
for _ in 0..50 {
let ids = Arc::clone(&thread_ids);
let handle = tokio::task::spawn_blocking(move || {
let thread_id = std::thread::current().id();
ids.lock().unwrap().insert(thread_id);
});
handles.push(handle);
}
// Join errors from the probe tasks are deliberately ignored.
for handle in handles {
let _ = handle.await;
}
let num_workers = thread_ids.lock().unwrap().len();
println!(" Detected {} worker threads", num_workers);
// Now initialize thread-local on all workers using a barrier
// The barrier releases only once `num_workers` closures are waiting at the
// same time, which forces each closure onto a distinct thread.
let barrier = Arc::new(std::sync::Barrier::new(num_workers));
let mut init_handles = Vec::new();
for i in 0..num_workers {
let barrier_clone = Arc::clone(&barrier);
let pool_clone = Arc::clone(&macro_pool);
let permits_clone = Arc::clone(&permits);
let handle = tokio::task::spawn_blocking(move || {
// Wait at barrier
barrier_clone.wait();
// Initialize thread-local
dynamo_runtime::compute::thread_local::initialize_context(
pool_clone,
permits_clone,
);
println!(" Initialized thread-local on worker {}", i);
});
init_handles.push(handle);
}
// Unlike the probe loop above, a failed initialization task aborts the demo.
for handle in init_handles {
handle.await?;
}
// Test if macros work
println!("\n Testing thread-local macros:");
// NOTE(review): this check runs on the thread driving `block_on`, which may not
// be one of the threads initialized above — verify which branch is expected.
if dynamo_runtime::compute::thread_local::has_compute_context() {
println!(" Thread-local context is available!");
// Test compute_small! macro
println!("\n Testing compute_small! (inline):");
let start = std::time::Instant::now();
let result = compute_small!(small_compute());
println!(" Result: {}, Time: {:?}", result, start.elapsed());
// Test compute_medium! macro (would use thread-local context)
println!("\n Testing compute_medium! (block_in_place or offload):");
let start = std::time::Instant::now();
let result = compute_medium!(medium_compute());
println!(" Result: {}, Time: {:?}", result, start.elapsed());
// Test compute_large! macro (would use thread-local context)
println!("\n Testing compute_large! (always offload):");
let start = std::time::Instant::now();
let result = compute_large!(large_compute());
println!(" Result: {}, Time: {:?}", result, start.elapsed());
println!("\n All macros work with thread-local context!");
} else {
println!(" Thread-local context NOT available - macros would fail");
}
// Explicit error type so `?` inside this async block can be inferred.
Ok::<_, anyhow::Error>(())
})?;
println!();
println!(" Benchmark complete!");
Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use anyhow::Result;
use dynamo_runtime::compute::{ComputeConfig, ComputePool};
use std::sync::Arc;
use std::time::{Duration, Instant};
/// CPU-bound workload: adds up every prime in the inclusive range `2..=n`.
///
/// Returns 0 when `n < 2`.
fn compute_primes_sum(n: u64) -> u64 {
    (2..=n).filter(|&candidate| is_prime(candidate)).sum()
}
/// Primality test using 6k±1 trial division.
///
/// 0 and 1 are not prime, 2 and 3 are, and multiples of 2 or 3 are rejected up
/// front. Remaining candidates are checked against divisors `i` and `i + 2` for
/// `i = 5, 11, 17, …` up to the (floating-point) square root of `n`.
fn is_prime(n: u64) -> bool {
    match n {
        0 | 1 => return false,
        2 | 3 => return true,
        _ => {}
    }
    if n.is_multiple_of(2) || n.is_multiple_of(3) {
        return false;
    }
    let limit = (n as f64).sqrt() as u64;
    !(5..=limit)
        .step_by(6)
        .any(|i| n.is_multiple_of(i) || n.is_multiple_of(i + 2))
}
/// Times `compute_primes_sum(n)` executed inline on the calling task.
///
/// The input and the result are passed through `std::hint::black_box` so the
/// optimizer cannot discard the otherwise unused, side-effect-free computation
/// and report a near-zero duration.
async fn measure_direct(n: u64) -> Duration {
    let start = Instant::now();
    let _ = std::hint::black_box(compute_primes_sum(std::hint::black_box(n)));
    start.elapsed()
}
/// Times `compute_primes_sum(n)` when submitted through `pool.execute`,
/// including the round trip to the pool and back.
///
/// # Panics
/// Panics if the pool reports an error.
async fn measure_rayon(pool: &ComputePool, n: u64) -> Duration {
    let timer = Instant::now();
    let job = move || compute_primes_sum(n);
    pool.execute(job).await.unwrap();
    timer.elapsed()
}
/// Times `compute_primes_sum(n)` when run via `tokio::task::spawn_blocking`,
/// including the hand-off to the blocking task and the join.
///
/// # Panics
/// Panics if the blocking task fails to join.
async fn measure_spawn_blocking(n: u64) -> Duration {
    let timer = Instant::now();
    let joined = tokio::task::spawn_blocking(move || compute_primes_sum(n)).await;
    joined.unwrap();
    timer.elapsed()
}
/// Renders a duration using the largest unit in which it is at least 1:
/// seconds, milliseconds or microseconds with two decimals, otherwise whole
/// nanoseconds.
fn format_duration(d: Duration) -> String {
    let secs = d.as_secs_f64();
    if d >= Duration::from_secs(1) {
        return format!("{:.2}s", secs);
    }
    if d >= Duration::from_millis(1) {
        return format!("{:.2}ms", secs * 1000.0);
    }
    if d >= Duration::from_micros(1) {
        return format!("{:.2}μs", secs * 1_000_000.0);
    }
    format!("{}ns", d.as_nanos())
}
/// Prints the results-table banner: a blank line, a 120-column `=` rule, the
/// right-aligned column titles, and a 120-column `-` rule.
fn print_table_header() {
    let [n, direct, rayon, spawn, rayon_ratio, spawn_ratio, winner] = [
        "n",
        "Direct",
        "Rayon",
        "spawn_blocking",
        "Rayon Ratio",
        "Spawn Ratio",
        "Winner",
    ];
    println!("\n{}", "=".repeat(120));
    println!(
        "{:>10} | {:>15} | {:>15} | {:>15} | {:>12} | {:>12} | {:>20}",
        n, direct, rayon, spawn, rayon_ratio, spawn_ratio, winner
    );
    println!("{}", "-".repeat(120));
}
/// Prints one table row comparing the three execution strategies for input `n`.
///
/// Ratios are each strategy's time divided by the direct time. The winner is
/// Rayon or spawn_blocking only when its ratio is below 1.0 and strictly lower
/// than the other's; otherwise direct execution wins. When
/// `highlight_crossover` is set the row is wrapped in `>>> … <<<`.
fn print_row(
    n: u64,
    direct: Duration,
    rayon: Duration,
    spawn_blocking: Duration,
    highlight_crossover: bool,
) {
    let rayon_ratio = rayon.as_secs_f64() / direct.as_secs_f64();
    let spawn_ratio = spawn_blocking.as_secs_f64() / direct.as_secs_f64();

    let rayon_wins = rayon_ratio < 1.0 && rayon_ratio < spawn_ratio;
    let spawn_wins = spawn_ratio < 1.0 && spawn_ratio < rayon_ratio;
    let winner = match (rayon_wins, spawn_wins) {
        (true, _) => "Rayon ✓",
        (false, true) => "spawn_blocking",
        (false, false) => "Direct",
    };

    let row = format!(
        "{:>10} | {:>15} | {:>15} | {:>15} | {:>12.2}x | {:>12.2}x | {:>20}",
        n,
        format_duration(direct),
        format_duration(rayon),
        format_duration(spawn_blocking),
        rayon_ratio,
        spawn_ratio,
        winner
    );

    let (lead, trail) = if highlight_crossover {
        (">>> ", " <<<")
    } else {
        ("", "")
    };
    println!("{}{}{}", lead, row, trail);
}
/// Compares direct execution, `ComputePool` offload and `spawn_blocking` for a
/// prime-sum workload of growing size, then prints a short analysis.
///
/// Each size is measured three times per strategy and the fastest run is kept.
/// The scan starts at `n = 10` and grows until `n` exceeds 1,000,000.
///
/// # Errors
/// Returns an error if the compute pool cannot be created.
#[tokio::main]
async fn main() -> Result<()> {
    println!("🔬 Compute Pool Overhead Demonstration");
    println!("=====================================\n");

    // Create compute pool directly
    let compute_config = ComputeConfig {
        num_threads: Some(4),
        stack_size: Some(2 * 1024 * 1024),
        thread_prefix: "demo".to_string(),
        pin_threads: false,
    };
    let pool = Arc::new(ComputePool::new(compute_config)?);

    println!("Configuration:");
    println!(" Rayon threads: {}", pool.num_threads());
    println!();

    // Warm up all execution paths so first-use costs do not skew the table.
    println!("Warming up...");
    for _ in 0..5 {
        let _ = measure_direct(100).await;
        let _ = measure_rayon(&pool, 100).await;
        let _ = measure_spawn_blocking(100).await;
    }

    print_table_header();

    // Dynamic scanning with exponential growth
    let mut n = 10u64;
    let mut results = Vec::new();
    let mut found_crossover = false;
    let mut last_rayon_ratio = f64::MAX;

    while n <= 1_000_000 {
        // Best of three per strategy: the minimum is the least noisy sample.
        let mut direct = Duration::MAX;
        for _ in 0..3 {
            direct = direct.min(measure_direct(n).await);
        }
        let mut rayon = Duration::MAX;
        for _ in 0..3 {
            rayon = rayon.min(measure_rayon(&pool, n).await);
        }
        let mut spawn_blocking = Duration::MAX;
        for _ in 0..3 {
            spawn_blocking = spawn_blocking.min(measure_spawn_blocking(n).await);
        }

        let rayon_ratio = rayon.as_secs_f64() / direct.as_secs_f64();

        // The crossover is the first row where Rayon drops below direct execution.
        let is_crossover = !found_crossover && rayon_ratio < 1.0 && last_rayon_ratio >= 1.0;
        if is_crossover {
            found_crossover = true;
        }

        print_row(n, direct, rayon, spawn_blocking, is_crossover);
        results.push((n, direct, rayon, spawn_blocking));
        last_rayon_ratio = rayon_ratio;

        // Adaptive step size
        if n < 100 {
            n = (n as f64 * 2.0) as u64;
        } else if n < 10_000 {
            n = (n as f64 * 3.16) as u64; // ~10x every 2 steps
        } else {
            n *= 10;
        }
    }
    println!("{:=<120}", "");

    // Analysis
    println!("\n Analysis:");
    println!("============\n");

    if found_crossover {
        let crossover_point = results
            .iter()
            .find(|(_, d, r, _)| r.as_secs_f64() < d.as_secs_f64())
            .map(|(n, _, _, _)| *n);
        if let Some(n) = crossover_point {
            println!("✓ Rayon becomes beneficial at n ≈ {}", n);
            println!(
                " Below n={}: Overhead dominates, direct execution is faster",
                n
            );
            println!(
                " Above n={}: Compute dominates, Rayon offload is faster",
                n
            );
        }
    } else {
        println!("✗ No crossover found in tested range");
        println!(" Direct execution was always faster (overhead too high)");
    }

    // Find where spawn_blocking becomes beneficial
    let spawn_crossover = results
        .iter()
        .find(|(_, d, _, s)| s.as_secs_f64() < d.as_secs_f64())
        .map(|(n, _, _, _)| *n);
    if let Some(n) = spawn_crossover {
        println!("\n✓ spawn_blocking becomes beneficial at n ≈ {}", n);
    }

    // Show overhead at minimum
    if let Some((n, direct, rayon, spawn)) = results.first() {
        // `saturating_sub` clamps to zero when an offloaded run happened to beat
        // the direct run; `Duration::from_secs_f64` would panic on the negative
        // difference instead.
        let rayon_overhead = rayon.saturating_sub(*direct);
        let spawn_overhead = spawn.saturating_sub(*direct);
        println!("\nOverhead at n={}:", n);
        println!(" Rayon: +{}", format_duration(rayon_overhead));
        println!(" spawn_blocking: +{}", format_duration(spawn_overhead));
    }

    // Show benefit at maximum
    if let Some((n, direct, rayon, spawn)) = results.last() {
        let rayon_speedup = direct.as_secs_f64() / rayon.as_secs_f64();
        let spawn_speedup = direct.as_secs_f64() / spawn.as_secs_f64();
        println!("\nSpeedup at n={}:", n);
        println!(" Rayon: {:.2}x faster", rayon_speedup);
        println!(" spawn_blocking: {:.2}x faster", spawn_speedup);
    }

    // Print pool metrics
    println!("\n Compute Pool Metrics:");
    println!("========================");
    println!("{}", pool.metrics());

    Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Example demonstrating the use of ComputePool for CPU-intensive operations
//!
//! This example shows various patterns for using Rayon with Tokio:
//! - Fork-join with scope
//! - Parallel batch processing
//! - Dynamic task spawning
//! - Integration with async services
use anyhow::Result;
use dynamo_runtime::{
Worker,
compute::{ComputePool, ComputePoolExt},
};
use std::sync::{Arc, Mutex};
use std::time::Instant;
/// Stand-in for heavy CPU work: sums every prime in the half-open range
/// `2..input` (so `input` itself is never included).
fn expensive_computation(input: u64) -> u64 {
    (2..input).filter(|&candidate| is_prime(candidate)).sum()
}
/// Naive primality check: `n` is prime when it is greater than 1 and no integer
/// from 2 up to the (floating-point) square root of `n` divides it.
fn is_prime(n: u64) -> bool {
    if n <= 1 {
        return false;
    }
    let limit = (n as f64).sqrt() as u64;
    !(2..=limit).any(|divisor| n.is_multiple_of(divisor))
}
/// Example 1: fork-join.
///
/// Submits two independent computations through `pool.join`, waits for both and
/// prints the pair of results together with the elapsed time.
async fn example_fork_join(pool: &ComputePool) -> Result<()> {
    println!("\n=== Example 1: Fork-Join Pattern ===");
    let timer = Instant::now();

    // The two halves of the join.
    let left = || expensive_computation(10000);
    let right = || expensive_computation(20000);
    let (result1, result2) = pool.join(left, right).await?;

    println!("Fork-join results: {} and {}", result1, result2);
    println!("Time: {:?}", timer.elapsed());
    Ok(())
}
/// Example 2: scope-based parallel execution.
///
/// Spawns one task per input value inside a scope; each task writes its result
/// into its own slot of a shared vector.
///
/// The shared vector is handed back out of the scope closure and only read after
/// `execute_scoped` has resolved. Tasks spawned on a scope may still be pending
/// when the closure body returns, so unwrapping the `Arc` inside the closure
/// (as this example used to) can panic while tasks still hold clones.
async fn example_scope(pool: &ComputePool) -> Result<()> {
    println!("\n=== Example 2: Scope-based Execution ===");
    let data = [1000, 2000, 3000, 4000, 5000];
    let start = Instant::now();

    let shared = pool
        .execute_scoped(move |scope| {
            let slots = Arc::new(Mutex::new(vec![0u64; data.len()]));
            for (i, &value) in data.iter().enumerate() {
                let slots = Arc::clone(&slots);
                scope.spawn(move |_| {
                    let result = expensive_computation(value);
                    let mut r = slots.lock().unwrap();
                    r[i] = result;
                });
            }
            // Hand the container out; it is complete once the scope has finished.
            slots
        })
        .await?;

    // All spawned tasks have run by now, so every slot is filled.
    let results = std::mem::take(&mut *shared.lock().unwrap());

    println!("Scope results: {:?}", results);
    println!("Time: {:?}", start.elapsed());
    Ok(())
}
/// Example 3: parallel map via the pool's extension trait.
///
/// Maps `expensive_computation` over ten inputs (1000, 2000, …, 10000) on the
/// pool, then repeats the same work sequentially on the calling thread so the
/// two timings can be compared.
async fn example_parallel_map(pool: &ComputePool) -> Result<()> {
    println!("\n=== Example 3: Parallel Map ===");
    let items: Vec<u64> = (1..=10).map(|i| i * 1000).collect();

    // Parallel run; the input is cloned because it is reused below.
    let parallel_timer = Instant::now();
    let mapped = pool
        .parallel_map(items.clone(), expensive_computation)
        .await?;
    println!("Parallel map processed {} items", mapped.len());
    println!("Time: {:?}", parallel_timer.elapsed());

    // Sequential reference run.
    let sequential_timer = Instant::now();
    let _sequential: Vec<_> = items.iter().copied().map(expensive_computation).collect();
    println!("Sequential time: {:?}", sequential_timer.elapsed());
    Ok(())
}
/// Example 4: simulated batch tokenization.
///
/// Builds 100 sample texts and counts the whitespace-separated words of each in
/// its own scoped task (with a short sleep to simulate extra work), then prints
/// the total.
///
/// The per-text counts are read only after `execute_scoped` has resolved: tasks
/// spawned on the scope may not have run yet when the closure body returns, so
/// unwrapping the shared `Arc` inside the closure could panic.
async fn example_tokenization(pool: &ComputePool) -> Result<()> {
    println!("\n=== Example 4: Batch Tokenization Simulation ===");

    // Simulate batch of texts to tokenize
    let texts: Vec<String> = (0..100)
        .map(|i| {
            format!(
                "This is sample text number {} that needs to be tokenized",
                i
            )
        })
        .collect();

    let start = Instant::now();
    let texts_len = texts.len();

    // Process in parallel using scope
    let shared = pool
        .execute_scoped(move |scope| {
            let counts = Arc::new(Mutex::new(vec![0usize; texts_len]));
            // The closure owns `texts`, so each task can take its text by value.
            for (i, text) in texts.into_iter().enumerate() {
                let counts = Arc::clone(&counts);
                scope.spawn(move |_| {
                    // Simulate tokenization by counting words
                    let count = text.split_whitespace().count();
                    // Simulate more work
                    std::thread::sleep(std::time::Duration::from_micros(100));
                    let mut c = counts.lock().unwrap();
                    c[i] = count;
                });
            }
            counts
        })
        .await?;

    // Every task has completed once the scope has returned.
    let token_counts = std::mem::take(&mut *shared.lock().unwrap());
    let total_tokens: usize = token_counts.iter().sum();

    println!(
        "Tokenized {} texts, total tokens: {}",
        texts_len, total_tokens
    );
    println!("Time: {:?}", start.elapsed());
    Ok(())
}
/// Example 5: hierarchical (two-level) computation.
///
/// Four first-level tasks each compute an intermediate value and then spawn two
/// second-level tasks derived from it. The printed result is the sum of all
/// eight second-level results.
///
/// Second-level tasks add their result straight into the slot of their parent,
/// and the slots are summed only after `execute_scoped` has resolved. Reading
/// the vectors right after spawning (as this example used to) races with tasks
/// that have not run yet and can yield partial or zero sums.
async fn example_hierarchical(pool: &ComputePool) -> Result<()> {
    println!("\n=== Example 5: Hierarchical Computation ===");
    let start = Instant::now();

    let shared = pool
        .execute_scoped(move |scope| {
            // One accumulator slot per first-level task.
            let phase1_results = Arc::new(Mutex::new(vec![0u64; 4]));

            // First level: compute initial values
            for i in 0..4usize {
                let phase1_results = Arc::clone(&phase1_results);
                scope.spawn(move |s2| {
                    let intermediate = expensive_computation((i + 1) as u64 * 1000);

                    // Second level: further process each result
                    for j in 0..2u64 {
                        let value = intermediate + (j * 100);
                        let phase1_results = Arc::clone(&phase1_results);
                        s2.spawn(move |_| {
                            let result = expensive_computation(value);
                            let mut p1 = phase1_results.lock().unwrap();
                            p1[i] += result;
                        });
                    }
                });
            }

            phase1_results
        })
        .await?;

    // The scope has finished, so every leaf task has contributed.
    let result: u64 = shared.lock().unwrap().iter().sum();

    println!("Hierarchical computation result: {}", result);
    println!("Time: {:?}", start.elapsed());
    Ok(())
}
#[tokio::main]
async fn main() -> Result<()> {
// Initialize logging
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();
// Create worker and runtime
let worker = Worker::from_settings()?;
let runtime = worker.runtime().clone();
// Get compute pool
let pool = runtime
.compute_pool()
.ok_or_else(|| anyhow::anyhow!("Compute pool not initialized"))?
.clone();
println!(
"Compute pool initialized with {} threads",
pool.num_threads()
);
// Run examples
example_fork_join(&pool).await?;
example_scope(&pool).await?;
example_parallel_map(&pool).await?;
example_tokenization(&pool).await?;
example_hierarchical(&pool).await?;
// Print metrics
let metrics = pool.metrics();
println!("\n=== Compute Pool Metrics ===");
println!("{}", metrics);
Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use anyhow::Result;
use dynamo_runtime::compute::{ComputeConfig, ComputePool};
use dynamo_runtime::{compute_large, compute_medium, compute_small};
use std::sync::Arc;
/// Smoke test for the three compute macros using an explicitly created pool.
///
/// Evaluates a trivial expression with `compute_small!`, sums `0..1000` with
/// `compute_medium!` and sums `0..1_000_000` with `compute_large!`, printing
/// each result.
///
/// # Errors
/// Fails if the pool cannot be created or a macro invocation reports an error.
#[tokio::main]
async fn main() -> Result<()> {
    println!("Testing compute macros...\n");

    // Four-thread pool passed explicitly to the medium/large macros.
    let pool = Arc::new(ComputePool::new(ComputeConfig {
        num_threads: Some(4),
        stack_size: Some(2 * 1024 * 1024),
        thread_prefix: "test".to_string(),
        pin_threads: false,
    })?);

    // Small: evaluated inline.
    println!("Testing compute_small!...");
    let small = compute_small!(2 + 2);
    println!(" Result: {}", small);

    // Medium: explicit-pool form.
    println!("\nTesting compute_medium!...");
    let medium = compute_medium!(pool, {
        let mut total = 0u64;
        for value in 0..1000 {
            total += value;
        }
        total
    });
    println!(" Result: {}", medium);

    // Large: explicit-pool form.
    println!("\nTesting compute_large!...");
    let large = compute_large!(pool, {
        let mut total = 0u64;
        for value in 0..1_000_000 {
            total += value;
        }
        total
    });
    println!(" Result: {}", large);

    println!("\n All macros working!");
    Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Example showing how to integrate ComputePool with tokenization workloads
//!
//! This demonstrates the pattern that could be used in lib/llm/src/preprocessor.rs
//! to leverage the compute pool for batch tokenization operations.
use anyhow::Result;
use dynamo_runtime::{Worker, compute::ComputePool};
use std::sync::{Arc, Mutex};
use std::time::Instant;
/// Stateless stand-in for a real tokenizer, used only by the examples below.
struct MockTokenizer;

impl MockTokenizer {
    /// Produces one token per whitespace-separated word.
    ///
    /// A word's token is a base-31 wrapping hash of its bytes plus the word's
    /// position in the text (also wrapping).
    fn encode(&self, text: &str) -> Vec<u32> {
        text.split_whitespace()
            .enumerate()
            .map(|(position, word)| {
                let mut hash = 0u32;
                for byte in word.bytes() {
                    hash = hash.wrapping_mul(31).wrapping_add(byte as u32);
                }
                hash.wrapping_add(position as u32)
            })
            .collect()
    }

    /// Renders each token as `token_<id mod 1000>` and joins them with single
    /// spaces. An empty slice yields an empty string.
    fn decode(&self, tokens: &[u32]) -> String {
        let mut pieces = Vec::with_capacity(tokens.len());
        for token in tokens {
            pieces.push(format!("token_{}", token % 1000));
        }
        pieces.join(" ")
    }
}
/// Pattern 1: Direct replacement for par_iter in preprocessor
///
/// This shows how the existing code in lib/llm/src/preprocessor.rs:330
/// could be enhanced with explicit compute pool control
async fn tokenize_batch_with_pool(
pool: &ComputePool,
tokenizer: Arc<MockTokenizer>,
texts: Vec<String>,
) -> Result<Vec<Vec<u32>>> {
println!(
"\n=== Tokenizing {} texts with compute pool ===",
texts.len()
);
let start = Instant::now();
// Option 1: Using scope for fine control
let token_batches = pool
.execute_scoped(move |scope| {
let results = Arc::new(Mutex::new(vec![Vec::new(); texts.len()]));
for (i, text) in texts.iter().enumerate() {
let tokenizer = tokenizer.clone();
let text = text.clone();
let results = results.clone();
scope.spawn(move |_| {
let tokens = tokenizer.encode(&text);
let mut r = results.lock().unwrap();
r[i] = tokens;
});
}
Arc::try_unwrap(results).unwrap().into_inner().unwrap()
})
.await?;
let total_tokens: usize = token_batches.iter().map(|v| v.len()).sum();
println!(
"Tokenized in {:?}, total tokens: {}",
start.elapsed(),
total_tokens
);
Ok(token_batches)
}
/// Pattern 2: rayon `par_iter` executed inside the compute pool.
///
/// Keeps the familiar `par_iter().map().collect()` shape of existing code while
/// running it through `pool.install`. Output order follows input order.
async fn tokenize_batch_par_iter(
    pool: &ComputePool,
    tokenizer: Arc<MockTokenizer>,
    texts: Vec<String>,
) -> Result<Vec<Vec<u32>>> {
    use rayon::prelude::*;

    println!("\n=== Tokenizing with par_iter in compute pool ===");
    let timer = Instant::now();

    // The whole parallel iterator runs as a single job installed on the pool.
    let encode_all = move || -> Vec<Vec<u32>> {
        texts
            .par_iter()
            .map(|text| tokenizer.encode(text))
            .collect()
    };
    let token_batches = pool.install(encode_all).await?;

    let total_tokens = token_batches.iter().map(|v| v.len()).sum::<usize>();
    println!(
        "Tokenized in {:?}, total tokens: {}",
        timer.elapsed(),
        total_tokens
    );
    Ok(token_batches)
}
/// Pattern 3: mixed async/sync processing.
///
/// Simulates three incoming requests of 2, 3 and 1 texts. Each request's batch
/// is tokenized through the pool, followed by a 10 ms async sleep standing in
/// for I/O between requests.
async fn process_request_stream(pool: &ComputePool, tokenizer: Arc<MockTokenizer>) -> Result<()> {
    println!("\n=== Processing request stream ===");

    // Simulated inbound requests.
    let raw_requests: [&[&str]; 3] = [
        &["Request 1 text 1", "Request 1 text 2"],
        &["Request 2 text 1", "Request 2 text 2", "Request 2 text 3"],
        &["Request 3 text 1"],
    ];
    let requests: Vec<Vec<String>> = raw_requests
        .iter()
        .map(|batch| batch.iter().map(|text| text.to_string()).collect())
        .collect();

    for (index, batch) in requests.into_iter().enumerate() {
        let request_no = index + 1;
        println!("Processing request {}", request_no);

        // Parallel tokenization of this request's batch.
        let tokens = tokenize_batch_with_pool(pool, tokenizer.clone(), batch).await?;

        // Stand-in for async I/O between requests.
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;

        println!(
            "Request {} completed with {} token batches",
            request_no,
            tokens.len()
        );
    }
    Ok(())
}
/// Pattern 4: Encode/Decode pipeline
///
/// Shows how to chain multiple compute operations
async fn encode_decode_pipeline(
pool: &ComputePool,
tokenizer: Arc<MockTokenizer>,
texts: Vec<String>,
) -> Result<Vec<String>> {
println!("\n=== Encode/Decode Pipeline ===");
let start = Instant::now();
// Step 1: Encode all texts in parallel
let tokenizer_clone = tokenizer.clone();
let encoded = pool
.execute_scoped(move |scope| {
let results = Arc::new(Mutex::new(vec![Vec::new(); texts.len()]));
for (i, text) in texts.iter().enumerate() {
let tokenizer = tokenizer_clone.clone();
let text = text.clone();
let results = results.clone();
scope.spawn(move |_| {
let tokens = tokenizer.encode(&text);
let mut r = results.lock().unwrap();
r[i] = tokens;
});
}
Arc::try_unwrap(results).unwrap().into_inner().unwrap()
})
.await?;
println!("Encoding complete in {:?}", start.elapsed());
// Step 2: Decode all token sequences in parallel
let decoded_start = Instant::now();
let decoded = pool
.execute_scoped(move |scope| {
let results = Arc::new(Mutex::new(vec![String::new(); encoded.len()]));
for (i, tokens) in encoded.iter().enumerate() {
let tokenizer = tokenizer.clone();
let tokens = tokens.clone();
let results = results.clone();
scope.spawn(move |_| {
let text = tokenizer.decode(&tokens);
let mut r = results.lock().unwrap();
r[i] = text;
});
}
Arc::try_unwrap(results).unwrap().into_inner().unwrap()
})
.await?;
println!("Decoding complete in {:?}", decoded_start.elapsed());
println!("Total pipeline time: {:?}", start.elapsed());
Ok(decoded)
}
#[tokio::main]
async fn main() -> Result<()> {
// Initialize logging
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();
// Set compute pool configuration via environment
unsafe {
std::env::set_var("DYN_COMPUTE_THREADS", "4");
}
// Create worker and runtime
let worker = Worker::from_settings()?;
let runtime = worker.runtime().clone();
// Get compute pool
let pool = runtime
.compute_pool()
.ok_or_else(|| anyhow::anyhow!("Compute pool not initialized"))?
.clone();
println!(
"Compute pool initialized with {} threads",
pool.num_threads()
);
// Create mock tokenizer
let tokenizer = Arc::new(MockTokenizer);
// Generate test data
let texts: Vec<String> = (0..50)
.map(|i| {
format!(
"This is sample text number {} with some words to tokenize. \
The quick brown fox jumps over the lazy dog.",
i
)
})
.collect();
// Run examples
let _ = tokenize_batch_with_pool(&pool, tokenizer.clone(), texts.clone()).await?;
let _ = tokenize_batch_par_iter(&pool, tokenizer.clone(), texts.clone()).await?;
process_request_stream(&pool, tokenizer.clone()).await?;
let decoded = encode_decode_pipeline(&pool, tokenizer.clone(), texts.clone()).await?;
println!("\n=== Results ===");
println!("Processed {} texts", texts.len());
println!("First decoded text: {}", &decoded[0]);
// Print metrics
let metrics = pool.metrics();
println!("\n=== Compute Pool Metrics ===");
println!("{}", metrics);
Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Zero-overhead macros for compute task execution with optional validation
//!
//! These macros provide size-aware execution strategies:
//! - `compute_small!`: Direct inline execution for tasks <100μs
//! - `compute_medium!`: Semaphore-guarded block_in_place for tasks 100μs-1ms
//! - `compute_large!`: Rayon offload for tasks >1ms
//!
//! When the `compute-validation` feature is enabled, these macros will
//! time execution and emit warnings if tasks are misclassified.
/// Execute a small compute task (<100μs) directly inline.
///
/// This macro has zero overhead and simply executes the expression directly.
/// When validation is enabled, it will warn if the task takes >100μs.
///
/// The macro evaluates to the value of the expression. With the
/// `compute-validation` feature enabled, the evaluation is timed and the elapsed
/// time is passed to `compute::validation::validate_small`.
///
/// NOTE(review): the `#[cfg(feature = "compute-validation")]` attributes are part
/// of the expansion, so they are presumably evaluated against the features of the
/// crate that invokes the macro rather than this crate — confirm that downstream
/// crates enable validation the way this is intended.
///
/// # Example
/// ```
/// # use dynamo_runtime::compute_small;
/// let result = compute_small!(2 + 2);
/// assert_eq!(result, 4);
/// ```
#[macro_export]
macro_rules! compute_small {
($expr:expr) => {{
// Start the timer only when validation is compiled in.
#[cfg(feature = "compute-validation")]
let _start = std::time::Instant::now();
let result = $expr; // Direct execution, zero overhead
// Report the elapsed time to the validator (validation builds only).
#[cfg(feature = "compute-validation")]
$crate::compute::validation::validate_small(_start.elapsed());
result
}};
}
/// Execute a medium compute task (100μs-1ms) with intelligent scheduling.
///
/// The macro expands to a block that awaits internally and applies `?` to the
/// outcome, so it evaluates to the task's value (not a future). It must be
/// invoked inside an `async` context whose return type is compatible with `?`.
///
/// Scheduling, in order:
/// 1. If a thread-local block permit can be acquired, the expression runs via
///    `tokio::task::block_in_place` and the permit is dropped right after it.
/// 2. Otherwise the expression is offloaded with `execute` — on the thread-local
///    pool in the single-argument form, or on `$pool` in the two-argument form.
/// 3. Single-argument form only: if there is no thread-local pool either, a
///    warning is logged and the expression is evaluated inline.
///
/// With the `compute-validation` feature enabled, the elapsed time is passed to
/// `compute::validation::validate_medium`.
///
/// # Example
/// ```ignore
/// # use dynamo_runtime::{compute_medium, compute::ComputePool};
/// # async fn example(pool: &ComputePool) -> anyhow::Result<()> {
/// // With thread-local context (on worker thread)
/// let result = compute_medium!({
///     (0..1000).map(|i| i * 2).sum::<i32>()
/// });
///
/// // Or with explicit pool (fallback)
/// let result = compute_medium!(pool, {
///     (0..1000).map(|i| i * 2).sum::<i32>()
/// });
/// # Ok(())
/// # }
/// ```
#[macro_export]
macro_rules! compute_medium {
// Thread-local version (no pool parameter)
($expr:expr) => {{
#[cfg(feature = "compute-validation")]
let _start = std::time::Instant::now();
let result = async {
// Try thread-local context first
if let Ok(_permit) = $crate::compute::thread_local::try_acquire_block_permit() {
// Got permit - use block_in_place
Ok(tokio::task::block_in_place(|| {
let r = $expr;
drop(_permit); // Release ASAP
r
}))
} else if let Some(pool) = $crate::compute::thread_local::get_pool() {
// No permit but have pool - offload
pool.execute(|| $expr).await
} else {
// No context available - fall back to inline execution
// This may block the async runtime but ensures the macro always works
tracing::warn!("compute_medium: No thread-local context, executing inline (may block async runtime)");
Ok($expr)
}
}
// The macro awaits and propagates errors itself; callers must not add `.await`.
.await?;
#[cfg(feature = "compute-validation")]
$crate::compute::validation::validate_medium(_start.elapsed());
result
}};
// Explicit pool version (fallback)
($pool:expr, $expr:expr) => {{
#[cfg(feature = "compute-validation")]
let _start = std::time::Instant::now();
let result = async {
// Try thread-local permits first, fall back to pool
if let Ok(_permit) = $crate::compute::thread_local::try_acquire_block_permit() {
// Got permit - use block_in_place
Ok(tokio::task::block_in_place(|| {
let r = $expr;
drop(_permit); // Release ASAP
r
}))
} else {
// No permit available - offload to provided pool
$pool.execute(|| $expr).await
}
}
// The macro awaits and propagates errors itself; callers must not add `.await`.
.await?;
#[cfg(feature = "compute-validation")]
$crate::compute::validation::validate_medium(_start.elapsed());
result
}};
}
/// Execute a large compute task (>1ms) on the Rayon thread pool.
///
/// The macro expands to a block that awaits internally and applies `?` to the
/// outcome, so it evaluates to the task's value (not a future). It must be
/// invoked inside an `async` context whose return type is compatible with `?`.
///
/// - Single-argument form: offloads with `execute` on the thread-local pool;
///   if no thread-local pool is available, it logs a warning and evaluates the
///   expression inline instead.
/// - Two-argument form: always offloads with `$pool.execute(..)`.
///
/// With the `compute-validation` feature enabled, the elapsed time is passed to
/// `compute::validation::validate_large`.
///
/// # Example
/// ```ignore
/// # use dynamo_runtime::{compute_large, compute::ComputePool};
/// # async fn example(pool: &ComputePool) -> anyhow::Result<()> {
/// // With thread-local context
/// let result = compute_large!({
///     expensive_matrix_multiplication()
/// });
///
/// // Or with explicit pool
/// let result = compute_large!(pool, {
///     expensive_matrix_multiplication()
/// });
/// # Ok(())
/// # }
/// ```
#[macro_export]
macro_rules! compute_large {
// Thread-local version
($expr:expr) => {{
#[cfg(feature = "compute-validation")]
let _start = std::time::Instant::now();
let result = async {
if let Some(pool) = $crate::compute::thread_local::get_pool() {
pool.execute(|| $expr).await
} else {
// No pool available - fall back to inline execution
// Warning: Large tasks inline will severely block the async runtime
tracing::warn!("compute_large: No thread-local context, executing inline (will block async runtime!)");
Ok($expr)
}
}
// The macro awaits and propagates errors itself; callers must not add `.await`.
.await?;
#[cfg(feature = "compute-validation")]
$crate::compute::validation::validate_large(_start.elapsed());
result
}};
// Explicit pool version
($pool:expr, $expr:expr) => {{
#[cfg(feature = "compute-validation")]
let _start = std::time::Instant::now();
// Awaited and `?`-propagated here; the macro yields the plain value.
let result = $pool.execute(|| $expr).await?;
#[cfg(feature = "compute-validation")]
$crate::compute::validation::validate_large(_start.elapsed());
result
}};
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Metrics for monitoring compute pool operations
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::time::Duration;
/// Metrics for the compute pool
///
/// Every field is an atomic counter, so a shared reference is enough to record
/// and read values. Counters are updated independently with relaxed ordering;
/// a reader may therefore observe a momentarily inconsistent combination.
#[derive(Debug)]
pub struct ComputeMetrics {
    /// Total number of tasks executed
    tasks_total: AtomicU64,
    /// Number of tasks currently running
    tasks_active: AtomicUsize,
    /// Total time spent in compute tasks (microseconds)
    total_compute_time_us: AtomicU64,
    /// Maximum task duration seen (microseconds)
    max_task_duration_us: AtomicU64,
    /// Number of tasks that took longer than 100ms
    slow_tasks: AtomicU64,
}

impl ComputeMetrics {
    /// Tasks strictly longer than this are counted as slow.
    const SLOW_TASK_THRESHOLD: Duration = Duration::from_millis(100);

    /// Create new metrics instance with every counter at zero
    pub fn new() -> Self {
        Self {
            tasks_total: AtomicU64::new(0),
            tasks_active: AtomicUsize::new(0),
            total_compute_time_us: AtomicU64::new(0),
            max_task_duration_us: AtomicU64::new(0),
            slow_tasks: AtomicU64::new(0),
        }
    }

    /// Record that a task has started
    pub fn record_task_start(&self) {
        self.tasks_active.fetch_add(1, Ordering::Relaxed);
    }

    /// Record that a task has completed after running for `duration`
    ///
    /// Decrements the active gauge, bumps the total, accumulates the duration,
    /// updates the maximum and counts the task as slow when it ran for more
    /// than 100ms.
    pub fn record_task_completion(&self, duration: Duration) {
        // Saturate at zero: a completion that arrives after `reset()` (or without a
        // matching start) must not wrap the gauge around to `usize::MAX`.
        let _ = self
            .tasks_active
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |active| {
                Some(active.saturating_sub(1))
            });
        self.tasks_total.fetch_add(1, Ordering::Relaxed);

        // Saturating conversion: `as_micros` is u128.
        let duration_us = u64::try_from(duration.as_micros()).unwrap_or(u64::MAX);
        self.total_compute_time_us
            .fetch_add(duration_us, Ordering::Relaxed);

        // Atomic max replaces the hand-written compare-exchange loop.
        self.max_task_duration_us
            .fetch_max(duration_us, Ordering::Relaxed);

        // Compare full-precision durations; truncating to whole milliseconds would
        // miss tasks between 100ms and 101ms.
        if duration > Self::SLOW_TASK_THRESHOLD {
            self.slow_tasks.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Get total number of tasks executed
    pub fn tasks_total(&self) -> u64 {
        self.tasks_total.load(Ordering::Relaxed)
    }

    /// Get number of currently active tasks
    pub fn tasks_active(&self) -> usize {
        self.tasks_active.load(Ordering::Relaxed)
    }

    /// Get average task duration in microseconds (0.0 when no task has completed)
    pub fn avg_task_duration_us(&self) -> f64 {
        let total = self.tasks_total.load(Ordering::Relaxed);
        if total == 0 {
            return 0.0;
        }
        let total_time = self.total_compute_time_us.load(Ordering::Relaxed);
        total_time as f64 / total as f64
    }

    /// Get maximum task duration in microseconds
    pub fn max_task_duration_us(&self) -> u64 {
        self.max_task_duration_us.load(Ordering::Relaxed)
    }

    /// Get number of slow tasks (> 100ms)
    pub fn slow_tasks(&self) -> u64 {
        self.slow_tasks.load(Ordering::Relaxed)
    }

    /// Reset all metrics to zero
    ///
    /// The stores are independent, so a concurrent recorder may interleave with
    /// the reset.
    pub fn reset(&self) {
        self.tasks_total.store(0, Ordering::Relaxed);
        self.tasks_active.store(0, Ordering::Relaxed);
        self.total_compute_time_us.store(0, Ordering::Relaxed);
        self.max_task_duration_us.store(0, Ordering::Relaxed);
        self.slow_tasks.store(0, Ordering::Relaxed);
    }
}

impl Default for ComputeMetrics {
    fn default() -> Self {
        Self::new()
    }
}

/// Format metrics as a human-readable string (durations shown in milliseconds)
impl std::fmt::Display for ComputeMetrics {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "ComputeMetrics {{ tasks_total: {}, tasks_active: {}, avg_duration_ms: {:.2}, max_duration_ms: {:.2}, slow_tasks: {} }}",
            self.tasks_total(),
            self.tasks_active(),
            // Stored values are microseconds; divide to display milliseconds.
            self.avg_task_duration_us() / 1000.0,
            self.max_task_duration_us() as f64 / 1000.0,
            self.slow_tasks(),
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Start/completion pairs move the gauge, the total and the slow counter.
    #[test]
    fn test_metrics_recording() {
        let metrics = ComputeMetrics::new();
        assert_eq!(metrics.tasks_total(), 0);
        assert_eq!(metrics.tasks_active(), 0);

        // A 50ms task is not slow.
        metrics.record_task_start();
        assert_eq!(metrics.tasks_active(), 1);
        let fast = Duration::from_millis(50);
        metrics.record_task_completion(fast);
        assert_eq!(metrics.tasks_active(), 0);
        assert_eq!(metrics.tasks_total(), 1);
        assert_eq!(metrics.slow_tasks(), 0);

        // A 150ms task is slow.
        metrics.record_task_start();
        let slow = Duration::from_millis(150);
        metrics.record_task_completion(slow);
        assert_eq!(metrics.tasks_total(), 2);
        assert_eq!(metrics.slow_tasks(), 1);
    }

    /// `reset` zeroes the counters that were previously recorded.
    #[test]
    fn test_metrics_reset() {
        let metrics = ComputeMetrics::new();
        metrics.record_task_start();
        metrics.record_task_completion(Duration::from_millis(50));
        assert_eq!(metrics.tasks_total(), 1);

        metrics.reset();
        assert_eq!(metrics.tasks_total(), 0);
        assert_eq!(metrics.tasks_active(), 0);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Compute module for CPU-intensive operations using Rayon
//!
//! This module provides a dedicated compute thread pool for CPU-bound work,
//! integrating Rayon's fork-join parallelism with Tokio's async runtime.
//!
//! Key features:
//! - Dedicated Rayon thread pool for compute operations
//! - Seamless async-to-sync bridging via tokio-rayon
//! - Scope-based parallelism for complex computational graphs
//! - Metrics and monitoring for compute operations
//!
#![doc = include_str!("../../docs/rayon-tokio-strategy.md")]
use anyhow::Result;
use rayon::ThreadPoolBuilder;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Instant;
pub mod macros;
pub mod metrics;
pub mod pool;
pub mod thread_local;
#[cfg(feature = "compute-validation")]
pub mod validation;
pub use metrics::ComputeMetrics;
pub use pool::{ComputeHandle, ComputePool, ComputePoolExt};
/// Configuration for the compute thread pool
#[derive(Debug, Clone)]
pub struct ComputeConfig {
    /// Number of threads in the Rayon pool.
    ///
    /// When `None`, `build_pool` uses half of the available parallelism clamped
    /// to the range 2..=16, or 2 if the parallelism cannot be detected.
    pub num_threads: Option<usize>,
    /// Stack size for compute threads in bytes (the default config uses 2MB).
    /// When `None`, no stack size is set on the Rayon builder.
    pub stack_size: Option<usize>,
    /// Thread name prefix (defaults to "compute"); threads are named `<prefix>-<n>`
    pub thread_prefix: String,
    /// Whether to pin threads to CPU cores.
    /// Not acted upon yet: `build_pool` only carries a TODO for pinning.
    pub pin_threads: bool,
}

impl Default for ComputeConfig {
    fn default() -> Self {
        ComputeConfig {
            // Resolved lazily in `build_pool`.
            num_threads: None,
            // 2MB
            stack_size: Some(2 * 1024 * 1024),
            thread_prefix: String::from("compute"),
            pin_threads: false,
        }
    }
}

impl ComputeConfig {
    /// Validate the configuration
    ///
    /// # Errors
    /// Returns an error when `num_threads` is `Some(0)` or when `stack_size` is
    /// set to less than 128KB.
    pub fn validate(&self) -> Result<()> {
        if self.num_threads == Some(0) {
            return Err(anyhow::anyhow!(
                "Number of compute threads cannot be 0. Use None to disable compute pool entirely."
            ));
        }

        match self.stack_size {
            Some(bytes) if bytes < 128 * 1024 => Err(anyhow::anyhow!(
                "Stack size too small: {}KB. Minimum recommended: 128KB",
                bytes / 1024
            )),
            _ => Ok(()),
        }
    }

    /// Build a Rayon thread pool from this configuration
    ///
    /// # Errors
    /// Fails when the configuration does not validate or when Rayon cannot
    /// create the pool.
    pub(crate) fn build_pool(&self) -> Result<rayon::ThreadPool> {
        self.validate()?;

        // Explicit thread count wins; otherwise derive one from the machine.
        let worker_count = match self.num_threads {
            Some(requested) => requested,
            None => match std::thread::available_parallelism() {
                // Half the cores, at least 2 for meaningful parallelism, at most 16.
                Ok(cores) => (cores.get() / 2).clamp(2, 16),
                // Detection failed: fall back to 2 threads.
                Err(_) => 2,
            },
        };

        let mut builder = ThreadPoolBuilder::new().num_threads(worker_count);
        if let Some(bytes) = self.stack_size {
            builder = builder.stack_size(bytes);
        }

        // Threads are named "<prefix>-<n>" where n comes from a private counter
        // (the index Rayon passes to the callback is ignored).
        let name_prefix = self.thread_prefix.clone();
        let next_id = Arc::new(AtomicU64::new(0));
        builder = builder.thread_name(move |_| {
            let n = next_id.fetch_add(1, Ordering::SeqCst);
            format!("{}-{}", name_prefix, n)
        });

        // TODO: Add CPU pinning if requested (`pin_threads`), e.g. via a
        // `start_handler` that pins each thread to a core.

        match builder.build() {
            Ok(pool) => Ok(pool),
            Err(e) => Err(anyhow::anyhow!("Failed to create Rayon thread pool: {}", e)),
        }
    }
}
/// Helper trait for scope-based operations
///
/// No implementor is visible in this part of the file; the contract below is
/// what the signature requires.
pub trait ScopeExecutor {
/// Execute a function within a Rayon scope
///
/// `f` is handed a reference to a [`rayon::Scope`] and must be `Send`.
/// Returns an `R` — presumably the value produced by `f` (confirm against
/// the implementor).
fn execute_in_scope<F, R>(&self, f: F) -> R
where
F: FnOnce(&rayon::Scope) -> R + Send,
R: Send;
}
/// Helper functions for common parallel patterns
pub mod patterns {
    use super::*;

    /// Run `f1` and `f2` through `rayon::join` on `pool` and return both results
    /// as a tuple, in argument order.
    pub async fn parallel_join<F1, F2, R1, R2>(
        pool: &ComputePool,
        f1: F1,
        f2: F2,
    ) -> Result<(R1, R2)>
    where
        F1: FnOnce() -> R1 + Send + 'static,
        F2: FnOnce() -> R2 + Send + 'static,
        R1: Send + 'static,
        R2: Send + 'static,
    {
        let run_both = move || rayon::join(f1, f2);
        pool.execute(run_both).await
    }

    /// Apply `f` to every element of `items` with a Rayon parallel iterator on
    /// `pool` and collect the outputs into a `Vec`.
    pub async fn parallel_map<F, T, R>(pool: &ComputePool, items: Vec<T>, f: F) -> Result<Vec<R>>
    where
        F: Fn(T) -> R + Sync + Send + 'static,
        T: Send + 'static,
        R: Send + 'static,
    {
        use rayon::prelude::*;

        let map_all = move || -> Vec<R> { items.into_par_iter().map(f).collect() };
        pool.execute(map_all).await
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration carries the documented prefix, stack size and pinning flag.
    #[test]
    fn test_compute_config_default() {
        let ComputeConfig {
            thread_prefix,
            stack_size,
            pin_threads,
            ..
        } = ComputeConfig::default();

        assert_eq!(thread_prefix, "compute");
        assert_eq!(stack_size, Some(2 * 1024 * 1024));
        assert!(!pin_threads);
    }

    /// An explicit thread count is honoured by the built pool.
    #[test]
    fn test_build_pool() {
        let pool = ComputeConfig {
            num_threads: Some(2),
            ..Default::default()
        }
        .build_pool()
        .unwrap();

        assert_eq!(pool.current_num_threads(), 2);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Compute pool implementation with tokio-rayon integration
//!
//! The `ComputePool` allows multiple async tasks to concurrently submit different
//! types of parallel work to a shared Rayon thread pool. This enables efficient
//! CPU utilization without manual thread management.
//!
//! # Concurrent Usage Example
//!
//! ```ignore
//! use std::sync::Arc;
//! use dynamo_runtime::compute::ComputePool;
//! use rayon::prelude::*;
//!
//! async fn concurrent_processing(pool: Arc<ComputePool>) {
//! // Task 1: Using scope for dynamic task generation
//! let task1 = tokio::spawn({
//! let pool = pool.clone();
//! async move {
//! pool.execute_scoped(|scope| {
//! // Dynamically spawn tasks based on runtime conditions
//! for i in 0..100 {
//! scope.spawn(move |_| {
//! // CPU-intensive work
//! let mut sum = 0u64;
//! for j in 0..1000 {
//! sum += (i * j) as u64;
//! }
//! std::hint::black_box(sum); // spawn bodies return `()`
//! });
//! }
//! }).await
//! }
//! });
//!
//! // Task 2: Using parallel iterators for batch processing
//! let task2 = tokio::spawn({
//! let pool = pool.clone();
//! async move {
//! let data: Vec<u32> = (0..10000).collect();
//! pool.install(move || {
//! data.par_chunks(100)
//! .map(|chunk| chunk.iter().sum::<u32>())
//! .collect::<Vec<_>>()
//! }).await
//! }
//! });
//!
//! // Both tasks run concurrently, sharing the same thread pool
//! let (result1, result2) = tokio::join!(task1, task2);
//! }
//! ```
//!
//! # Thread Pool Sharing
//!
//! The Rayon thread pool uses work-stealing to efficiently distribute work from
//! multiple concurrent sources:
//!
//! - Tasks from `scope.spawn()` are pushed to thread-local deques
//! - Parallel iterators distribute work across all threads
//! - Idle threads steal work from busy threads
//! - No coordination needed between different parallelization patterns
use super::{ComputeConfig, ComputeMetrics};
use anyhow::Result;
use async_trait::async_trait;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
/// A compute pool that manages CPU-intensive operations
#[derive(Clone)]
pub struct ComputePool {
/// The underlying Rayon thread pool
pool: Arc<rayon::ThreadPool>,
/// Metrics for monitoring compute operations
metrics: Arc<ComputeMetrics>,
/// Configuration used to create this pool
config: ComputeConfig,
}
impl std::fmt::Debug for ComputePool {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ComputePool")
.field("num_threads", &self.pool.current_num_threads())
.field("metrics", &self.metrics)
.field("config", &self.config)
.finish()
}
}
impl ComputePool {
/// Create a new compute pool with the given configuration
pub fn new(config: ComputeConfig) -> Result<Self> {
let pool = config.build_pool()?;
let metrics = Arc::new(ComputeMetrics::new());
Ok(Self {
pool: Arc::new(pool),
metrics,
config,
})
}
/// Create a compute pool with default configuration
pub fn with_defaults() -> Result<Self> {
Self::new(ComputeConfig::default())
}
/// Execute a synchronous computation on the thread pool
///
/// This method is designed to be called from within `spawn_blocking` or other
/// synchronous contexts. It has minimal overhead as it directly uses Rayon
/// without the async bridge.
///
/// # Example
/// ```ignore
/// # use dynamo_runtime::compute::ComputePool;
/// # let pool = ComputePool::new(Default::default()).unwrap();
/// tokio::task::spawn_blocking(move || {
/// pool.execute_sync(|| {
/// // CPU-intensive work
/// expensive_computation()
/// })
/// });
/// ```
pub fn execute_sync<F, R>(&self, f: F) -> R
where
F: FnOnce() -> R + Send,
R: Send,
{
self.pool.install(f)
}
/// Execute a compute task in the Rayon pool
///
/// This bridges from async context to the Rayon thread pool,
/// allowing CPU-intensive work to run without blocking Tokio workers.
///
/// Note: This method has ~25μs overhead for small tasks due to the async
/// channel communication. For very small computations (<100μs), consider
/// running directly on Tokio or using `spawn_blocking` with `execute_sync`.
pub async fn execute<F, R>(&self, f: F) -> Result<R>
where
F: FnOnce() -> R + Send + 'static,
R: Send + 'static,
{
self.metrics.record_task_start();
let start = std::time::Instant::now();
// Use tokio-rayon to bridge to the compute pool
let pool = self.pool.clone();
let result = tokio_rayon::spawn(move || pool.install(f)).await;
self.metrics.record_task_completion(start.elapsed());
Ok(result)
}
/// Execute a function with a Rayon scope
///
/// This allows spawning multiple parallel tasks within the scope,
/// with the guarantee that all tasks complete before returning.
pub async fn execute_scoped<F, R>(&self, f: F) -> Result<R>
where
F: FnOnce(&rayon::Scope) -> R + Send + 'static,
R: Send + 'static,
{
self.metrics.record_task_start();
let start = std::time::Instant::now();
let pool = self.pool.clone();
let result = tokio_rayon::spawn(move || {
pool.install(|| {
let mut result = None;
rayon::scope(|s| {
result = Some(f(s));
});
result.unwrap()
})
})
.await;
self.metrics.record_task_completion(start.elapsed());
Ok(result)
}
/// Execute a function with a FIFO scope
///
/// Similar to execute_scoped, but tasks are prioritized in FIFO order
/// rather than the default LIFO order.
pub async fn execute_scoped_fifo<F, R>(&self, f: F) -> Result<R>
where
F: FnOnce(&rayon::ScopeFifo) -> R + Send + 'static,
R: Send + 'static,
{
self.metrics.record_task_start();
let start = std::time::Instant::now();
let pool = self.pool.clone();
let result = tokio_rayon::spawn(move || {
pool.install(|| {
let mut result = None;
rayon::scope_fifo(|s| {
result = Some(f(s));
});
result.unwrap()
})
})
.await;
self.metrics.record_task_completion(start.elapsed());
Ok(result)
}
/// Join two computations in parallel
pub async fn join<F1, F2, R1, R2>(&self, f1: F1, f2: F2) -> Result<(R1, R2)>
where
F1: FnOnce() -> R1 + Send + 'static,
F2: FnOnce() -> R2 + Send + 'static,
R1: Send + 'static,
R2: Send + 'static,
{
self.execute(move || rayon::join(f1, f2)).await
}
/// Get metrics for this compute pool
pub fn metrics(&self) -> &ComputeMetrics {
&self.metrics
}
/// Get the number of threads in the pool
pub fn num_threads(&self) -> usize {
self.pool.current_num_threads()
}
/// Install this pool as the Rayon pool for the given closure
///
/// This method is essential for using Rayon's parallel iterators (like `par_iter`,
/// `par_chunks`, etc.) with this specific thread pool. Any parallel iterator
/// operations within the closure will execute on this pool's threads.
///
/// # Example
///
/// ```ignore
/// use rayon::prelude::*;
///
/// // Process data using parallel iterators
/// let result = pool.install(|| {
/// data.par_chunks(100)
/// .map(|chunk| process_chunk(chunk))
/// .collect::<Vec<_>>()
/// }).await?;
/// ```
///
/// # Concurrent Usage
///
/// Multiple async tasks can call `install()` concurrently on the same pool.
/// The Rayon work-stealing scheduler will efficiently distribute work from
/// all concurrent operations:
///
/// ```ignore
/// // These can run concurrently without interference
/// let task1 = pool.install(|| data1.par_iter().map(f1).collect());
/// let task2 = pool.install(|| data2.par_chunks(50).map(f2).collect());
/// ```
pub async fn install<F, R>(&self, f: F) -> Result<R>
where
F: FnOnce() -> R + Send + 'static,
R: Send + 'static,
{
let pool = self.pool.clone();
self.metrics.record_task_start();
let start = std::time::Instant::now();
let result = tokio_rayon::spawn(move || pool.install(f)).await;
self.metrics.record_task_completion(start.elapsed());
Ok(result)
}
}
/// A handle to a compute task, stored as a boxed, type-erased `Send` future.
///
/// Awaiting (or polling) the handle drives the wrapped future and yields its output.
pub struct ComputeHandle<T> {
    inner: Pin<Box<dyn Future<Output = T> + Send>>,
}

impl<T> ComputeHandle<T> {
    /// Wrap `future` in a handle, boxing and pinning it on the heap.
    pub(crate) fn new<F>(future: F) -> Self
    where
        F: Future<Output = T> + Send + 'static,
    {
        let inner: Pin<Box<dyn Future<Output = T> + Send>> = Box::pin(future);
        Self { inner }
    }
}

impl<T> Future for ComputeHandle<T> {
    type Output = T;

    /// Forward the poll to the wrapped future.
    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        // The only field is a `Pin<Box<..>>`, so the handle itself is `Unpin`.
        let this = self.get_mut();
        this.inner.as_mut().poll(cx)
    }
}
/// Extension trait for ComputePool with additional patterns
#[async_trait]
pub trait ComputePoolExt {
/// Process items in parallel batches
async fn parallel_batch<T, F, R>(
&self,
items: Vec<T>,
batch_size: usize,
f: F,
) -> Result<Vec<R>>
where
T: Send + Sync + 'static,
F: Fn(&[T]) -> Vec<R> + Send + Sync + 'static,
R: Send + 'static;
/// Map over items in parallel using Rayon's par_iter
async fn parallel_map<T, F, R>(&self, items: Vec<T>, f: F) -> Result<Vec<R>>
where
T: Send + Sync + 'static,
F: Fn(T) -> R + Send + Sync + 'static,
R: Send + 'static;
}
#[async_trait]
impl ComputePoolExt for ComputePool {
async fn parallel_batch<T, F, R>(
&self,
items: Vec<T>,
batch_size: usize,
f: F,
) -> Result<Vec<R>>
where
T: Send + Sync + 'static,
F: Fn(&[T]) -> Vec<R> + Send + Sync + 'static,
R: Send + 'static,
{
use rayon::prelude::*;
self.install(move || items.par_chunks(batch_size).flat_map(f).collect())
.await
}
async fn parallel_map<T, F, R>(&self, items: Vec<T>, f: F) -> Result<Vec<R>>
where
T: Send + Sync + 'static,
F: Fn(T) -> R + Send + Sync + 'static,
R: Send + 'static,
{
use rayon::prelude::*;
self.install(move || items.into_par_iter().map(f).collect())
.await
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `execute` runs the closure and returns its value.
    #[tokio::test]
    async fn test_compute_pool_execute() {
        let pool = ComputePool::with_defaults().unwrap();
        let result = pool
            .execute(|| {
                // Simulate CPU-intensive work
                let mut sum = 0u64;
                for i in 0..1000 {
                    sum += i;
                }
                sum
            })
            .await
            .unwrap();
        assert_eq!(result, 499500);
    }

    /// `join` returns both results in argument order.
    #[tokio::test]
    async fn test_compute_pool_join() {
        let pool = ComputePool::with_defaults().unwrap();
        let (a, b) = pool.join(|| 2 + 2, || 3 * 3).await.unwrap();
        assert_eq!(a, 4);
        assert_eq!(b, 9);
    }

    /// `execute_sync` works from a blocking thread.
    #[tokio::test]
    async fn test_compute_pool_execute_sync() {
        let pool = Arc::new(ComputePool::with_defaults().unwrap());
        // Test using execute_sync from spawn_blocking
        let pool_clone = pool.clone();
        let result = tokio::task::spawn_blocking(move || {
            pool_clone.execute_sync(|| {
                let mut sum = 0u64;
                for i in 0..1000 {
                    sum += i;
                }
                sum
            })
        })
        .await
        .unwrap();
        assert_eq!(result, 499500);
    }

    /// Tasks spawned inside the scope report back over a channel.
    #[tokio::test]
    async fn test_compute_pool_scoped() {
        use std::sync::mpsc;

        let pool = ComputePool::with_defaults().unwrap();
        let result = pool
            .execute_scoped(|scope| {
                let (tx, rx) = mpsc::channel();
                for i in 0..4 {
                    let tx = tx.clone();
                    scope.spawn(move |_| {
                        tx.send((i, i * 2)).unwrap();
                    });
                }
                drop(tx); // Close sender so receiver can finish
                let mut results = vec![0; 4];
                // Messages may arrive in any order; each one carries its own slot index.
                for (i, val) in rx {
                    results[i] = val;
                }
                results
            })
            .await
            .unwrap();
        // Slots are index-addressed, so the vector is already in order.
        assert_eq!(result, vec![0, 2, 4, 6]);
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Thread-local storage for compute resources
//!
//! This module provides thread-local access to compute resources (Rayon pool and semaphore)
//! for Tokio worker threads. This eliminates the need to pass Runtime or ComputePool
//! references through async function calls.
use super::ComputePool;
use std::cell::RefCell;
use std::sync::Arc;
use tokio::sync::Semaphore;
thread_local! {
/// Thread-local compute context available on Tokio worker threads
///
/// Starts out as `None` on every thread; `initialize_context` fills it in for
/// the thread it is called on.
static COMPUTE_CONTEXT: RefCell<Option<ComputeContext>> = const { RefCell::new(None) };
}
/// Compute resources available to a Tokio worker thread
///
/// Both members are `Arc`s, so cloning the context only bumps reference counts.
#[derive(Clone)]
pub struct ComputeContext {
/// The Rayon compute pool
pub pool: Arc<ComputePool>,
/// Semaphore for block_in_place permits
/// (acquired non-blockingly by `try_acquire_block_permit`)
pub block_in_place_permits: Arc<Semaphore>,
}
/// Install the compute context for the current thread
///
/// Stores `pool` and `permits` in this thread's `COMPUTE_CONTEXT`, replacing any
/// context that was set before. Intended to be called from the Tokio runtime's
/// `on_thread_start` callback.
pub fn initialize_context(pool: Arc<ComputePool>, permits: Arc<Semaphore>) {
    let context = ComputeContext {
        pool,
        block_in_place_permits: permits,
    };
    COMPUTE_CONTEXT.with(|slot| {
        let mut current = slot.borrow_mut();
        *current = Some(context);
    });
}
/// Run `f` against the current thread's compute context
///
/// Returns `Some` with the closure's result when a context has been initialized
/// on this thread, and `None` (without calling `f`) otherwise. The context is
/// borrowed immutably for the duration of the call.
pub fn with_context<F, R>(f: F) -> Option<R>
where
    F: FnOnce(&ComputeContext) -> R,
{
    COMPUTE_CONTEXT.with(|slot| {
        let current = slot.borrow();
        let context = current.as_ref()?;
        Some(f(context))
    })
}
/// Try to acquire a block_in_place permit from thread-local context
///
/// Returns Ok(permit) if successful, Err if no context or no permits available
pub fn try_acquire_block_permit() -> Result<tokio::sync::OwnedSemaphorePermit, &'static str> {
with_context(|ctx| {
ctx.block_in_place_permits
.clone()
.try_acquire_owned()
.map_err(|_| "No permits available")
})
.ok_or("No compute context on this thread")?
}
/// Fetch a handle to the compute pool stored in the thread-local context
///
/// Returns `None` when this thread has no initialized context.
pub fn get_pool() -> Option<Arc<ComputePool>> {
    with_context(|context| Arc::clone(&context.pool))
}
/// Report whether the current thread has an initialized compute context
///
/// `true` means a pool and permit semaphore are registered for this thread, so
/// the compute macros can offload work; `false` means they would fall back to
/// inline execution.
pub fn has_compute_context() -> bool {
    let probe: Option<()> = with_context(|_| ());
    probe.is_some()
}
/// Assert that the current thread has an initialized compute context
///
/// Use this to make sure the compute macros will offload work rather than run
/// inline.
///
/// # Panics
/// Panics when the thread-local context has not been initialized.
pub fn assert_compute_context() {
    assert!(
        has_compute_context(),
        "Thread-local compute context not initialized! \
         Compute macros will fall back to inline execution. \
         Call Runtime::initialize_thread_local() on worker threads."
    );
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Without initialization every accessor reports the absence of a context.
    #[test]
    fn test_uninitialized_context() {
        let pool = get_pool();
        assert!(pool.is_none());

        let permit = try_acquire_block_permit();
        assert!(permit.is_err());

        assert!(!has_compute_context());
    }

    /// The assertion helper panics on a thread that never got a context.
    #[test]
    #[should_panic(expected = "Thread-local compute context not initialized")]
    fn test_assert_compute_context_panics() {
        assert_compute_context();
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Validation module for compute task timing
//!
//! This module is only compiled when the `compute-validation` feature is enabled.
//! It provides functions to validate that compute tasks are correctly classified
//! as small, medium, or large based on their execution time.
#[cfg(feature = "compute-validation")]
use std::sync::atomic::{AtomicU64, Ordering};
#[cfg(feature = "compute-validation")]
use std::time::Duration;
#[cfg(feature = "compute-validation")]
use tracing::warn;
/// Upper bound for small tasks in microseconds: tasks above 100μs are not "small"
#[cfg(feature = "compute-validation")]
pub const SMALL_THRESHOLD_US: u64 = 100;
/// Upper bound for medium tasks in microseconds: medium tasks span 100μs - 1ms
#[cfg(feature = "compute-validation")]
pub const MEDIUM_THRESHOLD_US: u64 = 1000;
// Process-wide counters of misclassified tasks, one per macro size class.
// They are bumped by the `validate_*` functions and read/reset by the
// `*_misclassification_metrics` helpers.
#[cfg(feature = "compute-validation")]
static SMALL_MISCLASSIFIED: AtomicU64 = AtomicU64::new(0);
#[cfg(feature = "compute-validation")]
static MEDIUM_MISCLASSIFIED: AtomicU64 = AtomicU64::new(0);
#[cfg(feature = "compute-validation")]
static LARGE_MISCLASSIFIED: AtomicU64 = AtomicU64::new(0);
/// Check a `compute_small!` task against the small threshold
///
/// When `elapsed` exceeds `SMALL_THRESHOLD_US`, the small misclassification
/// counter is incremented and a warning is logged; otherwise nothing happens.
#[cfg(feature = "compute-validation")]
pub fn validate_small(elapsed: Duration) {
    let elapsed_us = elapsed.as_micros() as u64;
    // Within budget: nothing to report.
    if elapsed_us <= SMALL_THRESHOLD_US {
        return;
    }

    SMALL_MISCLASSIFIED.fetch_add(1, Ordering::Relaxed);
    warn!(
        task_duration_us = elapsed_us,
        threshold_us = SMALL_THRESHOLD_US,
        "compute_small! task exceeded threshold. Consider using compute_medium!"
    );
}
/// Check a `compute_medium!` task against both ends of the medium range
///
/// A task faster than `SMALL_THRESHOLD_US` or slower than `MEDIUM_THRESHOLD_US`
/// increments the medium misclassification counter and logs a warning that
/// suggests the better-fitting macro.
#[cfg(feature = "compute-validation")]
pub fn validate_medium(elapsed: Duration) {
    let elapsed_us = elapsed.as_micros() as u64;

    match elapsed_us {
        // Too fast for a medium task.
        us if us < SMALL_THRESHOLD_US => {
            MEDIUM_MISCLASSIFIED.fetch_add(1, Ordering::Relaxed);
            warn!(
                task_duration_us = us,
                threshold_us = SMALL_THRESHOLD_US,
                "compute_medium! task below small threshold. Consider using compute_small!"
            );
        }
        // Too slow for a medium task.
        us if us > MEDIUM_THRESHOLD_US => {
            MEDIUM_MISCLASSIFIED.fetch_add(1, Ordering::Relaxed);
            warn!(
                task_duration_us = us,
                threshold_us = MEDIUM_THRESHOLD_US,
                "compute_medium! task exceeded threshold. Consider using compute_large!"
            );
        }
        // Inside the medium range.
        _ => {}
    }
}
/// Check that a `compute_large!` task was heavy enough to justify offloading
///
/// When `elapsed` is below `MEDIUM_THRESHOLD_US`, the large misclassification
/// counter is incremented and a warning is logged; otherwise nothing happens.
#[cfg(feature = "compute-validation")]
pub fn validate_large(elapsed: Duration) {
    let elapsed_us = elapsed.as_micros() as u64;
    // Long enough to deserve offloading: nothing to report.
    if elapsed_us >= MEDIUM_THRESHOLD_US {
        return;
    }

    LARGE_MISCLASSIFIED.fetch_add(1, Ordering::Relaxed);
    warn!(
        task_duration_us = elapsed_us,
        threshold_us = MEDIUM_THRESHOLD_US,
        "compute_large! task below medium threshold. Consider using compute_medium! or compute_small!"
    );
}
/// Read the misclassification counters
///
/// Returns `(small, medium, large)`: how many tasks run through each macro
/// were flagged as misclassified so far.
#[cfg(feature = "compute-validation")]
pub fn get_misclassification_metrics() -> (u64, u64, u64) {
    let read = |counter: &AtomicU64| counter.load(Ordering::Relaxed);

    let small = read(&SMALL_MISCLASSIFIED);
    let medium = read(&MEDIUM_MISCLASSIFIED);
    let large = read(&LARGE_MISCLASSIFIED);
    (small, medium, large)
}
/// Zero all three misclassification counters (small, medium, large)
#[cfg(feature = "compute-validation")]
pub fn reset_misclassification_metrics() {
    let counters = [
        &SMALL_MISCLASSIFIED,
        &MEDIUM_MISCLASSIFIED,
        &LARGE_MISCLASSIFIED,
    ];
    for counter in counters {
        counter.store(0, Ordering::Relaxed);
    }
}
......@@ -131,6 +131,26 @@ pub struct RuntimeConfig {
#[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
pub system_live_path: String,
/// Number of threads for the Rayon compute pool
/// If not set, defaults to num_cpus / 2
/// Set this at runtime with environment variable DYN_COMPUTE_THREADS
#[builder(default = "None")]
#[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
pub compute_threads: Option<usize>,
/// Stack size for compute threads in bytes
/// Defaults to 2MB (2097152 bytes)
/// Set this at runtime with environment variable DYN_COMPUTE_STACK_SIZE
#[builder(default = "Some(2 * 1024 * 1024)")]
#[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
pub compute_stack_size: Option<usize>,
/// Thread name prefix for compute pool threads
/// Set this at runtime with environment variable DYN_COMPUTE_THREAD_PREFIX
#[builder(default = "\"compute\".to_string()")]
#[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
pub compute_thread_prefix: String,
/// Enable active health checking with payloads
/// Set this at runtime with environment variable DYN_HEALTH_CHECK_ENABLED
#[builder(default = "false")]
......@@ -225,6 +245,23 @@ impl RuntimeConfig {
_ => None,
}
}))
.merge(Env::prefixed("DYN_COMPUTE_").filter_map(|k| {
let full_key = format!("DYN_COMPUTE_{}", k.as_str());
// filters out empty environment variables
match std::env::var(&full_key) {
Ok(v) if !v.is_empty() => {
// Map DYN_COMPUTE_* to the correct field names
let mapped_key = match k.as_str() {
"THREADS" => "compute_threads",
"STACK_SIZE" => "compute_stack_size",
"THREAD_PREFIX" => "compute_thread_prefix",
_ => k.as_str(),
};
Some(mapped_key.into())
}
_ => None,
}
}))
.merge(Env::prefixed("DYN_HEALTH_CHECK_").filter_map(|k| {
let full_key = format!("DYN_HEALTH_CHECK_{}", k.as_str());
// filters out empty environment variables
......@@ -298,6 +335,9 @@ impl RuntimeConfig {
use_endpoint_health_status: vec![],
system_health_path: DEFAULT_SYSTEM_HEALTH_PATH.to_string(),
system_live_path: DEFAULT_SYSTEM_LIVE_PATH.to_string(),
compute_threads: Some(1),
compute_stack_size: Some(2 * 1024 * 1024),
compute_thread_prefix: "compute".to_string(),
health_check_enabled: false,
canary_wait_time_secs: DEFAULT_CANARY_WAIT_TIME_SECS,
health_check_request_timeout_secs: DEFAULT_HEALTH_CHECK_REQUEST_TIMEOUT_SECS,
......@@ -330,6 +370,9 @@ impl Default for RuntimeConfig {
use_endpoint_health_status: vec![],
system_health_path: DEFAULT_SYSTEM_HEALTH_PATH.to_string(),
system_live_path: DEFAULT_SYSTEM_LIVE_PATH.to_string(),
compute_threads: None,
compute_stack_size: Some(2 * 1024 * 1024),
compute_thread_prefix: "compute".to_string(),
health_check_enabled: false,
canary_wait_time_secs: DEFAULT_CANARY_WAIT_TIME_SECS,
health_check_request_timeout_secs: DEFAULT_HEALTH_CHECK_REQUEST_TIMEOUT_SECS,
......
......@@ -21,6 +21,7 @@ mod config;
pub use config::RuntimeConfig;
pub mod component;
pub mod compute;
pub mod discovery;
pub mod engine;
pub mod health_check;
......@@ -73,6 +74,8 @@ pub struct Runtime {
cancellation_token: CancellationToken,
endpoint_shutdown_token: CancellationToken,
graceful_shutdown_tracker: Arc<GracefulShutdownTracker>,
compute_pool: Option<Arc<compute::ComputePool>>,
block_in_place_permits: Option<Arc<tokio::sync::Semaphore>>,
}
/// Type alias for runtime callback functions to reduce complexity
......
......@@ -44,6 +44,11 @@ impl Runtime {
}
};
// The compute pool and block_in_place permits start out unset here;
// `new_with_config` fills them in when the runtime is built from a RuntimeConfig
let compute_pool = None;
let block_in_place_permits = None;
Ok(Runtime {
id,
primary: runtime,
......@@ -51,9 +56,157 @@ impl Runtime {
cancellation_token,
endpoint_shutdown_token,
graceful_shutdown_tracker: Arc::new(GracefulShutdownTracker::new()),
compute_pool,
block_in_place_permits,
})
}
/// Build a `Runtime` via `Runtime::new` and attach the compute resources
/// described by `config`.
///
/// * A compute pool is created from `compute_threads`, `compute_stack_size`
///   and `compute_thread_prefix`, unless `compute_threads` is `Some(0)`,
///   which disables the pool. If pool construction fails the failure is
///   logged and the pool stays unset; it does not fail this constructor.
/// * The `block_in_place` semaphore gets one permit fewer than the worker
///   thread count, with a minimum of one permit.
///
/// # Errors
///
/// Propagates any error returned by `Runtime::new`.
fn new_with_config(
    runtime: RuntimeType,
    secondary: Option<RuntimeType>,
    config: &RuntimeConfig,
) -> Result<Runtime> {
    let mut rt = Self::new(runtime, secondary)?;

    // Check if compute pool is explicitly disabled
    if config.compute_threads == Some(0) {
        tracing::info!("Compute pool disabled (compute_threads = 0)");
    } else {
        // Only build the pool configuration when a pool is actually wanted.
        let compute_config = crate::compute::ComputeConfig {
            num_threads: config.compute_threads,
            stack_size: config.compute_stack_size,
            thread_prefix: config.compute_thread_prefix.clone(),
            pin_threads: false,
        };

        match crate::compute::ComputePool::new(compute_config) {
            Ok(pool) => {
                // Log through the local handle before storing it, so nothing
                // has to be unwrapped back out of the Option.
                let pool = Arc::new(pool);
                tracing::debug!(
                    "Initialized compute pool with {} threads",
                    pool.num_threads()
                );
                rt.compute_pool = Some(pool);
            }
            Err(e) => {
                tracing::warn!(
                    "Failed to create compute pool: {}. CPU-intensive operations will use spawn_blocking",
                    e
                );
            }
        }
    }

    // Initialize block_in_place semaphore based on actual worker threads.
    // `available_parallelism` is fallible; fall back to a single worker
    // instead of panicking during runtime construction.
    let num_workers = config.num_worker_threads.unwrap_or_else(|| {
        std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or_else(|e| {
                tracing::warn!(
                    "Failed to query available parallelism: {}. Assuming a single worker thread",
                    e
                );
                1
            })
    });

    // Reserve at least one thread for async work
    let permits = num_workers.saturating_sub(1).max(1);
    rt.block_in_place_permits = Some(Arc::new(tokio::sync::Semaphore::new(permits)));
    tracing::debug!(
        "Initialized block_in_place permits: {} (from {} worker threads)",
        permits,
        num_workers
    );

    Ok(rt)
}
/// Install the compute context into the calling thread's thread-local storage.
///
/// Passes clones of this runtime's compute pool and `block_in_place` permits
/// to `compute::thread_local::initialize_context`. Does nothing unless both
/// are present. Intended to be called on each Tokio worker thread.
pub fn initialize_thread_local(&self) {
    let resources = self
        .compute_pool
        .as_ref()
        .zip(self.block_in_place_permits.as_ref());

    if let Some((pool, permits)) = resources {
        crate::compute::thread_local::initialize_context(Arc::clone(pool), Arc::clone(permits));
    }
}
/// Initialize the thread-local compute context on every thread counted by
/// `detect_worker_thread_count`, coordinating the tasks with a barrier.
///
/// One `spawn_blocking` task is launched per detected thread. Each task first
/// waits on a shared `std::sync::Barrier` sized to that count, so no task
/// proceeds until that many tasks are waiting at once. It then calls
/// `compute::thread_local::initialize_context` with clones of the pool and
/// permits.
///
/// If the compute pool or the permits are missing, this only logs a debug
/// message and returns `Ok(())`.
///
/// # Errors
///
/// Returns an error when zero threads were detected, or when awaiting any of
/// the spawned tasks fails.
pub async fn initialize_all_thread_locals(&self) -> Result<()> {
    let (pool, permits) = match (&self.compute_pool, &self.block_in_place_permits) {
        (Some(pool), Some(permits)) => (pool, permits),
        _ => {
            tracing::debug!("No compute pool configured, skipping thread-local initialization");
            return Ok(());
        }
    };

    // Find out how many threads we need to reach.
    let num_workers = self.detect_worker_thread_count().await;
    if num_workers == 0 {
        return Err(anyhow::anyhow!("No worker threads detected"));
    }

    // Every task must arrive at this barrier before any of them continues.
    let barrier = Arc::new(std::sync::Barrier::new(num_workers));

    // NOTE(review): tokio documents `spawn_blocking` as running closures on its
    // blocking thread pool - confirm that is where the context is needed.
    // All tasks are spawned eagerly here, before any of them is awaited.
    let tasks: Vec<_> = (0..num_workers)
        .map(|worker| {
            let barrier = Arc::clone(&barrier);
            let pool = Arc::clone(pool);
            let permits = Arc::clone(permits);
            tokio::task::spawn_blocking(move || {
                // Rendezvous first so that `num_workers` tasks run concurrently.
                barrier.wait();

                crate::compute::thread_local::initialize_context(pool, permits);

                let thread_id = std::thread::current().id();
                tracing::trace!(
                    "Initialized thread-local compute context on thread {:?} (worker {})",
                    thread_id,
                    worker
                );
            })
        })
        .collect();

    // Surface the first join failure, if any.
    for task in tasks {
        task.await?;
    }

    tracing::info!(
        "Successfully initialized thread-local compute context on {} worker threads",
        num_workers
    );
    Ok(())
}
/// Estimate the runtime's thread count by probing with blocking tasks.
///
/// Spawns a fixed number of `spawn_blocking` probes, each recording the id of
/// the thread it ran on, and returns how many distinct thread ids were seen.
/// Probe join errors are ignored.
///
/// NOTE(review): tokio documents `spawn_blocking` as using its blocking thread
/// pool, so the result may not equal the number of async worker threads and
/// may vary between calls - confirm this is the intended measurement.
async fn detect_worker_thread_count(&self) -> usize {
    use std::collections::HashSet;
    use std::sync::Mutex;

    // Number of probes to launch; more probes give more chances to land on
    // different threads.
    const NUM_PROBES: usize = 100;

    let seen_threads = Arc::new(Mutex::new(HashSet::new()));

    // Launch every probe up front, then wait for them afterwards.
    let probes: Vec<_> = (0..NUM_PROBES)
        .map(|_| {
            let seen = Arc::clone(&seen_threads);
            tokio::task::spawn_blocking(move || {
                let thread_id = std::thread::current().id();
                seen.lock().unwrap().insert(thread_id);
            })
        })
        .collect();

    // Wait for all probes to complete; a failed probe just contributes no id.
    for probe in probes {
        let _ = probe.await;
    }

    let count = seen_threads.lock().unwrap().len();
    tracing::debug!("Detected {} worker threads in runtime", count);
    count
}
/// Build a `Runtime` from the handle returned by
/// `tokio::runtime::Handle::current()`, delegating to `Runtime::from_handle`.
///
/// NOTE(review): presumably this must be called from within a Tokio runtime
/// context - confirm against the tokio `Handle::current` documentation.
pub fn from_current() -> Result<Runtime> {
    let handle = tokio::runtime::Handle::current();
    Runtime::from_handle(handle)
}
......@@ -71,7 +224,7 @@ impl Runtime {
let runtime = Arc::new(config.create_runtime()?);
let primary = RuntimeType::Shared(runtime.clone());
let secondary = RuntimeType::External(runtime.handle().clone());
Runtime::new(primary, Some(secondary))
Runtime::new_with_config(primary, Some(secondary), &config)
}
/// Create a [`Runtime`] with two single-threaded async tokio runtime
......@@ -111,6 +264,13 @@ impl Runtime {
self.graceful_shutdown_tracker.clone()
}
/// Borrow the compute pool used for CPU-intensive operations.
///
/// Returns `None` when no pool is attached to this runtime, for example when
/// it was disabled with `compute_threads = 0` or when pool creation failed.
pub fn compute_pool(&self) -> Option<&Arc<crate::compute::ComputePool>> {
    let pool = &self.compute_pool;
    pool.as_ref()
}
/// Shuts down the [`Runtime`] instance
pub fn shutdown(&self) {
tracing::info!("Runtime shutdown initiated");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment