Unverified Commit 91a459c0 authored by GuanLuo's avatar GuanLuo Committed by GitHub
Browse files

feat: KServe gRPC support (#2638)

parent ef535edb
...@@ -100,11 +100,11 @@ jobs: ...@@ -100,11 +100,11 @@ jobs:
# Have an explicit step to build tests first to separate time spent on build vs execution. # Have an explicit step to build tests first to separate time spent on build vs execution.
- name: Compile Tests - name: Compile Tests
working-directory: ${{ matrix.dir }} working-directory: ${{ matrix.dir }}
run: cargo test --locked --no-run run: cargo test --locked --no-run --target-dir ${HOME}/tmp || df -h
- name: Run Doc Tests - name: Run Doc Tests
working-directory: ${{ matrix.dir }} working-directory: ${{ matrix.dir }}
run: cargo doc --no-deps && cargo test --locked --doc run: cargo doc --no-deps && cargo test --locked --doc
- name: Run Unit Tests - name: Run Unit Tests
working-directory: ${{ matrix.dir }} working-directory: ${{ matrix.dir }}
# NOTE: --all-targets doesn't run doc tests # NOTE: --all-targets doesn't run doc tests
run: cargo test --locked --all-targets run: cargo test --locked --all-targets --target-dir ${HOME}/tmp
...@@ -33,7 +33,7 @@ version = "0.8.12" ...@@ -33,7 +33,7 @@ version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"getrandom 0.3.3", "getrandom 0.3.3",
"once_cell", "once_cell",
"serde", "serde",
...@@ -88,9 +88,9 @@ dependencies = [ ...@@ -88,9 +88,9 @@ dependencies = [
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.19" version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192"
dependencies = [ dependencies = [
"anstyle", "anstyle",
"anstyle-parse", "anstyle-parse",
...@@ -118,29 +118,29 @@ dependencies = [ ...@@ -118,29 +118,29 @@ dependencies = [
[[package]] [[package]]
name = "anstyle-query" name = "anstyle-query"
version = "1.1.3" version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
dependencies = [ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
name = "anstyle-wincon" name = "anstyle-wincon"
version = "3.0.9" version = "3.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a"
dependencies = [ dependencies = [
"anstyle", "anstyle",
"once_cell_polyfill", "once_cell_polyfill",
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.98" version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
[[package]] [[package]]
name = "apodize" name = "apodize"
...@@ -159,9 +159,9 @@ dependencies = [ ...@@ -159,9 +159,9 @@ dependencies = [
[[package]] [[package]]
name = "arbitrary" name = "arbitrary"
version = "1.4.1" version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [ dependencies = [
"derive_arbitrary", "derive_arbitrary",
] ]
...@@ -180,7 +180,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" ...@@ -180,7 +180,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -257,7 +257,7 @@ checksum = "0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398" ...@@ -257,7 +257,7 @@ checksum = "0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -279,18 +279,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" ...@@ -279,18 +279,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "async-trait" name = "async-trait"
version = "0.1.88" version = "0.1.89"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -439,7 +439,7 @@ dependencies = [ ...@@ -439,7 +439,7 @@ dependencies = [
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"http-body-util", "http-body-util",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-util", "hyper-util",
"itoa", "itoa",
"matchit 0.8.4", "matchit 0.8.4",
...@@ -508,7 +508,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" ...@@ -508,7 +508,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -522,7 +522,7 @@ dependencies = [ ...@@ -522,7 +522,7 @@ dependencies = [
"fs-err", "fs-err",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-util", "hyper-util",
"pin-project-lite", "pin-project-lite",
"rustls", "rustls",
...@@ -554,7 +554,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -554,7 +554,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
dependencies = [ dependencies = [
"addr2line", "addr2line",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
"miniz_oxide", "miniz_oxide",
"object", "object",
...@@ -601,10 +601,10 @@ version = "0.69.5" ...@@ -601,10 +601,10 @@ version = "0.69.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.10.5", "itertools 0.12.1",
"lazy_static", "lazy_static",
"lazycell", "lazycell",
"log", "log",
...@@ -614,7 +614,7 @@ dependencies = [ ...@@ -614,7 +614,7 @@ dependencies = [
"regex", "regex",
"rustc-hash 1.1.0", "rustc-hash 1.1.0",
"shlex", "shlex",
"syn 2.0.104", "syn 2.0.106",
"which", "which",
] ]
...@@ -624,10 +624,10 @@ version = "0.71.1" ...@@ -624,10 +624,10 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.10.5", "itertools 0.13.0",
"log", "log",
"prettyplease", "prettyplease",
"proc-macro2", "proc-macro2",
...@@ -635,7 +635,7 @@ dependencies = [ ...@@ -635,7 +635,7 @@ dependencies = [
"regex", "regex",
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"shlex", "shlex",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -644,10 +644,10 @@ version = "0.72.0" ...@@ -644,10 +644,10 @@ version = "0.72.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f72209734318d0b619a5e0f5129918b848c416e122a3c4ce054e03cb87b726f" checksum = "4f72209734318d0b619a5e0f5129918b848c416e122a3c4ce054e03cb87b726f"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.10.5", "itertools 0.13.0",
"log", "log",
"prettyplease", "prettyplease",
"proc-macro2", "proc-macro2",
...@@ -655,7 +655,7 @@ dependencies = [ ...@@ -655,7 +655,7 @@ dependencies = [
"regex", "regex",
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"shlex", "shlex",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -723,9 +723,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" ...@@ -723,9 +723,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.1" version = "2.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
[[package]] [[package]]
name = "bitstream-io" name = "bitstream-io"
...@@ -742,7 +742,7 @@ dependencies = [ ...@@ -742,7 +742,7 @@ dependencies = [
"arrayref", "arrayref",
"arrayvec", "arrayvec",
"cc", "cc",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"constant_time_eq", "constant_time_eq",
] ]
...@@ -793,7 +793,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -793,7 +793,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [ dependencies = [
"memchr", "memchr",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"serde", "serde",
] ]
...@@ -817,22 +817,22 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" ...@@ -817,22 +817,22 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.23.1" version = "1.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677"
dependencies = [ dependencies = [
"bytemuck_derive", "bytemuck_derive",
] ]
[[package]] [[package]]
name = "bytemuck_derive" name = "bytemuck_derive"
version = "1.10.0" version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "441473f2b4b0459a68628c744bc61d23e730fb00128b841d30fa4bb3972257e4" checksum = "4f154e572231cb6ba2bd1176980827e3d5dc04cc183a75dea38109fbdd672d29"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -865,9 +865,9 @@ dependencies = [ ...@@ -865,9 +865,9 @@ dependencies = [
"ahash", "ahash",
"cached_proc_macro", "cached_proc_macro",
"cached_proc_macro_types", "cached_proc_macro_types",
"hashbrown 0.15.4", "hashbrown 0.15.5",
"once_cell", "once_cell",
"thiserror 2.0.12", "thiserror 2.0.16",
"web-time", "web-time",
] ]
...@@ -880,7 +880,7 @@ dependencies = [ ...@@ -880,7 +880,7 @@ dependencies = [
"darling 0.20.11", "darling 0.20.11",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -930,7 +930,7 @@ dependencies = [ ...@@ -930,7 +930,7 @@ dependencies = [
"rand 0.9.2", "rand 0.9.2",
"rand_distr 0.5.1", "rand_distr 0.5.1",
"rayon", "rayon",
"safetensors 0.6.1", "safetensors 0.6.2",
"thiserror 1.0.69", "thiserror 1.0.69",
"ug", "ug",
"ug-cuda", "ug-cuda",
...@@ -970,7 +970,7 @@ dependencies = [ ...@@ -970,7 +970,7 @@ dependencies = [
"metal 0.27.0", "metal 0.27.0",
"num-traits", "num-traits",
"rayon", "rayon",
"safetensors 0.6.1", "safetensors 0.6.2",
"serde", "serde",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
...@@ -996,24 +996,24 @@ version = "0.27.0" ...@@ -996,24 +996,24 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb" checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb"
dependencies = [ dependencies = [
"clap 4.5.42", "clap 4.5.45",
"heck 0.4.1", "heck 0.4.1",
"indexmap 2.10.0", "indexmap 2.11.0",
"log", "log",
"proc-macro2", "proc-macro2",
"quote", "quote",
"serde", "serde",
"serde_json", "serde_json",
"syn 2.0.104", "syn 2.0.106",
"tempfile", "tempfile",
"toml", "toml",
] ]
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.33" version = "1.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc"
dependencies = [ dependencies = [
"jobserver", "jobserver",
"libc", "libc",
...@@ -1047,9 +1047,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" ...@@ -1047,9 +1047,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.1" version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]] [[package]]
name = "cfg_aliases" name = "cfg_aliases"
...@@ -1063,7 +1063,7 @@ version = "0.13.10" ...@@ -1063,7 +1063,7 @@ version = "0.13.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"lazy_static", "lazy_static",
"num-traits", "num-traits",
"regex", "regex",
...@@ -1110,9 +1110,9 @@ dependencies = [ ...@@ -1110,9 +1110,9 @@ dependencies = [
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.5.42" version = "4.5.45"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882" checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318"
dependencies = [ dependencies = [
"clap_builder", "clap_builder",
"clap_derive", "clap_derive",
...@@ -1120,9 +1120,9 @@ dependencies = [ ...@@ -1120,9 +1120,9 @@ dependencies = [
[[package]] [[package]]
name = "clap_builder" name = "clap_builder"
version = "4.5.42" version = "4.5.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966" checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8"
dependencies = [ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
...@@ -1133,14 +1133,14 @@ dependencies = [ ...@@ -1133,14 +1133,14 @@ dependencies = [
[[package]] [[package]]
name = "clap_derive" name = "clap_derive"
version = "4.5.41" version = "4.5.45"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6"
dependencies = [ dependencies = [
"heck 0.5.0", "heck 0.5.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1177,7 +1177,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1177,7 +1177,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [ dependencies = [
"castaway", "castaway",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"itoa", "itoa",
"rustversion", "rustversion",
"ryu", "ryu",
...@@ -1301,7 +1301,7 @@ version = "1.5.0" ...@@ -1301,7 +1301,7 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -1457,7 +1457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1457,7 +1457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [ dependencies = [
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1506,7 +1506,7 @@ version = "4.1.3" ...@@ -1506,7 +1506,7 @@ version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cpufeatures", "cpufeatures",
"curve25519-dalek-derive", "curve25519-dalek-derive",
"digest", "digest",
...@@ -1523,7 +1523,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" ...@@ -1523,7 +1523,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1571,7 +1571,7 @@ dependencies = [ ...@@ -1571,7 +1571,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"strsim 0.11.1", "strsim 0.11.1",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1593,7 +1593,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" ...@@ -1593,7 +1593,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [ dependencies = [
"darling_core 0.20.11", "darling_core 0.20.11",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1611,7 +1611,7 @@ version = "5.5.3" ...@@ -1611,7 +1611,7 @@ version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"hashbrown 0.14.5", "hashbrown 0.14.5",
"lock_api", "lock_api",
"once_cell", "once_cell",
...@@ -1659,7 +1659,7 @@ checksum = "74ef43543e701c01ad77d3a5922755c6a1d71b22d942cb8042be4994b380caff" ...@@ -1659,7 +1659,7 @@ checksum = "74ef43543e701c01ad77d3a5922755c6a1d71b22d942cb8042be4994b380caff"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1670,18 +1670,18 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc" ...@@ -1670,18 +1670,18 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "derive_arbitrary" name = "derive_arbitrary"
version = "1.4.1" version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1702,7 +1702,7 @@ dependencies = [ ...@@ -1702,7 +1702,7 @@ dependencies = [
"darling 0.20.11", "darling 0.20.11",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1712,7 +1712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1712,7 +1712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [ dependencies = [
"derive_builder_core", "derive_builder_core",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1723,7 +1723,7 @@ checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" ...@@ -1723,7 +1723,7 @@ checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1743,7 +1743,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" ...@@ -1743,7 +1743,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1755,8 +1755,8 @@ dependencies = [ ...@@ -1755,8 +1755,8 @@ dependencies = [
"anyhow", "anyhow",
"bytemuck", "bytemuck",
"bytemuck_derive", "bytemuck_derive",
"hashbrown 0.15.4", "hashbrown 0.15.5",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
"strum", "strum",
] ]
...@@ -1837,7 +1837,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" ...@@ -1837,7 +1837,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -1904,12 +1904,12 @@ dependencies = [ ...@@ -1904,12 +1904,12 @@ dependencies = [
"eventsource-stream", "eventsource-stream",
"futures", "futures",
"rand 0.9.2", "rand 0.9.2",
"reqwest 0.12.22", "reqwest 0.12.23",
"reqwest-eventsource", "reqwest-eventsource",
"secrecy", "secrecy",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tokio-test", "tokio-test",
...@@ -1941,7 +1941,7 @@ dependencies = [ ...@@ -1941,7 +1941,7 @@ dependencies = [
"dynamo-llm", "dynamo-llm",
"dynamo-runtime", "dynamo-runtime",
"either", "either",
"indexmap 2.10.0", "indexmap 2.11.0",
"mistralrs", "mistralrs",
"serde_json", "serde_json",
"tokio", "tokio",
...@@ -2002,10 +2002,11 @@ dependencies = [ ...@@ -2002,10 +2002,11 @@ dependencies = [
"parking_lot", "parking_lot",
"prometheus", "prometheus",
"proptest", "proptest",
"prost",
"rand 0.9.2", "rand 0.9.2",
"rayon", "rayon",
"regex", "regex",
"reqwest 0.12.22", "reqwest 0.12.23",
"rmp-serde", "rmp-serde",
"rstest 0.18.2", "rstest 0.18.2",
"rstest_reuse", "rstest_reuse",
...@@ -2015,19 +2016,21 @@ dependencies = [ ...@@ -2015,19 +2016,21 @@ dependencies = [
"serial_test", "serial_test",
"strum", "strum",
"tempfile", "tempfile",
"thiserror 2.0.12", "thiserror 2.0.16",
"tmq", "tmq",
"tokenizers", "tokenizers",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tokio-util", "tokio-util",
"toktrie 1.1.1", "toktrie 1.2.0",
"toktrie_hf_tokenizers 1.1.1", "toktrie_hf_tokenizers 1.2.0",
"tonic 0.13.1",
"tonic-build",
"tower-http", "tower-http",
"tracing", "tracing",
"unicode-segmentation", "unicode-segmentation",
"url", "url",
"uuid 1.17.0", "uuid 1.18.0",
"validator", "validator",
"xxhash-rust", "xxhash-rust",
"zeromq", "zeromq",
...@@ -2045,7 +2048,7 @@ dependencies = [ ...@@ -2045,7 +2048,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"tracing", "tracing",
"uuid 1.17.0", "uuid 1.18.0",
] ]
[[package]] [[package]]
...@@ -2055,7 +2058,7 @@ dependencies = [ ...@@ -2055,7 +2058,7 @@ dependencies = [
"anyhow", "anyhow",
"async-stream", "async-stream",
"async-trait", "async-trait",
"clap 4.5.42", "clap 4.5.45",
"dynamo-async-openai", "dynamo-async-openai",
"dynamo-engine-llamacpp", "dynamo-engine-llamacpp",
"dynamo-engine-mistralrs", "dynamo-engine-mistralrs",
...@@ -2073,7 +2076,7 @@ dependencies = [ ...@@ -2073,7 +2076,7 @@ dependencies = [
"tokio-util", "tokio-util",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"uuid 1.17.0", "uuid 1.18.0",
"vergen-gitcl", "vergen-gitcl",
] ]
...@@ -2114,7 +2117,7 @@ dependencies = [ ...@@ -2114,7 +2117,7 @@ dependencies = [
"prometheus", "prometheus",
"rand 0.9.2", "rand 0.9.2",
"regex", "regex",
"reqwest 0.12.22", "reqwest 0.12.23",
"rstest 0.23.0", "rstest 0.23.0",
"serde", "serde",
"serde_json", "serde_json",
...@@ -2122,7 +2125,7 @@ dependencies = [ ...@@ -2122,7 +2125,7 @@ dependencies = [
"stdio-override", "stdio-override",
"temp-env", "temp-env",
"tempfile", "tempfile",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tokio-util", "tokio-util",
...@@ -2130,7 +2133,7 @@ dependencies = [ ...@@ -2130,7 +2133,7 @@ dependencies = [
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"url", "url",
"uuid 1.17.0", "uuid 1.18.0",
"validator", "validator",
"xxhash-rust", "xxhash-rust",
] ]
...@@ -2176,7 +2179,7 @@ dependencies = [ ...@@ -2176,7 +2179,7 @@ dependencies = [
"enum-ordinalize", "enum-ordinalize",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2206,7 +2209,7 @@ version = "0.8.35" ...@@ -2206,7 +2209,7 @@ version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -2224,7 +2227,7 @@ dependencies = [ ...@@ -2224,7 +2227,7 @@ dependencies = [
"heck 0.5.0", "heck 0.5.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2244,7 +2247,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" ...@@ -2244,7 +2247,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2264,7 +2267,7 @@ checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" ...@@ -2264,7 +2267,7 @@ checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2307,7 +2310,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" ...@@ -2307,7 +2310,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2410,8 +2413,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -2410,8 +2413,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [ dependencies = [
"bit-set 0.5.3", "bit-set 0.5.3",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -2421,8 +2424,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -2421,8 +2424,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [ dependencies = [
"bit-set 0.8.0", "bit-set 0.8.0",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -2538,7 +2541,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" ...@@ -2538,7 +2541,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2555,9 +2558,9 @@ checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" ...@@ -2555,9 +2558,9 @@ checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.2.1" version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
dependencies = [ dependencies = [
"percent-encoding", "percent-encoding",
] ]
...@@ -2670,7 +2673,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" ...@@ -2670,7 +2673,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -2990,7 +2993,7 @@ version = "0.2.16" ...@@ -2990,7 +2993,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"js-sys", "js-sys",
"libc", "libc",
"wasi 0.11.1+wasi-snapshot-preview1", "wasi 0.11.1+wasi-snapshot-preview1",
...@@ -3003,7 +3006,7 @@ version = "0.3.3" ...@@ -3003,7 +3006,7 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"js-sys", "js-sys",
"libc", "libc",
"r-efi", "r-efi",
...@@ -3030,7 +3033,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173" ...@@ -3030,7 +3033,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
dependencies = [ dependencies = [
"fancy-regex 0.14.0", "fancy-regex 0.14.0",
"ggml-quants", "ggml-quants",
"indexmap 2.10.0", "indexmap 2.11.0",
"log", "log",
"num_enum", "num_enum",
] ]
...@@ -3053,9 +3056,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" ...@@ -3053,9 +3056,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]] [[package]]
name = "glob" name = "glob"
version = "0.3.2" version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]] [[package]]
name = "globset" name = "globset"
...@@ -3066,8 +3069,8 @@ dependencies = [ ...@@ -3066,8 +3069,8 @@ dependencies = [
"aho-corasick", "aho-corasick",
"bstr", "bstr",
"log", "log",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -3082,7 +3085,7 @@ dependencies = [ ...@@ -3082,7 +3085,7 @@ dependencies = [
"futures-sink", "futures-sink",
"futures-util", "futures-util",
"http 0.2.12", "http 0.2.12",
"indexmap 2.10.0", "indexmap 2.11.0",
"slab", "slab",
"tokio", "tokio",
"tokio-util", "tokio-util",
...@@ -3091,9 +3094,9 @@ dependencies = [ ...@@ -3091,9 +3094,9 @@ dependencies = [
[[package]] [[package]]
name = "h2" name = "h2"
version = "0.4.11" version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785" checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
dependencies = [ dependencies = [
"atomic-waker", "atomic-waker",
"bytes", "bytes",
...@@ -3101,7 +3104,7 @@ dependencies = [ ...@@ -3101,7 +3104,7 @@ dependencies = [
"futures-core", "futures-core",
"futures-sink", "futures-sink",
"http 1.3.1", "http 1.3.1",
"indexmap 2.10.0", "indexmap 2.11.0",
"slab", "slab",
"tokio", "tokio",
"tokio-util", "tokio-util",
...@@ -3121,7 +3124,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -3121,7 +3124,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"crunchy", "crunchy",
"num-traits", "num-traits",
"rand 0.9.2", "rand 0.9.2",
...@@ -3142,9 +3145,9 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" ...@@ -3142,9 +3145,9 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.15.4" version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [ dependencies = [
"allocator-api2", "allocator-api2",
"equivalent", "equivalent",
...@@ -3211,10 +3214,10 @@ dependencies = [ ...@@ -3211,10 +3214,10 @@ dependencies = [
"log", "log",
"num_cpus", "num_cpus",
"rand 0.9.2", "rand 0.9.2",
"reqwest 0.12.22", "reqwest 0.12.23",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokio", "tokio",
"ureq", "ureq",
"windows-sys 0.60.2", "windows-sys 0.60.2",
...@@ -3243,7 +3246,7 @@ checksum = "c1637acec3b965bab873352189d887b12c87b4f8d7571f4d185e796be5654ad8" ...@@ -3243,7 +3246,7 @@ checksum = "c1637acec3b965bab873352189d887b12c87b4f8d7571f4d185e796be5654ad8"
dependencies = [ dependencies = [
"html5ever 0.31.0", "html5ever 0.31.0",
"tendril", "tendril",
"thiserror 2.0.12", "thiserror 2.0.16",
"unicode-width 0.2.1", "unicode-width 0.2.1",
] ]
...@@ -3371,20 +3374,22 @@ dependencies = [ ...@@ -3371,20 +3374,22 @@ dependencies = [
[[package]] [[package]]
name = "hyper" name = "hyper"
version = "1.6.0" version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e"
dependencies = [ dependencies = [
"atomic-waker",
"bytes", "bytes",
"futures-channel", "futures-channel",
"futures-util", "futures-core",
"h2 0.4.11", "h2 0.4.12",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"httparse", "httparse",
"httpdate", "httpdate",
"itoa", "itoa",
"pin-project-lite", "pin-project-lite",
"pin-utils",
"smallvec", "smallvec",
"tokio", "tokio",
"want", "want",
...@@ -3397,7 +3402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -3397,7 +3402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [ dependencies = [
"http 1.3.1", "http 1.3.1",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-util", "hyper-util",
"rustls", "rustls",
"rustls-native-certs 0.8.1", "rustls-native-certs 0.8.1",
...@@ -3414,7 +3419,7 @@ version = "0.5.2" ...@@ -3414,7 +3419,7 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
dependencies = [ dependencies = [
"hyper 1.6.0", "hyper 1.7.0",
"hyper-util", "hyper-util",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",
...@@ -3429,7 +3434,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" ...@@ -3429,7 +3434,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [ dependencies = [
"bytes", "bytes",
"http-body-util", "http-body-util",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-util", "hyper-util",
"native-tls", "native-tls",
"tokio", "tokio",
...@@ -3450,7 +3455,7 @@ dependencies = [ ...@@ -3450,7 +3455,7 @@ dependencies = [
"futures-util", "futures-util",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"hyper 1.6.0", "hyper 1.7.0",
"ipnet", "ipnet",
"libc", "libc",
"percent-encoding", "percent-encoding",
...@@ -3581,9 +3586,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" ...@@ -3581,9 +3586,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]] [[package]]
name = "idna" name = "idna"
version = "1.0.3" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
dependencies = [ dependencies = [
"idna_adapter", "idna_adapter",
"smallvec", "smallvec",
...@@ -3652,12 +3657,12 @@ dependencies = [ ...@@ -3652,12 +3657,12 @@ dependencies = [
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.10.0" version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
dependencies = [ dependencies = [
"equivalent", "equivalent",
"hashbrown 0.15.4", "hashbrown 0.15.5",
"serde", "serde",
] ]
...@@ -3704,7 +3709,7 @@ version = "0.1.13" ...@@ -3704,7 +3709,7 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -3715,7 +3720,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" ...@@ -3715,7 +3720,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -3733,12 +3738,12 @@ dependencies = [ ...@@ -3733,12 +3738,12 @@ dependencies = [
[[package]] [[package]]
name = "io-uring" name = "io-uring"
version = "0.7.9" version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
] ]
...@@ -3800,6 +3805,15 @@ dependencies = [ ...@@ -3800,6 +3805,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itertools" name = "itertools"
version = "0.14.0" version = "0.14.0"
...@@ -3836,14 +3850,14 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" ...@@ -3836,14 +3850,14 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.33" version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [ dependencies = [
"getrandom 0.3.3", "getrandom 0.3.3",
"libc", "libc",
...@@ -3875,7 +3889,7 @@ dependencies = [ ...@@ -3875,7 +3889,7 @@ dependencies = [
"anyhow", "anyhow",
"base64 0.21.7", "base64 0.21.7",
"bytecount", "bytecount",
"clap 4.5.42", "clap 4.5.45",
"fancy-regex 0.11.0", "fancy-regex 0.11.0",
"fraction", "fraction",
"getrandom 0.2.16", "getrandom 0.2.16",
...@@ -3892,7 +3906,7 @@ dependencies = [ ...@@ -3892,7 +3906,7 @@ dependencies = [
"serde_json", "serde_json",
"time", "time",
"url", "url",
"uuid 1.17.0", "uuid 1.18.0",
] ]
[[package]] [[package]]
...@@ -3935,9 +3949,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" ...@@ -3935,9 +3949,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.174" version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]] [[package]]
name = "libdynamo_llm" name = "libdynamo_llm"
...@@ -3957,7 +3971,7 @@ dependencies = [ ...@@ -3957,7 +3971,7 @@ dependencies = [
"tokio-stream", "tokio-stream",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"uuid 1.17.0", "uuid 1.18.0",
] ]
[[package]] [[package]]
...@@ -3976,7 +3990,7 @@ version = "0.8.8" ...@@ -3976,7 +3990,7 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"windows-targets 0.53.3", "windows-targets 0.53.3",
] ]
...@@ -3992,7 +4006,7 @@ version = "0.1.9" ...@@ -3992,7 +4006,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"libc", "libc",
] ]
...@@ -4048,8 +4062,8 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d ...@@ -4048,8 +4062,8 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d
dependencies = [ dependencies = [
"anyhow", "anyhow",
"derivre", "derivre",
"indexmap 2.10.0", "indexmap 2.11.0",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
"serde", "serde",
"serde_json", "serde_json",
"toktrie 1.0.0", "toktrie 1.0.0",
...@@ -4063,7 +4077,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8" ...@@ -4063,7 +4077,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
dependencies = [ dependencies = [
"libc", "libc",
"neli", "neli",
"thiserror 2.0.12", "thiserror 2.0.16",
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
...@@ -4175,7 +4189,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" ...@@ -4175,7 +4189,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -4215,7 +4229,7 @@ version = "0.1.1" ...@@ -4215,7 +4229,7 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"rayon", "rayon",
] ]
...@@ -4227,9 +4241,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" ...@@ -4227,9 +4241,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]] [[package]]
name = "memmap2" name = "memmap2"
version = "0.9.7" version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28" checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7"
dependencies = [ dependencies = [
"libc", "libc",
"stable_deref_trait", "stable_deref_trait",
...@@ -4256,7 +4270,7 @@ version = "0.27.0" ...@@ -4256,7 +4270,7 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25" checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"block", "block",
"core-graphics-types", "core-graphics-types",
"foreign-types 0.5.0", "foreign-types 0.5.0",
...@@ -4271,7 +4285,7 @@ version = "0.29.0" ...@@ -4271,7 +4285,7 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"block", "block",
"core-graphics-types", "core-graphics-types",
"foreign-types 0.5.0", "foreign-types 0.5.0",
...@@ -4285,16 +4299,16 @@ name = "metrics" ...@@ -4285,16 +4299,16 @@ name = "metrics"
version = "0.4.1" version = "0.4.1"
dependencies = [ dependencies = [
"axum 0.8.4", "axum 0.8.4",
"clap 4.5.42", "clap 4.5.45",
"dynamo-llm", "dynamo-llm",
"dynamo-runtime", "dynamo-runtime",
"futures", "futures",
"prometheus", "prometheus",
"rand 0.9.2", "rand 0.9.2",
"reqwest 0.12.22", "reqwest 0.12.23",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokio", "tokio",
"tracing", "tracing",
] ]
...@@ -4317,9 +4331,9 @@ dependencies = [ ...@@ -4317,9 +4331,9 @@ dependencies = [
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.11.0" version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e60ac08614cc09062820e51d5d94c2fce16b94ea4e5003bb81b99a95f84e876" checksum = "a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990"
dependencies = [ dependencies = [
"memo-map", "memo-map",
"self_cell", "self_cell",
...@@ -4329,9 +4343,9 @@ dependencies = [ ...@@ -4329,9 +4343,9 @@ dependencies = [
[[package]] [[package]]
name = "minijinja-contrib" name = "minijinja-contrib"
version = "2.11.0" version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93e5bfa889f16d8c10ec92ac964074a68a7206c0fd9748ff23a31942c85d97c" checksum = "182ba1438db4679ddfa03792c183bdc2b9ce26b58e7d41a749e59b06497cf136"
dependencies = [ dependencies = [
"minijinja", "minijinja",
"serde", "serde",
...@@ -4415,14 +4429,14 @@ dependencies = [ ...@@ -4415,14 +4429,14 @@ dependencies = [
"anyhow", "anyhow",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)", "candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-nn", "candle-nn",
"clap 4.5.42", "clap 4.5.45",
"either", "either",
"futures", "futures",
"image", "image",
"indexmap 2.10.0", "indexmap 2.11.0",
"mistralrs-core", "mistralrs-core",
"rand 0.9.2", "rand 0.9.2",
"reqwest 0.12.22", "reqwest 0.12.23",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
...@@ -4460,7 +4474,7 @@ dependencies = [ ...@@ -4460,7 +4474,7 @@ dependencies = [
"candle-nn", "candle-nn",
"cfgrammar", "cfgrammar",
"chrono", "chrono",
"clap 4.5.42", "clap 4.5.45",
"csv", "csv",
"derive-new", "derive-new",
"derive_more 2.0.1", "derive_more 2.0.1",
...@@ -4470,13 +4484,13 @@ dependencies = [ ...@@ -4470,13 +4484,13 @@ dependencies = [
"futures", "futures",
"galil-seiferas", "galil-seiferas",
"half 2.6.0", "half 2.6.0",
"hashbrown 0.15.4", "hashbrown 0.15.5",
"hf-hub", "hf-hub",
"hound", "hound",
"html2text", "html2text",
"http 1.3.1", "http 1.3.1",
"image", "image",
"indexmap 2.10.0", "indexmap 2.11.0",
"indicatif", "indicatif",
"interprocess", "interprocess",
"itertools 0.14.0", "itertools 0.14.0",
...@@ -4502,13 +4516,13 @@ dependencies = [ ...@@ -4502,13 +4516,13 @@ dependencies = [
"rand_isaac", "rand_isaac",
"rayon", "rayon",
"regex", "regex",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"reqwest 0.12.22", "reqwest 0.12.23",
"rubato", "rubato",
"rust-mcp-schema", "rust-mcp-schema",
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"rustfft", "rustfft",
"safetensors 0.6.1", "safetensors 0.6.2",
"schemars 0.8.22", "schemars 0.8.22",
"scraper", "scraper",
"serde", "serde",
...@@ -4520,7 +4534,7 @@ dependencies = [ ...@@ -4520,7 +4534,7 @@ dependencies = [
"strum", "strum",
"symphonia", "symphonia",
"sysinfo", "sysinfo",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokenizers", "tokenizers",
"tokio", "tokio",
"tokio-rayon", "tokio-rayon",
...@@ -4531,7 +4545,7 @@ dependencies = [ ...@@ -4531,7 +4545,7 @@ dependencies = [
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"urlencoding", "urlencoding",
"uuid 1.17.0", "uuid 1.18.0",
"variantly", "variantly",
"vob", "vob",
] ]
...@@ -4545,7 +4559,7 @@ dependencies = [ ...@@ -4545,7 +4559,7 @@ dependencies = [
"async-trait", "async-trait",
"futures-util", "futures-util",
"http 1.3.1", "http 1.3.1",
"reqwest 0.12.22", "reqwest 0.12.23",
"rust-mcp-schema", "rust-mcp-schema",
"serde", "serde",
"serde_json", "serde_json",
...@@ -4553,7 +4567,7 @@ dependencies = [ ...@@ -4553,7 +4567,7 @@ dependencies = [
"tokio-tungstenite 0.24.0", "tokio-tungstenite 0.24.0",
"tracing", "tracing",
"utoipa", "utoipa",
"uuid 1.17.0", "uuid 1.18.0",
] ]
[[package]] [[package]]
...@@ -4568,7 +4582,7 @@ dependencies = [ ...@@ -4568,7 +4582,7 @@ dependencies = [
"half 2.6.0", "half 2.6.0",
"metal 0.27.0", "metal 0.27.0",
"once_cell", "once_cell",
"thiserror 2.0.12", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -4590,10 +4604,10 @@ dependencies = [ ...@@ -4590,10 +4604,10 @@ dependencies = [
"paste", "paste",
"rayon", "rayon",
"regex", "regex",
"safetensors 0.6.1", "safetensors 0.6.2",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokio", "tokio",
"tracing", "tracing",
"yoke 0.7.5", "yoke 0.7.5",
...@@ -4627,7 +4641,7 @@ checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5" ...@@ -4627,7 +4641,7 @@ checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -4754,7 +4768,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -4754,7 +4768,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [ dependencies = [
"bitflags 1.3.2", "bitflags 1.3.2",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
"memoffset", "memoffset",
"pin-utils", "pin-utils",
...@@ -4766,8 +4780,8 @@ version = "0.29.0" ...@@ -4766,8 +4780,8 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cfg_aliases", "cfg_aliases",
"libc", "libc",
] ]
...@@ -4784,7 +4798,7 @@ dependencies = [ ...@@ -4784,7 +4798,7 @@ dependencies = [
"os_info", "os_info",
"pkg-config", "pkg-config",
"serde", "serde",
"thiserror 2.0.12", "thiserror 2.0.16",
"tracing", "tracing",
] ]
...@@ -4916,7 +4930,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" ...@@ -4916,7 +4930,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -4989,7 +5003,7 @@ dependencies = [ ...@@ -4989,7 +5003,7 @@ dependencies = [
"proc-macro-crate", "proc-macro-crate",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5069,7 +5083,7 @@ version = "6.5.1" ...@@ -5069,7 +5083,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"libc", "libc",
"once_cell", "once_cell",
"onig_sys", "onig_sys",
...@@ -5100,19 +5114,19 @@ dependencies = [ ...@@ -5100,19 +5114,19 @@ dependencies = [
"anyhow", "anyhow",
"base64 0.22.1", "base64 0.22.1",
"bstr", "bstr",
"clap 4.5.42", "clap 4.5.45",
"fancy-regex 0.13.0", "fancy-regex 0.13.0",
"futures", "futures",
"image", "image",
"regex", "regex",
"reqwest 0.12.22", "reqwest 0.12.23",
"rustc-hash 1.1.0", "rustc-hash 1.1.0",
"serde", "serde",
"serde_json", "serde_json",
"serde_with", "serde_with",
"sha1", "sha1",
"sha2", "sha2",
"thiserror 2.0.12", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -5121,8 +5135,8 @@ version = "0.10.73" ...@@ -5121,8 +5135,8 @@ version = "0.10.73"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"foreign-types 0.3.2", "foreign-types 0.3.2",
"libc", "libc",
"once_cell", "once_cell",
...@@ -5138,7 +5152,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" ...@@ -5138,7 +5152,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5228,7 +5242,7 @@ version = "0.9.11" ...@@ -5228,7 +5242,7 @@ version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
"redox_syscall", "redox_syscall",
"smallvec", "smallvec",
...@@ -5261,7 +5275,7 @@ dependencies = [ ...@@ -5261,7 +5275,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"proc-macro2-diagnostics", "proc-macro2-diagnostics",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5275,9 +5289,9 @@ dependencies = [ ...@@ -5275,9 +5289,9 @@ dependencies = [
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.3.1" version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]] [[package]]
name = "pest" name = "pest"
...@@ -5286,7 +5300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -5286,7 +5300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
dependencies = [ dependencies = [
"memchr", "memchr",
"thiserror 2.0.12", "thiserror 2.0.16",
"ucd-trie", "ucd-trie",
] ]
...@@ -5310,7 +5324,7 @@ dependencies = [ ...@@ -5310,7 +5324,7 @@ dependencies = [
"pest_meta", "pest_meta",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5330,7 +5344,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -5330,7 +5344,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [ dependencies = [
"fixedbitset", "fixedbitset",
"indexmap 2.10.0", "indexmap 2.11.0",
] ]
[[package]] [[package]]
...@@ -5373,7 +5387,7 @@ dependencies = [ ...@@ -5373,7 +5387,7 @@ dependencies = [
"phf_shared", "phf_shared",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5402,7 +5416,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" ...@@ -5402,7 +5416,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5440,7 +5454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -5440,7 +5454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1" checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"indexmap 2.10.0", "indexmap 2.11.0",
"quick-xml", "quick-xml",
"serde", "serde",
"time", "time",
...@@ -5534,12 +5548,12 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" ...@@ -5534,12 +5548,12 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]] [[package]]
name = "prettyplease" name = "prettyplease"
version = "0.2.36" version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5579,14 +5593,14 @@ dependencies = [ ...@@ -5579,14 +5593,14 @@ dependencies = [
"proc-macro-error-attr2", "proc-macro-error-attr2",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.95" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
...@@ -5599,7 +5613,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" ...@@ -5599,7 +5613,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
"version_check", "version_check",
"yansi", "yansi",
] ]
...@@ -5620,7 +5634,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -5620,7 +5634,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b" checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
dependencies = [ dependencies = [
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5629,13 +5643,13 @@ version = "0.14.0" ...@@ -5629,13 +5643,13 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"fnv", "fnv",
"lazy_static", "lazy_static",
"memchr", "memchr",
"parking_lot", "parking_lot",
"protobuf", "protobuf",
"thiserror 2.0.12", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -5646,13 +5660,13 @@ checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" ...@@ -5646,13 +5660,13 @@ checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
dependencies = [ dependencies = [
"bit-set 0.8.0", "bit-set 0.8.0",
"bit-vec 0.8.0", "bit-vec 0.8.0",
"bitflags 2.9.1", "bitflags 2.9.3",
"lazy_static", "lazy_static",
"num-traits", "num-traits",
"rand 0.9.2", "rand 0.9.2",
"rand_chacha 0.9.0", "rand_chacha 0.9.0",
"rand_xorshift", "rand_xorshift",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
"rusty-fork", "rusty-fork",
"tempfile", "tempfile",
"unarray", "unarray",
...@@ -5684,7 +5698,7 @@ dependencies = [ ...@@ -5684,7 +5698,7 @@ dependencies = [
"prost", "prost",
"prost-types", "prost-types",
"regex", "regex",
"syn 2.0.104", "syn 2.0.106",
"tempfile", "tempfile",
] ]
...@@ -5698,7 +5712,7 @@ dependencies = [ ...@@ -5698,7 +5712,7 @@ dependencies = [
"itertools 0.14.0", "itertools 0.14.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -5749,7 +5763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -5749,7 +5763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libm", "libm",
"num-complex", "num-complex",
"reborrow", "reborrow",
...@@ -5779,9 +5793,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" ...@@ -5779,9 +5793,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]] [[package]]
name = "quick-xml" name = "quick-xml"
version = "0.38.0" version = "0.38.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8927b0664f5c5a98265138b7e3f90aa19a6b21353182469ace36d4ac527b7b1b" checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
...@@ -5800,7 +5814,7 @@ dependencies = [ ...@@ -5800,7 +5814,7 @@ dependencies = [
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"rustls", "rustls",
"socket2 0.5.10", "socket2 0.5.10",
"thiserror 2.0.12", "thiserror 2.0.16",
"tokio", "tokio",
"tracing", "tracing",
"web-time", "web-time",
...@@ -5821,7 +5835,7 @@ dependencies = [ ...@@ -5821,7 +5835,7 @@ dependencies = [
"rustls", "rustls",
"rustls-pki-types", "rustls-pki-types",
"slab", "slab",
"thiserror 2.0.12", "thiserror 2.0.16",
"tinyvec", "tinyvec",
"tracing", "tracing",
"web-time", "web-time",
...@@ -5975,7 +5989,7 @@ dependencies = [ ...@@ -5975,7 +5989,7 @@ dependencies = [
"av1-grain", "av1-grain",
"bitstream-io", "bitstream-io",
"built", "built",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"interpolate_name", "interpolate_name",
"itertools 0.12.1", "itertools 0.12.1",
"libc", "libc",
...@@ -6028,7 +6042,7 @@ version = "11.5.0" ...@@ -6028,7 +6042,7 @@ version = "11.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
] ]
[[package]] [[package]]
...@@ -6039,9 +6053,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" ...@@ -6039,9 +6053,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.10.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [ dependencies = [
"either", "either",
"rayon-core", "rayon-core",
...@@ -6060,9 +6074,9 @@ dependencies = [ ...@@ -6060,9 +6074,9 @@ dependencies = [
[[package]] [[package]]
name = "rayon-core" name = "rayon-core"
version = "1.12.1" version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [ dependencies = [
"crossbeam-deque", "crossbeam-deque",
"crossbeam-utils", "crossbeam-utils",
...@@ -6095,7 +6109,7 @@ version = "0.5.17" ...@@ -6095,7 +6109,7 @@ version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
] ]
[[package]] [[package]]
...@@ -6106,7 +6120,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" ...@@ -6106,7 +6120,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [ dependencies = [
"getrandom 0.2.16", "getrandom 0.2.16",
"libredox", "libredox",
"thiserror 2.0.12", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -6126,19 +6140,19 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" ...@@ -6126,19 +6140,19 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.11.1" version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -6152,13 +6166,13 @@ dependencies = [ ...@@ -6152,13 +6166,13 @@ dependencies = [
[[package]] [[package]]
name = "regex-automata" name = "regex-automata"
version = "0.4.9" version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -6169,9 +6183,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" ...@@ -6169,9 +6183,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.5" version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]] [[package]]
name = "relative-path" name = "relative-path"
...@@ -6217,9 +6231,9 @@ dependencies = [ ...@@ -6217,9 +6231,9 @@ dependencies = [
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.12.22" version = "0.12.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"bytes", "bytes",
...@@ -6227,11 +6241,11 @@ dependencies = [ ...@@ -6227,11 +6241,11 @@ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2 0.4.11", "h2 0.4.12",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"http-body-util", "http-body-util",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-rustls", "hyper-rustls",
"hyper-tls", "hyper-tls",
"hyper-util", "hyper-util",
...@@ -6277,7 +6291,7 @@ dependencies = [ ...@@ -6277,7 +6291,7 @@ dependencies = [
"mime", "mime",
"nom 7.1.3", "nom 7.1.3",
"pin-project-lite", "pin-project-lite",
"reqwest 0.12.22", "reqwest 0.12.23",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
...@@ -6294,7 +6308,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -6294,7 +6308,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [ dependencies = [
"cc", "cc",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"getrandom 0.2.16", "getrandom 0.2.16",
"libc", "libc",
"untrusted", "untrusted",
...@@ -6353,14 +6367,14 @@ version = "0.18.2" ...@@ -6353,14 +6367,14 @@ version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605" checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"glob", "glob",
"proc-macro2", "proc-macro2",
"quote", "quote",
"regex", "regex",
"relative-path", "relative-path",
"rustc_version", "rustc_version",
"syn 2.0.104", "syn 2.0.106",
"unicode-ident", "unicode-ident",
] ]
...@@ -6370,7 +6384,7 @@ version = "0.23.0" ...@@ -6370,7 +6384,7 @@ version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a" checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"glob", "glob",
"proc-macro-crate", "proc-macro-crate",
"proc-macro2", "proc-macro2",
...@@ -6378,7 +6392,7 @@ dependencies = [ ...@@ -6378,7 +6392,7 @@ dependencies = [
"regex", "regex",
"relative-path", "relative-path",
"rustc_version", "rustc_version",
"syn 2.0.104", "syn 2.0.106",
"unicode-ident", "unicode-ident",
] ]
...@@ -6390,7 +6404,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" ...@@ -6390,7 +6404,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14"
dependencies = [ dependencies = [
"quote", "quote",
"rand 0.8.5", "rand 0.8.5",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -6472,7 +6486,7 @@ version = "0.38.44" ...@@ -6472,7 +6486,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"errno", "errno",
"libc", "libc",
"linux-raw-sys 0.4.15", "linux-raw-sys 0.4.15",
...@@ -6485,7 +6499,7 @@ version = "1.0.8" ...@@ -6485,7 +6499,7 @@ version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"errno", "errno",
"libc", "libc",
"linux-raw-sys 0.9.4", "linux-raw-sys 0.9.4",
...@@ -6530,7 +6544,7 @@ dependencies = [ ...@@ -6530,7 +6544,7 @@ dependencies = [
"openssl-probe", "openssl-probe",
"rustls-pki-types", "rustls-pki-types",
"schannel", "schannel",
"security-framework 3.2.0", "security-framework 3.3.0",
] ]
[[package]] [[package]]
...@@ -6576,9 +6590,9 @@ dependencies = [ ...@@ -6576,9 +6590,9 @@ dependencies = [
[[package]] [[package]]
name = "rustversion" name = "rustversion"
version = "1.0.21" version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]] [[package]]
name = "rusty-fork" name = "rusty-fork"
...@@ -6619,9 +6633,9 @@ dependencies = [ ...@@ -6619,9 +6633,9 @@ dependencies = [
[[package]] [[package]]
name = "safetensors" name = "safetensors"
version = "0.6.1" version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acc594eede59bd7facbb55c179d1d67980f412c201544b21fce7f9ac3aab4a5" checksum = "172dd94c5a87b5c79f945c863da53b2ebc7ccef4eca24ac63cca66a41aab2178"
dependencies = [ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
...@@ -6638,9 +6652,9 @@ dependencies = [ ...@@ -6638,9 +6652,9 @@ dependencies = [
[[package]] [[package]]
name = "scc" name = "scc"
version = "2.3.4" version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
dependencies = [ dependencies = [
"sdd", "sdd",
] ]
...@@ -6699,7 +6713,7 @@ dependencies = [ ...@@ -6699,7 +6713,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"serde_derive_internals", "serde_derive_internals",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -6745,7 +6759,7 @@ version = "2.11.1" ...@@ -6745,7 +6759,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"core-foundation 0.9.4", "core-foundation 0.9.4",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
...@@ -6754,11 +6768,11 @@ dependencies = [ ...@@ -6754,11 +6768,11 @@ dependencies = [
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "3.2.0" version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"core-foundation 0.10.1", "core-foundation 0.10.1",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
...@@ -6781,7 +6795,7 @@ version = "0.26.0" ...@@ -6781,7 +6795,7 @@ version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"cssparser", "cssparser",
"derive_more 0.99.20", "derive_more 0.99.20",
"fxhash", "fxhash",
...@@ -6848,7 +6862,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" ...@@ -6848,7 +6862,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -6859,16 +6873,16 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" ...@@ -6859,16 +6873,16 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.142" version = "1.0.143"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"itoa", "itoa",
"memchr", "memchr",
"ryu", "ryu",
...@@ -6911,7 +6925,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" ...@@ -6911,7 +6925,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -6945,7 +6959,7 @@ dependencies = [ ...@@ -6945,7 +6959,7 @@ dependencies = [
"chrono", "chrono",
"hex", "hex",
"indexmap 1.9.3", "indexmap 1.9.3",
"indexmap 2.10.0", "indexmap 2.11.0",
"schemars 0.9.0", "schemars 0.9.0",
"schemars 1.0.4", "schemars 1.0.4",
"serde", "serde",
...@@ -6964,7 +6978,7 @@ dependencies = [ ...@@ -6964,7 +6978,7 @@ dependencies = [
"darling 0.20.11", "darling 0.20.11",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -6973,7 +6987,7 @@ version = "0.9.34+deprecated" ...@@ -6973,7 +6987,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"itoa", "itoa",
"ryu", "ryu",
"serde", "serde",
...@@ -7002,7 +7016,7 @@ checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" ...@@ -7002,7 +7016,7 @@ checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -7020,7 +7034,7 @@ version = "0.10.6" ...@@ -7020,7 +7034,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cpufeatures", "cpufeatures",
"digest", "digest",
] ]
...@@ -7031,7 +7045,7 @@ version = "0.10.9" ...@@ -7031,7 +7045,7 @@ version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cpufeatures", "cpufeatures",
"digest", "digest",
] ]
...@@ -7080,9 +7094,9 @@ dependencies = [ ...@@ -7080,9 +7094,9 @@ dependencies = [
[[package]] [[package]]
name = "signal-hook-registry" name = "signal-hook-registry"
version = "1.4.5" version = "1.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b"
dependencies = [ dependencies = [
"libc", "libc",
] ]
...@@ -7151,9 +7165,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" ...@@ -7151,9 +7165,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.10" version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
[[package]] [[package]]
name = "smallvec" name = "smallvec"
...@@ -7330,7 +7344,7 @@ dependencies = [ ...@@ -7330,7 +7344,7 @@ dependencies = [
"heck 0.5.0", "heck 0.5.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -7487,9 +7501,9 @@ dependencies = [ ...@@ -7487,9 +7501,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.104" version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
...@@ -7519,7 +7533,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" ...@@ -7519,7 +7533,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -7528,7 +7542,7 @@ version = "0.5.5" ...@@ -7528,7 +7542,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",
...@@ -7542,7 +7556,7 @@ version = "0.6.0" ...@@ -7542,7 +7556,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",
...@@ -7556,7 +7570,7 @@ version = "0.30.13" ...@@ -7556,7 +7570,7 @@ version = "0.30.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
"ntapi", "ntapi",
...@@ -7582,7 +7596,7 @@ version = "0.6.1" ...@@ -7582,7 +7596,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"core-foundation 0.9.4", "core-foundation 0.9.4",
"system-configuration-sys 0.6.0", "system-configuration-sys 0.6.0",
] ]
...@@ -7638,15 +7652,15 @@ dependencies = [ ...@@ -7638,15 +7652,15 @@ dependencies = [
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.20.0" version = "3.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
dependencies = [ dependencies = [
"fastrand", "fastrand",
"getrandom 0.3.3", "getrandom 0.3.3",
"once_cell", "once_cell",
"rustix 1.0.8", "rustix 1.0.8",
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
...@@ -7662,12 +7676,12 @@ dependencies = [ ...@@ -7662,12 +7676,12 @@ dependencies = [
[[package]] [[package]]
name = "terminal_size" name = "terminal_size"
version = "0.4.2" version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0"
dependencies = [ dependencies = [
"rustix 1.0.8", "rustix 1.0.8",
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
...@@ -7690,11 +7704,11 @@ dependencies = [ ...@@ -7690,11 +7704,11 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.12" version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
dependencies = [ dependencies = [
"thiserror-impl 2.0.12", "thiserror-impl 2.0.16",
] ]
[[package]] [[package]]
...@@ -7705,18 +7719,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" ...@@ -7705,18 +7719,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "2.0.12" version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -7725,7 +7739,7 @@ version = "1.1.9" ...@@ -7725,7 +7739,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -7794,9 +7808,9 @@ dependencies = [ ...@@ -7794,9 +7808,9 @@ dependencies = [
[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.9.0" version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
dependencies = [ dependencies = [
"tinyvec_macros", "tinyvec_macros",
] ]
...@@ -7845,11 +7859,11 @@ dependencies = [ ...@@ -7845,11 +7859,11 @@ dependencies = [
"rayon", "rayon",
"rayon-cond", "rayon-cond",
"regex", "regex",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
"serde", "serde",
"serde_json", "serde_json",
"spm_precompiled", "spm_precompiled",
"thiserror 2.0.12", "thiserror 2.0.16",
"unicode-normalization-alignments", "unicode-normalization-alignments",
"unicode-segmentation", "unicode-segmentation",
"unicode_categories", "unicode_categories",
...@@ -7884,7 +7898,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" ...@@ -7884,7 +7898,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -7967,16 +7981,15 @@ dependencies = [ ...@@ -7967,16 +7981,15 @@ dependencies = [
[[package]] [[package]]
name = "tokio-util" name = "tokio-util"
version = "0.7.15" version = "0.7.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5"
dependencies = [ dependencies = [
"bytes", "bytes",
"futures-core", "futures-core",
"futures-io", "futures-io",
"futures-sink", "futures-sink",
"futures-util", "futures-util",
"hashbrown 0.15.4",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",
] ]
...@@ -8016,9 +8029,9 @@ dependencies = [ ...@@ -8016,9 +8029,9 @@ dependencies = [
[[package]] [[package]]
name = "toktrie" name = "toktrie"
version = "1.1.1" version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c01fe70e9a91498c029fb6d5aacf9a648bb1bf30a5db4c344d3effb6367b1f3" checksum = "804459e7818d5cc8920f80d555526454353117a4f2fcda38e4b38666639d7e81"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytemuck", "bytemuck",
...@@ -8042,16 +8055,16 @@ dependencies = [ ...@@ -8042,16 +8055,16 @@ dependencies = [
[[package]] [[package]]
name = "toktrie_hf_tokenizers" name = "toktrie_hf_tokenizers"
version = "1.1.1" version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759491ad9b56050f817e24d68f48eb7f13bee5f8b48127ba3a9079a579726f40" checksum = "6fc302ed2c53b1d7fcbe3d760a2dc6567def0eb2da7ca09cb21a16b9a9aa4f13"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"log", "log",
"serde", "serde",
"serde_json", "serde_json",
"tokenizers", "tokenizers",
"toktrie 1.1.1", "toktrie 1.2.0",
] ]
[[package]] [[package]]
...@@ -8081,7 +8094,7 @@ version = "0.22.27" ...@@ -8081,7 +8094,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"serde", "serde",
"serde_spanned", "serde_spanned",
"toml_datetime", "toml_datetime",
...@@ -8106,11 +8119,11 @@ dependencies = [ ...@@ -8106,11 +8119,11 @@ dependencies = [
"axum 0.7.9", "axum 0.7.9",
"base64 0.22.1", "base64 0.22.1",
"bytes", "bytes",
"h2 0.4.11", "h2 0.4.12",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"http-body-util", "http-body-util",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-timeout", "hyper-timeout",
"hyper-util", "hyper-util",
"percent-encoding", "percent-encoding",
...@@ -8135,11 +8148,11 @@ dependencies = [ ...@@ -8135,11 +8148,11 @@ dependencies = [
"axum 0.8.4", "axum 0.8.4",
"base64 0.22.1", "base64 0.22.1",
"bytes", "bytes",
"h2 0.4.11", "h2 0.4.12",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"http-body-util", "http-body-util",
"hyper 1.6.0", "hyper 1.7.0",
"hyper-timeout", "hyper-timeout",
"hyper-util", "hyper-util",
"percent-encoding", "percent-encoding",
...@@ -8166,7 +8179,7 @@ dependencies = [ ...@@ -8166,7 +8179,7 @@ dependencies = [
"prost-build", "prost-build",
"prost-types", "prost-types",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -8197,7 +8210,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" ...@@ -8197,7 +8210,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-util", "futures-util",
"indexmap 2.10.0", "indexmap 2.11.0",
"pin-project-lite", "pin-project-lite",
"slab", "slab",
"sync_wrapper 1.0.2", "sync_wrapper 1.0.2",
...@@ -8214,7 +8227,7 @@ version = "0.6.6" ...@@ -8214,7 +8227,7 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
"bytes", "bytes",
"futures-util", "futures-util",
"http 1.3.1", "http 1.3.1",
...@@ -8270,7 +8283,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" ...@@ -8270,7 +8283,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -8379,7 +8392,7 @@ dependencies = [ ...@@ -8379,7 +8392,7 @@ dependencies = [
"bytes", "bytes",
"log", "log",
"rand 0.9.2", "rand 0.9.2",
"thiserror 2.0.12", "thiserror 2.0.16",
"utf-8", "utf-8",
] ]
...@@ -8548,9 +8561,9 @@ dependencies = [ ...@@ -8548,9 +8561,9 @@ dependencies = [
[[package]] [[package]]
name = "url" name = "url"
version = "2.5.4" version = "2.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b"
dependencies = [ dependencies = [
"form_urlencoded", "form_urlencoded",
"idna", "idna",
...@@ -8588,7 +8601,7 @@ version = "5.4.0" ...@@ -8588,7 +8601,7 @@ version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993" checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"serde", "serde",
"serde_json", "serde_json",
"utoipa-gen", "utoipa-gen",
...@@ -8602,7 +8615,7 @@ checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b" ...@@ -8602,7 +8615,7 @@ checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -8616,9 +8629,9 @@ dependencies = [ ...@@ -8616,9 +8629,9 @@ dependencies = [
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.17.0" version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be"
dependencies = [ dependencies = [
"getrandom 0.3.3", "getrandom 0.3.3",
"js-sys", "js-sys",
...@@ -8664,7 +8677,7 @@ dependencies = [ ...@@ -8664,7 +8677,7 @@ dependencies = [
"proc-macro-error2", "proc-macro-error2",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -8744,9 +8757,9 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" ...@@ -8744,9 +8757,9 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]] [[package]]
name = "vob" name = "vob"
version = "3.0.5" version = "3.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0baa046ba374a7701d98032a468a0bbd968a8cd3a2ae39c94d74e211fac05c81" checksum = "bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38"
dependencies = [ dependencies = [
"num-traits", "num-traits",
"serde", "serde",
...@@ -8801,7 +8814,7 @@ version = "0.2.100" ...@@ -8801,7 +8814,7 @@ version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"once_cell", "once_cell",
"rustversion", "rustversion",
"wasm-bindgen-macro", "wasm-bindgen-macro",
...@@ -8817,7 +8830,7 @@ dependencies = [ ...@@ -8817,7 +8830,7 @@ dependencies = [
"log", "log",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
...@@ -8827,7 +8840,7 @@ version = "0.4.50" ...@@ -8827,7 +8840,7 @@ version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"js-sys", "js-sys",
"once_cell", "once_cell",
"wasm-bindgen", "wasm-bindgen",
...@@ -8852,7 +8865,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" ...@@ -8852,7 +8865,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
"wasm-bindgen-backend", "wasm-bindgen-backend",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
...@@ -8993,11 +9006,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" ...@@ -8993,11 +9006,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.9" version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22"
dependencies = [ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
...@@ -9046,7 +9059,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" ...@@ -9046,7 +9059,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -9057,7 +9070,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" ...@@ -9057,7 +9070,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -9319,9 +9332,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" ...@@ -9319,9 +9332,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.7.12" version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
...@@ -9332,7 +9345,7 @@ version = "0.50.0" ...@@ -9332,7 +9345,7 @@ version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
...@@ -9342,7 +9355,7 @@ version = "0.39.0" ...@@ -9342,7 +9355,7 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [ dependencies = [
"bitflags 2.9.1", "bitflags 2.9.3",
] ]
[[package]] [[package]]
...@@ -9405,7 +9418,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" ...@@ -9405,7 +9418,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
"synstructure", "synstructure",
] ]
...@@ -9417,7 +9430,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" ...@@ -9417,7 +9430,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
"synstructure", "synstructure",
] ]
...@@ -9438,7 +9451,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" ...@@ -9438,7 +9451,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -9458,7 +9471,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" ...@@ -9458,7 +9471,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
"synstructure", "synstructure",
] ]
...@@ -9492,7 +9505,7 @@ dependencies = [ ...@@ -9492,7 +9505,7 @@ dependencies = [
"thiserror 1.0.69", "thiserror 1.0.69",
"tokio", "tokio",
"tokio-util", "tokio-util",
"uuid 1.17.0", "uuid 1.18.0",
] ]
[[package]] [[package]]
...@@ -9518,9 +9531,9 @@ dependencies = [ ...@@ -9518,9 +9531,9 @@ dependencies = [
[[package]] [[package]]
name = "zerovec" name = "zerovec"
version = "0.11.2" version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b"
dependencies = [ dependencies = [
"yoke 0.8.0", "yoke 0.8.0",
"zerofrom", "zerofrom",
...@@ -9535,7 +9548,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" ...@@ -9535,7 +9548,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.104", "syn 2.0.106",
] ]
[[package]] [[package]]
...@@ -9548,7 +9561,7 @@ dependencies = [ ...@@ -9548,7 +9561,7 @@ dependencies = [
"crc32fast", "crc32fast",
"crossbeam-utils", "crossbeam-utils",
"displaydoc", "displaydoc",
"indexmap 2.10.0", "indexmap 2.11.0",
"num_enum", "num_enum",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
......
...@@ -176,6 +176,12 @@ def parse_args(): ...@@ -176,6 +176,12 @@ def parse_args():
default=None, default=None,
help="Prefix for Dynamo frontend metrics. If unset, uses DYN_METRICS_PREFIX env var or 'dynamo_frontend'.", help="Prefix for Dynamo frontend metrics. If unset, uses DYN_METRICS_PREFIX env var or 'dynamo_frontend'.",
) )
parser.add_argument(
"--kserve-grpc-server",
action="store_true",
default=False,
help="Start KServe gRPC server.",
)
flags = parser.parse_args() flags = parser.parse_args()
...@@ -246,6 +252,8 @@ async def async_main(): ...@@ -246,6 +252,8 @@ async def async_main():
try: try:
if flags.interactive: if flags.interactive:
await run_input(runtime, "text", engine) await run_input(runtime, "text", engine)
elif flags.kserve_grpc_server:
await run_input(runtime, "grpc", engine)
else: else:
await run_input(runtime, "http", engine) await run_input(runtime, "http", engine)
except asyncio.exceptions.CancelledError: except asyncio.exceptions.CancelledError:
......
...@@ -83,7 +83,6 @@ pub async fn run( ...@@ -83,7 +83,6 @@ pub async fn run(
rt.clone(), rt.clone(),
) )
.await?; .await?;
// //
// Run in from an input // Run in from an input
// //
......
...@@ -26,7 +26,7 @@ Example: ...@@ -26,7 +26,7 @@ Example:
See `docs/guides/dynamo_run.md` in the repo for full details. See `docs/guides/dynamo_run.md` in the repo for full details.
"#; "#;
const USAGE: &str = "USAGE: dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"; const USAGE: &str = "USAGE: dynamo-run in=[http|grpc|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]";
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
// Set log level based on verbosity flag // Set log level based on verbosity flag
......
...@@ -23,7 +23,7 @@ version = "0.8.12" ...@@ -23,7 +23,7 @@ version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"getrandom 0.3.3", "getrandom 0.3.3",
"once_cell", "once_cell",
"serde", "serde",
...@@ -465,7 +465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -465,7 +465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
dependencies = [ dependencies = [
"addr2line", "addr2line",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
"miniz_oxide", "miniz_oxide",
"object", "object",
...@@ -506,7 +506,7 @@ version = "0.69.5" ...@@ -506,7 +506,7 @@ version = "0.69.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.12.1", "itertools 0.12.1",
...@@ -529,7 +529,7 @@ version = "0.71.1" ...@@ -529,7 +529,7 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.13.0", "itertools 0.13.0",
...@@ -587,9 +587,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" ...@@ -587,9 +587,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.2" version = "2.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
[[package]] [[package]]
name = "bitstream-io" name = "bitstream-io"
...@@ -606,7 +606,7 @@ dependencies = [ ...@@ -606,7 +606,7 @@ dependencies = [
"arrayref", "arrayref",
"arrayvec", "arrayvec",
"cc", "cc",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"constant_time_eq", "constant_time_eq",
] ]
...@@ -637,7 +637,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -637,7 +637,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [ dependencies = [
"memchr", "memchr",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"serde", "serde",
] ]
...@@ -727,9 +727,9 @@ dependencies = [ ...@@ -727,9 +727,9 @@ dependencies = [
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.33" version = "1.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc"
dependencies = [ dependencies = [
"jobserver", "jobserver",
"libc", "libc",
...@@ -763,9 +763,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" ...@@ -763,9 +763,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.1" version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]] [[package]]
name = "cfg_aliases" name = "cfg_aliases"
...@@ -865,7 +865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -865,7 +865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [ dependencies = [
"castaway", "castaway",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"itoa", "itoa",
"rustversion", "rustversion",
"ryu", "ryu",
...@@ -948,7 +948,7 @@ version = "1.5.0" ...@@ -948,7 +948,7 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -1047,7 +1047,7 @@ version = "4.1.3" ...@@ -1047,7 +1047,7 @@ version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cpufeatures", "cpufeatures",
"curve25519-dalek-derive", "curve25519-dalek-derive",
"digest", "digest",
...@@ -1117,7 +1117,7 @@ version = "5.5.3" ...@@ -1117,7 +1117,7 @@ version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"hashbrown 0.14.5", "hashbrown 0.14.5",
"lock_api", "lock_api",
"once_cell", "once_cell",
...@@ -1342,7 +1342,7 @@ dependencies = [ ...@@ -1342,7 +1342,7 @@ dependencies = [
"secrecy", "secrecy",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.15", "thiserror 2.0.16",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tokio-util", "tokio-util",
...@@ -1394,7 +1394,9 @@ dependencies = [ ...@@ -1394,7 +1394,9 @@ dependencies = [
"nixl-sys", "nixl-sys",
"offset-allocator", "offset-allocator",
"oneshot", "oneshot",
"parking_lot",
"prometheus", "prometheus",
"prost",
"rand 0.9.2", "rand 0.9.2",
"rayon", "rayon",
"regex", "regex",
...@@ -1404,7 +1406,7 @@ dependencies = [ ...@@ -1404,7 +1406,7 @@ dependencies = [
"serde_json", "serde_json",
"strum", "strum",
"tempfile", "tempfile",
"thiserror 2.0.15", "thiserror 2.0.16",
"tmq", "tmq",
"tokenizers", "tokenizers",
"tokio", "tokio",
...@@ -1412,6 +1414,8 @@ dependencies = [ ...@@ -1412,6 +1414,8 @@ dependencies = [
"tokio-util", "tokio-util",
"toktrie", "toktrie",
"toktrie_hf_tokenizers", "toktrie_hf_tokenizers",
"tonic",
"tonic-build",
"tower-http", "tower-http",
"tracing", "tracing",
"unicode-segmentation", "unicode-segmentation",
...@@ -1460,7 +1464,7 @@ dependencies = [ ...@@ -1460,7 +1464,7 @@ dependencies = [
"rstest", "rstest",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.15", "thiserror 2.0.16",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tokio-util", "tokio-util",
...@@ -1505,7 +1509,7 @@ dependencies = [ ...@@ -1505,7 +1509,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"socket2 0.5.10", "socket2 0.5.10",
"thiserror 2.0.15", "thiserror 2.0.16",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tokio-util", "tokio-util",
...@@ -1573,7 +1577,7 @@ version = "0.8.35" ...@@ -1573,7 +1577,7 @@ version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -1733,8 +1737,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1733,8 +1737,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [ dependencies = [
"bit-set 0.5.3", "bit-set 0.5.3",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -1744,8 +1748,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1744,8 +1748,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [ dependencies = [
"bit-set 0.8.0", "bit-set 0.8.0",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -1810,9 +1814,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" ...@@ -1810,9 +1814,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.2.1" version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
dependencies = [ dependencies = [
"percent-encoding", "percent-encoding",
] ]
...@@ -2207,7 +2211,7 @@ version = "0.2.16" ...@@ -2207,7 +2211,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"js-sys", "js-sys",
"libc", "libc",
"wasi 0.11.1+wasi-snapshot-preview1", "wasi 0.11.1+wasi-snapshot-preview1",
...@@ -2220,7 +2224,7 @@ version = "0.3.3" ...@@ -2220,7 +2224,7 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"js-sys", "js-sys",
"libc", "libc",
"r-efi", "r-efi",
...@@ -2247,7 +2251,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173" ...@@ -2247,7 +2251,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
dependencies = [ dependencies = [
"fancy-regex 0.14.0", "fancy-regex 0.14.0",
"ggml-quants", "ggml-quants",
"indexmap 2.10.0", "indexmap 2.11.0",
"log", "log",
"num_enum", "num_enum",
] ]
...@@ -2286,7 +2290,7 @@ dependencies = [ ...@@ -2286,7 +2290,7 @@ dependencies = [
"futures-core", "futures-core",
"futures-sink", "futures-sink",
"http", "http",
"indexmap 2.10.0", "indexmap 2.11.0",
"slab", "slab",
"tokio", "tokio",
"tokio-util", "tokio-util",
...@@ -2300,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -2300,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"crunchy", "crunchy",
"num-traits", "num-traits",
"rand 0.9.2", "rand 0.9.2",
...@@ -2360,7 +2364,7 @@ dependencies = [ ...@@ -2360,7 +2364,7 @@ dependencies = [
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 2.0.15", "thiserror 2.0.16",
"tokio", "tokio",
"ureq", "ureq",
"windows-sys 0.60.2", "windows-sys 0.60.2",
...@@ -2625,9 +2629,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" ...@@ -2625,9 +2629,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]] [[package]]
name = "idna" name = "idna"
version = "1.0.3" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
dependencies = [ dependencies = [
"idna_adapter", "idna_adapter",
"smallvec", "smallvec",
...@@ -2696,9 +2700,9 @@ dependencies = [ ...@@ -2696,9 +2700,9 @@ dependencies = [
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.10.0" version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
dependencies = [ dependencies = [
"equivalent", "equivalent",
"hashbrown 0.15.5", "hashbrown 0.15.5",
...@@ -2736,7 +2740,7 @@ version = "0.1.13" ...@@ -2736,7 +2740,7 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -2752,21 +2756,21 @@ dependencies = [ ...@@ -2752,21 +2756,21 @@ dependencies = [
[[package]] [[package]]
name = "inventory" name = "inventory"
version = "0.3.20" version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab08d7cd2c5897f2c949e5383ea7c7db03fb19130ffcfbf7eda795137ae3cb83" checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e"
dependencies = [ dependencies = [
"rustversion", "rustversion",
] ]
[[package]] [[package]]
name = "io-uring" name = "io-uring"
version = "0.7.9" version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
] ]
...@@ -2836,9 +2840,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" ...@@ -2836,9 +2840,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.33" version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [ dependencies = [
"getrandom 0.3.3", "getrandom 0.3.3",
"libc", "libc",
...@@ -2920,7 +2924,7 @@ version = "0.8.8" ...@@ -2920,7 +2924,7 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"windows-targets 0.53.3", "windows-targets 0.53.3",
] ]
...@@ -2936,7 +2940,7 @@ version = "0.1.9" ...@@ -2936,7 +2940,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"libc", "libc",
] ]
...@@ -2966,7 +2970,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8" ...@@ -2966,7 +2970,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
dependencies = [ dependencies = [
"libc", "libc",
"neli", "neli",
"thiserror 2.0.15", "thiserror 2.0.16",
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
...@@ -3048,7 +3052,7 @@ version = "0.1.1" ...@@ -3048,7 +3052,7 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"rayon", "rayon",
] ]
...@@ -3060,9 +3064,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" ...@@ -3060,9 +3064,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]] [[package]]
name = "memmap2" name = "memmap2"
version = "0.9.7" version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28" checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7"
dependencies = [ dependencies = [
"libc", "libc",
"stable_deref_trait", "stable_deref_trait",
...@@ -3110,9 +3114,9 @@ dependencies = [ ...@@ -3110,9 +3114,9 @@ dependencies = [
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.11.0" version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e60ac08614cc09062820e51d5d94c2fce16b94ea4e5003bb81b99a95f84e876" checksum = "a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990"
dependencies = [ dependencies = [
"memo-map", "memo-map",
"self_cell", "self_cell",
...@@ -3121,9 +3125,9 @@ dependencies = [ ...@@ -3121,9 +3125,9 @@ dependencies = [
[[package]] [[package]]
name = "minijinja-contrib" name = "minijinja-contrib"
version = "2.11.0" version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93e5bfa889f16d8c10ec92ac964074a68a7206c0fd9748ff23a31942c85d97c" checksum = "182ba1438db4679ddfa03792c183bdc2b9ce26b58e7d41a749e59b06497cf136"
dependencies = [ dependencies = [
"minijinja", "minijinja",
"serde", "serde",
...@@ -3289,7 +3293,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -3289,7 +3293,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [ dependencies = [
"bitflags 1.3.2", "bitflags 1.3.2",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
"memoffset 0.7.1", "memoffset 0.7.1",
"pin-utils", "pin-utils",
...@@ -3301,8 +3305,8 @@ version = "0.29.0" ...@@ -3301,8 +3305,8 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cfg_aliases", "cfg_aliases",
"libc", "libc",
] ]
...@@ -3319,7 +3323,7 @@ dependencies = [ ...@@ -3319,7 +3323,7 @@ dependencies = [
"os_info", "os_info",
"pkg-config", "pkg-config",
"serde", "serde",
"thiserror 2.0.15", "thiserror 2.0.16",
"tracing", "tracing",
] ]
...@@ -3561,7 +3565,7 @@ version = "6.5.1" ...@@ -3561,7 +3565,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"libc", "libc",
"once_cell", "once_cell",
"onig_sys", "onig_sys",
...@@ -3598,7 +3602,7 @@ dependencies = [ ...@@ -3598,7 +3602,7 @@ dependencies = [
"serde_with", "serde_with",
"sha1", "sha1",
"sha2", "sha2",
"thiserror 2.0.15", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -3653,7 +3657,7 @@ version = "0.9.11" ...@@ -3653,7 +3657,7 @@ version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libc", "libc",
"redox_syscall", "redox_syscall",
"smallvec", "smallvec",
...@@ -3700,9 +3704,9 @@ dependencies = [ ...@@ -3700,9 +3704,9 @@ dependencies = [
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.3.1" version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]] [[package]]
name = "petgraph" name = "petgraph"
...@@ -3711,7 +3715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -3711,7 +3715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [ dependencies = [
"fixedbitset", "fixedbitset",
"indexmap 2.10.0", "indexmap 2.11.0",
] ]
[[package]] [[package]]
...@@ -3769,7 +3773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -3769,7 +3773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1" checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"indexmap 2.10.0", "indexmap 2.11.0",
"quick-xml", "quick-xml",
"serde", "serde",
"time", "time",
...@@ -3915,13 +3919,13 @@ version = "0.14.0" ...@@ -3915,13 +3919,13 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"fnv", "fnv",
"lazy_static", "lazy_static",
"memchr", "memchr",
"parking_lot", "parking_lot",
"protobuf", "protobuf",
"thiserror 2.0.15", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -4015,7 +4019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -4015,7 +4019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"libm", "libm",
"num-complex", "num-complex",
"reborrow", "reborrow",
...@@ -4028,7 +4032,7 @@ version = "0.23.5" ...@@ -4028,7 +4032,7 @@ version = "0.23.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"indoc", "indoc",
"libc", "libc",
"memoffset 0.9.1", "memoffset 0.9.1",
...@@ -4140,9 +4144,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" ...@@ -4140,9 +4144,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]] [[package]]
name = "quick-xml" name = "quick-xml"
version = "0.38.1" version = "0.38.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
...@@ -4161,7 +4165,7 @@ dependencies = [ ...@@ -4161,7 +4165,7 @@ dependencies = [
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"rustls", "rustls",
"socket2 0.5.10", "socket2 0.5.10",
"thiserror 2.0.15", "thiserror 2.0.16",
"tokio", "tokio",
"tracing", "tracing",
"web-time", "web-time",
...@@ -4182,7 +4186,7 @@ dependencies = [ ...@@ -4182,7 +4186,7 @@ dependencies = [
"rustls", "rustls",
"rustls-pki-types", "rustls-pki-types",
"slab", "slab",
"thiserror 2.0.15", "thiserror 2.0.16",
"tinyvec", "tinyvec",
"tracing", "tracing",
"web-time", "web-time",
...@@ -4298,7 +4302,7 @@ dependencies = [ ...@@ -4298,7 +4302,7 @@ dependencies = [
"av1-grain", "av1-grain",
"bitstream-io", "bitstream-io",
"built", "built",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"interpolate_name", "interpolate_name",
"itertools 0.12.1", "itertools 0.12.1",
"libc", "libc",
...@@ -4351,7 +4355,7 @@ version = "11.5.0" ...@@ -4351,7 +4355,7 @@ version = "11.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
] ]
[[package]] [[package]]
...@@ -4403,7 +4407,7 @@ version = "0.5.17" ...@@ -4403,7 +4407,7 @@ version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
] ]
[[package]] [[package]]
...@@ -4414,7 +4418,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" ...@@ -4414,7 +4418,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [ dependencies = [
"getrandom 0.2.16", "getrandom 0.2.16",
"libredox", "libredox",
"thiserror 2.0.15", "thiserror 2.0.16",
] ]
[[package]] [[package]]
...@@ -4439,14 +4443,14 @@ dependencies = [ ...@@ -4439,14 +4443,14 @@ dependencies = [
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.11.1" version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
"regex-automata 0.4.9", "regex-automata 0.4.10",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -4460,13 +4464,13 @@ dependencies = [ ...@@ -4460,13 +4464,13 @@ dependencies = [
[[package]] [[package]]
name = "regex-automata" name = "regex-automata"
version = "0.4.9" version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
] ]
[[package]] [[package]]
...@@ -4477,9 +4481,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" ...@@ -4477,9 +4481,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.5" version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]] [[package]]
name = "relative-path" name = "relative-path"
...@@ -4563,7 +4567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -4563,7 +4567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [ dependencies = [
"cc", "cc",
"cfg-if 1.0.1", "cfg-if 1.0.3",
"getrandom 0.2.16", "getrandom 0.2.16",
"libc", "libc",
"untrusted", "untrusted",
...@@ -4610,7 +4614,7 @@ version = "0.25.0" ...@@ -4610,7 +4614,7 @@ version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746" checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"glob", "glob",
"proc-macro-crate", "proc-macro-crate",
"proc-macro2", "proc-macro2",
...@@ -4655,7 +4659,7 @@ version = "0.38.44" ...@@ -4655,7 +4659,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"errno", "errno",
"libc", "libc",
"linux-raw-sys 0.4.15", "linux-raw-sys 0.4.15",
...@@ -4668,7 +4672,7 @@ version = "1.0.8" ...@@ -4668,7 +4672,7 @@ version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"errno", "errno",
"libc", "libc",
"linux-raw-sys 0.9.4", "linux-raw-sys 0.9.4",
...@@ -4843,7 +4847,7 @@ version = "2.11.1" ...@@ -4843,7 +4847,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"core-foundation 0.9.4", "core-foundation 0.9.4",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
...@@ -4856,7 +4860,7 @@ version = "3.3.0" ...@@ -4856,7 +4860,7 @@ version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"core-foundation 0.10.1", "core-foundation 0.10.1",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
...@@ -4917,7 +4921,7 @@ version = "1.0.143" ...@@ -4917,7 +4921,7 @@ version = "1.0.143"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"itoa", "itoa",
"memchr", "memchr",
"ryu", "ryu",
...@@ -4985,7 +4989,7 @@ dependencies = [ ...@@ -4985,7 +4989,7 @@ dependencies = [
"chrono", "chrono",
"hex", "hex",
"indexmap 1.9.3", "indexmap 1.9.3",
"indexmap 2.10.0", "indexmap 2.11.0",
"schemars 0.9.0", "schemars 0.9.0",
"schemars 1.0.4", "schemars 1.0.4",
"serde", "serde",
...@@ -5013,7 +5017,7 @@ version = "0.10.6" ...@@ -5013,7 +5017,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cpufeatures", "cpufeatures",
"digest", "digest",
] ]
...@@ -5024,7 +5028,7 @@ version = "0.10.9" ...@@ -5024,7 +5028,7 @@ version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"cpufeatures", "cpufeatures",
"digest", "digest",
] ]
...@@ -5254,7 +5258,7 @@ version = "0.5.5" ...@@ -5254,7 +5258,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",
...@@ -5268,7 +5272,7 @@ version = "0.6.0" ...@@ -5268,7 +5272,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",
...@@ -5282,7 +5286,7 @@ version = "0.6.1" ...@@ -5282,7 +5286,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"core-foundation 0.9.4", "core-foundation 0.9.4",
"system-configuration-sys", "system-configuration-sys",
] ]
...@@ -5318,15 +5322,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" ...@@ -5318,15 +5322,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.20.0" version = "3.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
dependencies = [ dependencies = [
"fastrand", "fastrand",
"getrandom 0.3.3", "getrandom 0.3.3",
"once_cell", "once_cell",
"rustix 1.0.8", "rustix 1.0.8",
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
...@@ -5340,11 +5344,11 @@ dependencies = [ ...@@ -5340,11 +5344,11 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.15" version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d76d3f064b981389ecb4b6b7f45a0bf9fdac1d5b9204c7bd6714fecc302850" checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
dependencies = [ dependencies = [
"thiserror-impl 2.0.15", "thiserror-impl 2.0.16",
] ]
[[package]] [[package]]
...@@ -5360,9 +5364,9 @@ dependencies = [ ...@@ -5360,9 +5364,9 @@ dependencies = [
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "2.0.15" version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d29feb33e986b6ea906bd9c3559a856983f92371b3eaa5e83782a351623de0" checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
...@@ -5375,7 +5379,7 @@ version = "1.1.9" ...@@ -5375,7 +5379,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
] ]
[[package]] [[package]]
...@@ -5485,11 +5489,11 @@ dependencies = [ ...@@ -5485,11 +5489,11 @@ dependencies = [
"rayon", "rayon",
"rayon-cond", "rayon-cond",
"regex", "regex",
"regex-syntax 0.8.5", "regex-syntax 0.8.6",
"serde", "serde",
"serde_json", "serde_json",
"spm_precompiled", "spm_precompiled",
"thiserror 2.0.15", "thiserror 2.0.16",
"unicode-normalization-alignments", "unicode-normalization-alignments",
"unicode-segmentation", "unicode-segmentation",
"unicode_categories", "unicode_categories",
...@@ -5637,7 +5641,7 @@ version = "0.22.27" ...@@ -5637,7 +5641,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [ dependencies = [
"indexmap 2.10.0", "indexmap 2.11.0",
"serde", "serde",
"serde_spanned", "serde_spanned",
"toml_datetime", "toml_datetime",
...@@ -5703,7 +5707,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" ...@@ -5703,7 +5707,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-util", "futures-util",
"indexmap 2.10.0", "indexmap 2.11.0",
"pin-project-lite", "pin-project-lite",
"slab", "slab",
"sync_wrapper", "sync_wrapper",
...@@ -5720,7 +5724,7 @@ version = "0.6.6" ...@@ -5720,7 +5724,7 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
"bytes", "bytes",
"futures-util", "futures-util",
"http", "http",
...@@ -5957,9 +5961,9 @@ dependencies = [ ...@@ -5957,9 +5961,9 @@ dependencies = [
[[package]] [[package]]
name = "url" name = "url"
version = "2.5.4" version = "2.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b"
dependencies = [ dependencies = [
"form_urlencoded", "form_urlencoded",
"idna", "idna",
...@@ -6090,7 +6094,7 @@ version = "0.2.100" ...@@ -6090,7 +6094,7 @@ version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"once_cell", "once_cell",
"rustversion", "rustversion",
"wasm-bindgen-macro", "wasm-bindgen-macro",
...@@ -6116,7 +6120,7 @@ version = "0.4.50" ...@@ -6116,7 +6120,7 @@ version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [ dependencies = [
"cfg-if 1.0.1", "cfg-if 1.0.3",
"js-sys", "js-sys",
"once_cell", "once_cell",
"wasm-bindgen", "wasm-bindgen",
...@@ -6254,11 +6258,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" ...@@ -6254,11 +6258,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.9" version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22"
dependencies = [ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.60.2",
] ]
[[package]] [[package]]
...@@ -6495,9 +6499,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" ...@@ -6495,9 +6499,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.7.12" version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
...@@ -6508,7 +6512,7 @@ version = "0.39.0" ...@@ -6508,7 +6512,7 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [ dependencies = [
"bitflags 2.9.2", "bitflags 2.9.3",
] ]
[[package]] [[package]]
...@@ -6714,7 +6718,7 @@ dependencies = [ ...@@ -6714,7 +6718,7 @@ dependencies = [
"crc32fast", "crc32fast",
"crossbeam-utils", "crossbeam-utils",
"displaydoc", "displaydoc",
"indexmap 2.10.0", "indexmap 2.11.0",
"num_enum", "num_enum",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
......
...@@ -110,6 +110,13 @@ tower-http = {workspace = true} ...@@ -110,6 +110,13 @@ tower-http = {workspace = true}
rustls = { version = "0.23" } rustls = { version = "0.23" }
# grpc-service
# pin version to 0.13.1 so it depends on prost 0.13.5
# which is used across other libraries
tonic = { version = "0.13.1" }
# Request prost specifically so tonic-build properly compiles protobuf messages
prost = { version = "0.13.5" }
# tokenizers # tokenizers
tokenizers = { version = "0.21.4", default-features = false, features = [ tokenizers = { version = "0.21.4", default-features = false, features = [
"onig", "onig",
...@@ -157,3 +164,6 @@ insta = { version = "1.41", features = [ ...@@ -157,3 +164,6 @@ insta = { version = "1.41", features = [
] } ] }
aligned-vec = "0.6.4" aligned-vec = "0.6.4"
lazy_static = "1.4" lazy_static = "1.4"
[build-dependencies]
tonic-build = { version = "0.13.1"}
\ No newline at end of file
...@@ -13,8 +13,14 @@ ...@@ -13,8 +13,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
fn main() { fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("cargo:warning=Building with CUDA KV off"); println!("cargo:warning=Building with CUDA KV off");
build_protos()
}
fn build_protos() -> Result<(), Box<dyn std::error::Error>> {
tonic_build::compile_protos("src/grpc/protos/kserve.proto")?;
Ok(())
} }
// NOTE: Preserving this build.rs for reference. We may want to re-enable // NOTE: Preserving this build.rs for reference. We may want to re-enable
......
...@@ -18,6 +18,7 @@ pub mod batch; ...@@ -18,6 +18,7 @@ pub mod batch;
mod common; mod common;
pub use common::build_routed_pipeline; pub use common::build_routed_pipeline;
pub mod endpoint; pub mod endpoint;
pub mod grpc;
pub mod http; pub mod http;
pub mod text; pub mod text;
...@@ -43,6 +44,9 @@ pub enum Input { ...@@ -43,6 +44,9 @@ pub enum Input {
/// Batch mode. Run all the prompts, write the outputs, exit. /// Batch mode. Run all the prompts, write the outputs, exit.
Batch(PathBuf), Batch(PathBuf),
// Run a KServe-compatible gRPC server
Grpc,
} }
impl FromStr for Input { impl FromStr for Input {
...@@ -59,6 +63,7 @@ impl TryFrom<&str> for Input { ...@@ -59,6 +63,7 @@ impl TryFrom<&str> for Input {
fn try_from(s: &str) -> anyhow::Result<Self> { fn try_from(s: &str) -> anyhow::Result<Self> {
match s { match s {
"http" => Ok(Input::Http), "http" => Ok(Input::Http),
"grpc" => Ok(Input::Grpc),
"text" => Ok(Input::Text), "text" => Ok(Input::Text),
"stdin" => Ok(Input::Stdin), "stdin" => Ok(Input::Stdin),
endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => { endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
...@@ -77,6 +82,7 @@ impl fmt::Display for Input { ...@@ -77,6 +82,7 @@ impl fmt::Display for Input {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let s = match self { let s = match self {
Input::Http => "http", Input::Http => "http",
Input::Grpc => "grpc",
Input::Text => "text", Input::Text => "text",
Input::Stdin => "stdin", Input::Stdin => "stdin",
Input::Endpoint(path) => path, Input::Endpoint(path) => path,
...@@ -113,6 +119,9 @@ pub async fn run_input( ...@@ -113,6 +119,9 @@ pub async fn run_input(
Input::Http => { Input::Http => {
http::run(runtime, engine_config).await?; http::run(runtime, engine_config).await?;
} }
Input::Grpc => {
grpc::run(runtime, engine_config).await?;
}
Input::Text => { Input::Text => {
text::run(runtime, None, engine_config).await?; text::run(runtime, None, engine_config).await?;
} }
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::sync::Arc;
use crate::{
discovery::{MODEL_ROOT_PATH, ModelManager, ModelWatcher},
engines::StreamingEngineAdapter,
entrypoint::{self, EngineConfig, input::common},
grpc::service::kserve,
kv_router::KvRouterConfig,
types::openai::{
chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},
},
};
use dynamo_runtime::transports::etcd;
use dynamo_runtime::{DistributedRuntime, Runtime};
use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};
/// Build and run an KServe gRPC service
pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Result<()> {
let mut grpc_service_builder = kserve::KserveService::builder()
.port(engine_config.local_model().http_port()) // [WIP] generalize port..
.with_request_template(engine_config.local_model().request_template());
let grpc_service = match engine_config {
EngineConfig::Dynamic(_) => {
let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?;
let etcd_client = distributed_runtime.etcd_client();
// This allows the /health endpoint to query etcd for active instances
grpc_service_builder = grpc_service_builder.with_etcd_client(etcd_client.clone());
let grpc_service = grpc_service_builder.build()?;
match etcd_client {
Some(ref etcd_client) => {
let router_config = engine_config.local_model().router_config();
// Listen for models registering themselves in etcd, add them to gRPC service
run_watcher(
distributed_runtime,
grpc_service.state().manager_clone(),
etcd_client.clone(),
MODEL_ROOT_PATH,
router_config.router_mode,
Some(router_config.kv_router_config),
router_config.busy_threshold,
)
.await?;
}
None => {
// Static endpoints don't need discovery
}
}
grpc_service
}
EngineConfig::StaticRemote(local_model) => {
let card = local_model.card();
let router_mode = local_model.router_config().router_mode;
let dst_config = DistributedConfig::from_settings(true); // true means static
let distributed_runtime = DistributedRuntime::new(runtime.clone(), dst_config).await?;
let grpc_service = grpc_service_builder.build()?;
let manager = grpc_service.model_manager();
let endpoint_id = local_model.endpoint_id();
let component = distributed_runtime
.namespace(&endpoint_id.namespace)?
.component(&endpoint_id.component)?;
let client = component.endpoint(&endpoint_id.name).client().await?;
let kv_chooser = if router_mode == RouterMode::KV {
Some(
manager
.kv_chooser_for(
local_model.display_name(),
&component,
card.kv_cache_block_size,
Some(local_model.router_config().kv_router_config),
)
.await?,
)
} else {
None
};
let chat_engine = entrypoint::build_routed_pipeline::<
NvCreateChatCompletionRequest,
NvCreateChatCompletionStreamResponse,
>(card, &client, router_mode, None, kv_chooser.clone())
.await?;
manager.add_chat_completions_model(local_model.display_name(), chat_engine)?;
let completions_engine = entrypoint::build_routed_pipeline::<
NvCreateCompletionRequest,
NvCreateCompletionResponse,
>(card, &client, router_mode, None, kv_chooser)
.await?;
manager.add_completions_model(local_model.display_name(), completions_engine)?;
grpc_service
}
EngineConfig::StaticFull { engine, model, .. } => {
let grpc_service = grpc_service_builder.build()?;
let engine = Arc::new(StreamingEngineAdapter::new(engine));
let manager = grpc_service.model_manager();
manager.add_completions_model(model.service_name(), engine.clone())?;
manager.add_chat_completions_model(model.service_name(), engine)?;
grpc_service
}
EngineConfig::StaticCore {
engine: inner_engine,
model,
..
} => {
let grpc_service = grpc_service_builder.build()?;
let manager = grpc_service.model_manager();
let chat_pipeline = common::build_pipeline::<
NvCreateChatCompletionRequest,
NvCreateChatCompletionStreamResponse,
>(model.card(), inner_engine.clone())
.await?;
manager.add_chat_completions_model(model.service_name(), chat_pipeline)?;
let cmpl_pipeline = common::build_pipeline::<
NvCreateCompletionRequest,
NvCreateCompletionResponse,
>(model.card(), inner_engine)
.await?;
manager.add_completions_model(model.service_name(), cmpl_pipeline)?;
grpc_service
}
};
grpc_service.run(runtime.primary_token()).await?;
runtime.shutdown(); // Cancel primary token
Ok(())
}
/// Spawns a task that watches etcd for models under `network_prefix` and
/// registers them with the `ModelManager` so the gRPC service can use them.
///
/// The spawned task is detached; this function returns as soon as the etcd
/// watch has been established.
///
/// # Errors
///
/// Returns the error from `kv_get_and_watch_prefix` if the watch cannot be
/// set up.
async fn run_watcher(
    runtime: DistributedRuntime,
    model_manager: Arc<ModelManager>,
    etcd_client: etcd::Client,
    network_prefix: &str,
    router_mode: RouterMode,
    kv_router_config: Option<KvRouterConfig>,
    busy_threshold: Option<f64>,
) -> anyhow::Result<()> {
    let model_watcher = ModelWatcher::new(
        runtime,
        model_manager,
        router_mode,
        kv_router_config,
        busy_threshold,
    );
    tracing::info!("Watching for remote model at {network_prefix}");

    let (_watched_prefix, _watch_handle, events) = etcd_client
        .kv_get_and_watch_prefix(network_prefix)
        .await?
        .dissolve();

    // [gluo NOTE] Unlike http::run_watcher, nothing here alters which service
    // endpoints are exposed: the KServe service has only one kind of
    // inference endpoint.
    // Hand the event receiver to the watcher, which consumes it in the background.
    let _detached_task = tokio::spawn(async move {
        model_watcher.watch(events).await;
    });

    Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub mod service;
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
import "model_config.proto";
//@@
//@@.. cpp:var:: service GRPCInferenceService
//@@
//@@ Inference Server GRPC endpoints.
//@@
//@@ This file declares four RPCs: ModelMetadata, ModelInfer and
//@@ ModelConfig are unary; ModelStreamInfer streams in both directions.
//@@
service GRPCInferenceService
{
//@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns
//@@ (ModelMetadataResponse)
//@@
//@@ Get model metadata.
//@@
rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}
//@@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns
//@@ (ModelInferResponse)
//@@
//@@ Perform inference using a specific model.
//@@
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
//@@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns
//@@ (stream ModelStreamInferResponse)
//@@
//@@ Perform streaming inference. Requests and responses are both
//@@ streamed; each response message is a ModelStreamInferResponse.
//@@
rpc ModelStreamInfer(stream ModelInferRequest) returns (stream ModelStreamInferResponse) {}
//@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns
//@@ (ModelConfigResponse)
//@@
//@@ Get model configuration.
//@@
rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {}
}
//@@
//@@.. cpp:var:: message ModelMetadataRequest
//@@
//@@ Request message for ModelMetadata.
//@@
message ModelMetadataRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model whose metadata is requested. If not
//@@ given the server will choose a version based on the
//@@ model and internal policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelMetadataResponse
//@@
//@@ Response message for ModelMetadata.
//@@
message ModelMetadataResponse
{
//@@
//@@ .. cpp:var:: message TensorMetadata
//@@
//@@ Metadata for a tensor. Used for the entries of both 'inputs'
//@@ and 'outputs' below.
//@@
message TensorMetadata
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape. A variable-size dimension is represented
//@@ by a -1 value.
//@@
repeated int64 shape = 3;
}
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The model name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string versions (repeated)
//@@
//@@ The versions of the model.
//@@
repeated string versions = 2;
//@@
//@@ .. cpp:var:: string platform
//@@
//@@ The model's platform.
//@@
string platform = 3;
//@@
//@@ .. cpp:var:: TensorMetadata inputs (repeated)
//@@
//@@ The model's inputs.
//@@
repeated TensorMetadata inputs = 4;
//@@
//@@ .. cpp:var:: TensorMetadata outputs (repeated)
//@@
//@@ The model's outputs.
//@@
repeated TensorMetadata outputs = 5;
}
//@@
//@@.. cpp:var:: message InferParameter
//@@
//@@ An inference parameter value. Used as the value type of the
//@@ 'parameters' maps on inference requests, responses and tensors.
//@@
message InferParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a string, an int64,
//@@ a uint64, a double, or a boolean.
//@@
//@@ Note: double and uint64 are currently
//@@ placeholders for future use and
//@@ are not supported for custom parameters
//@@
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64 int64_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
//@@ .. cpp:var:: double double_param
//@@
//@@ A double parameter value.
//@@
double double_param = 4;
//@@ .. cpp:var:: uint64 uint64_param
//@@
//@@ A uint64 parameter value.
//@@
//@@ Not supported for custom parameters
//@@
uint64 uint64_param = 5;
}
}
//@@
//@@.. cpp:var:: message InferTensorContents
//@@
//@@ The data contained in a tensor represented by the repeated type
//@@ that matches the tensor's data type. Protobuf oneof is not used
//@@ because oneofs cannot contain repeated fields.
//@@
//@@ There is no field for 16-bit float data (FP16, BF16); see the
//@@ description of ModelInferRequest.raw_input_contents for how such
//@@ tensors are carried.
//@@
message InferTensorContents
{
//@@
//@@ .. cpp:var:: bool bool_contents (repeated)
//@@
//@@ Representation for BOOL data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated bool bool_contents = 1;
//@@
//@@ .. cpp:var:: int32 int_contents (repeated)
//@@
//@@ Representation for INT8, INT16, and INT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated int32 int_contents = 2;
//@@
//@@ .. cpp:var:: int64 int64_contents (repeated)
//@@
//@@ Representation for INT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated int64 int64_contents = 3;
//@@
//@@ .. cpp:var:: uint32 uint_contents (repeated)
//@@
//@@ Representation for UINT8, UINT16, and UINT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated uint32 uint_contents = 4;
//@@
//@@ .. cpp:var:: uint64 uint64_contents (repeated)
//@@
//@@ Representation for UINT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated uint64 uint64_contents = 5;
//@@
//@@ .. cpp:var:: float fp32_contents (repeated)
//@@
//@@ Representation for FP32 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated float fp32_contents = 6;
//@@
//@@ .. cpp:var:: double fp64_contents (repeated)
//@@
//@@ Representation for FP64 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated double fp64_contents = 7;
//@@
//@@ .. cpp:var:: bytes bytes_contents (repeated)
//@@
//@@ Representation for BYTES data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated bytes bytes_contents = 8;
}
//@@
//@@.. cpp:var:: message ModelInferRequest
//@@
//@@ Request message for ModelInfer. Also the element type of the
//@@ request stream of ModelStreamInfer.
//@@
//@@ Tensor data is supplied either per tensor through
//@@ InferInputTensor::contents or for all tensors through
//@@ 'raw_input_contents', never both in the same request.
//@@
message ModelInferRequest
{
//@@
//@@ .. cpp:var:: message InferInputTensor
//@@
//@@ An input tensor for an inference request.
//@@
message InferInputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated int64 shape = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference input tensor parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferRequest.raw_input_contents.
//@@
InferTensorContents contents = 5;
}
//@@
//@@ .. cpp:var:: message InferRequestedOutputTensor
//@@
//@@ An output tensor requested for an inference request.
//@@
message InferRequestedOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional requested output tensor parameters.
//@@
map<string, InferParameter> parameters = 2;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to use for inferencing.
//@@
string model_name = 1;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model to use for inference. If not
//@@ given the latest/most-recent version of the model is used.
//@@
string model_version = 2;
//@@ .. cpp:var:: string id
//@@
//@@ Optional identifier for the request. If specified will be
//@@ returned in the response.
//@@
string id = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@
//@@ .. cpp:var:: InferInputTensor inputs (repeated)
//@@
//@@ The input tensors for the inference.
//@@
repeated InferInputTensor inputs = 5;
//@@
//@@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated)
//@@
//@@ The requested output tensors for the inference. Optional, if not
//@@ specified all outputs specified in the model config will be
//@@ returned.
//@@
repeated InferRequestedOutputTensor outputs = 6;
//@@
//@@ .. cpp:var:: bytes raw_input_contents
//@@
//@@ The data contained in an input tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_input_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'inputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferInputTensor::contents
//@@ must not be specified for any input tensor.
//@@
repeated bytes raw_input_contents = 7;
}
//@@
//@@.. cpp:var:: message ModelInferResponse
//@@
//@@ Response message for ModelInfer. Also embedded in
//@@ ModelStreamInferResponse as 'infer_response'.
//@@
//@@ Tensor data is returned either per tensor through
//@@ InferOutputTensor::contents or for all tensors through
//@@ 'raw_output_contents', never both in the same response.
//@@
message ModelInferResponse
{
//@@
//@@ .. cpp:var:: message InferOutputTensor
//@@
//@@ An output tensor returned for an inference request.
//@@
message InferOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated int64 shape = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional output tensor parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferResponse.raw_output_contents.
//@@
InferTensorContents contents = 5;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model used for inference.
//@@
string model_name = 1;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model used for inference.
//@@
string model_version = 2;
//@@ .. cpp:var:: string id
//@@
//@@ The id of the inference request if one was specified.
//@@
string id = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference response parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@
//@@ .. cpp:var:: InferOutputTensor outputs (repeated)
//@@
//@@ The output tensors holding inference results.
//@@
repeated InferOutputTensor outputs = 5;
//@@
//@@ .. cpp:var:: bytes raw_output_contents
//@@
//@@ The data contained in an output tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_output_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'outputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferOutputTensor::contents
//@@ must not be specified for any output tensor.
//@@
repeated bytes raw_output_contents = 6;
}
//@@
//@@.. cpp:var:: message ModelStreamInferResponse
//@@
//@@ Response message for ModelStreamInfer. One such message is sent
//@@ for each element of the response stream; errors are reported
//@@ in-band through 'error_message'.
//@@
message ModelStreamInferResponse
{
//@@
//@@ .. cpp:var:: string error_message
//@@
//@@ The message describing the error. The empty message
//@@ indicates the inference was successful without errors.
//@@
string error_message = 1;
//@@
//@@ .. cpp:var:: ModelInferResponse infer_response
//@@
//@@ Holds the results of the request.
//@@
ModelInferResponse infer_response = 2;
}
//@@
//@@.. cpp:var:: message ModelConfigRequest
//@@
//@@ Request message for ModelConfig.
//@@
message ModelConfigRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model. If not given the model version
//@@ is selected automatically based on the version policy.
//@@ (In proto3 an unset string field reads as the empty string.)
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelConfigResponse
//@@
//@@ Response message for ModelConfig.
//@@
message ModelConfigResponse
{
//@@
//@@ .. cpp:var:: ModelConfig config
//@@
//@@ The model configuration. ModelConfig is not declared in this
//@@ file; presumably it comes from the imported model_config.proto.
//@@
ModelConfig config = 1;
}
//@@
//@@.. cpp:var:: message ModelRepositoryParameter
//@@
//@@ A model repository parameter value.
//@@
//@@ NOTE: no RPC or message declared in this file references this
//@@ type.
//@@
message ModelRepositoryParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a boolean, an int64, a string
//@@ or bytes.
//@@
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64 int64_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
//@@ .. cpp:var:: bytes bytes_param
//@@
//@@ A bytes parameter value.
//@@
bytes bytes_param = 4;
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
//@@
//@@.. cpp:enum:: DataType
//@@
//@@ Data types supported for input and output tensors.
//@@
//@@ Note that the proto identifiers carry a TYPE_ prefix
//@@ (e.g. TYPE_BOOL) that the cpp:enumerator names below omit.
//@@
enum DataType {
//@@ .. cpp:enumerator:: DataType::INVALID = 0
TYPE_INVALID = 0;
//@@ .. cpp:enumerator:: DataType::BOOL = 1
TYPE_BOOL = 1;
//@@ .. cpp:enumerator:: DataType::UINT8 = 2
TYPE_UINT8 = 2;
//@@ .. cpp:enumerator:: DataType::UINT16 = 3
TYPE_UINT16 = 3;
//@@ .. cpp:enumerator:: DataType::UINT32 = 4
TYPE_UINT32 = 4;
//@@ .. cpp:enumerator:: DataType::UINT64 = 5
TYPE_UINT64 = 5;
//@@ .. cpp:enumerator:: DataType::INT8 = 6
TYPE_INT8 = 6;
//@@ .. cpp:enumerator:: DataType::INT16 = 7
TYPE_INT16 = 7;
//@@ .. cpp:enumerator:: DataType::INT32 = 8
TYPE_INT32 = 8;
//@@ .. cpp:enumerator:: DataType::INT64 = 9
TYPE_INT64 = 9;
//@@ .. cpp:enumerator:: DataType::FP16 = 10
TYPE_FP16 = 10;
//@@ .. cpp:enumerator:: DataType::FP32 = 11
TYPE_FP32 = 11;
//@@ .. cpp:enumerator:: DataType::FP64 = 12
TYPE_FP64 = 12;
//@@ .. cpp:enumerator:: DataType::STRING = 13
TYPE_STRING = 13;
//@@ .. cpp:enumerator:: DataType::BF16 = 14
TYPE_BF16 = 14;
}
//@@
//@@ .. cpp:var:: message ModelRateLimiter
//@@
//@@ The specifications required by the rate limiter to properly
//@@ schedule the inference requests across the different models
//@@ and their instances. Referenced by
//@@ ModelInstanceGroup.rate_limiter.
//@@
message ModelRateLimiter
{
//@@ .. cpp:var:: message Resource
//@@
//@@ The resource property.
//@@
message Resource
{
//@@ .. cpp:var:: string name
//@@
//@@ The name associated with the resource.
//@@
string name = 1;
//@@ .. cpp:var:: bool global
//@@
//@@ Whether or not the resource is global. If true then the resource
//@@ is assumed to be shared among the devices otherwise specified
//@@ count of the resource is assumed for each device associated
//@@ with the instance.
//@@
bool global = 2;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of resources required for the execution of the model
//@@ instance.
//@@
uint32 count = 3;
}
//@@ .. cpp:var:: Resource resources (repeated)
//@@
//@@ The resources required to execute the request on a model instance.
//@@ Resources are just names with a corresponding count. The execution
//@@ of the instance will be blocked until the specified resources are
//@@ available. By default an instance uses no rate-limiter resources.
//@@
repeated Resource resources = 1;
//@@ .. cpp:var:: uint32 priority
//@@
//@@ The optional weighting value to be used for prioritizing across
//@@ instances. An instance with priority 2 will be given 1/2 the
//@@ number of scheduling chances as an instance_group with priority
//@@ 1. The default priority is 1. The priority of value 0 will be
//@@ treated as priority 1.
//@@
uint32 priority = 2;
}
//@@
//@@.. cpp:var:: message ModelInstanceGroup
//@@
//@@ A group of one or more instances of a model and resources made
//@@ available for those instances.
//@@
//@@ NOTE: the fields below are not declared in field-number order
//@@ (1, 4, 2, 6, 3, 8, 5, 7, 9). Field numbers identify fields on the
//@@ wire, so they must not be renumbered to match declaration order.
//@@
message ModelInstanceGroup
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ Kind of this instance group.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
//@@
//@@ This instance group represents instances that can run on either
//@@ CPU or GPU. If all GPUs listed in 'gpus' are available then
//@@ instances will be created on GPU(s), otherwise instances will
//@@ be created on CPU.
//@@
KIND_AUTO = 0;
//@@ .. cpp:enumerator:: Kind::KIND_GPU = 1
//@@
//@@ This instance group represents instances that must run on the
//@@ GPU.
//@@
KIND_GPU = 1;
//@@ .. cpp:enumerator:: Kind::KIND_CPU = 2
//@@
//@@ This instance group represents instances that must run on the
//@@ CPU.
//@@
KIND_CPU = 2;
//@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
//@@
//@@ This instance group represents instances that should run on the
//@@ CPU and/or GPU(s) as specified by the model or backend itself.
//@@ The inference server will not override the model/backend
//@@ settings.
//@@
KIND_MODEL = 3;
}
//@@
//@@ .. cpp:var:: message SecondaryDevice
//@@
//@@ A secondary device required for a model instance.
//@@
message SecondaryDevice
{
//@@
//@@ .. cpp:enum:: SecondaryDeviceKind
//@@
//@@ The kind of the secondary device.
//@@
enum SecondaryDeviceKind {
//@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
//@@
//@@ An NVDLA core. http://nvdla.org
//@@ Currently KIND_NVDLA is only supported by the TensorRT backend.
//@@
KIND_NVDLA = 0;
}
//@@ .. cpp:var:: SecondaryDeviceKind kind
//@@
//@@ The secondary device kind.
//@@
SecondaryDeviceKind kind = 1;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ Identifier for the secondary device.
//@@
int64 device_id = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ Optional name of this group of instances. If not specified the
//@@ name will be formed as <model name>_<group number>. The name of
//@@ individual instances will be further formed by a unique instance
//@@ number and GPU index.
//@@
string name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this instance group. Default is KIND_AUTO. If
//@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and
//@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
//@@ and 'gpu' cannot be specified.
//@@
Kind kind = 4;
//@@ .. cpp:var:: int32 count
//@@
//@@ For a group assigned to GPU, the number of instances created for
//@@ each GPU listed in 'gpus'. For a group assigned to CPU the number
//@@ of instances created. Default is 1.
//@@
int32 count = 2;
//@@ .. cpp:var:: ModelRateLimiter rate_limiter
//@@
//@@ The rate limiter specific settings to be associated with this
//@@ instance group. Optional, if not specified no rate limiting
//@@ will be applied to this instance group.
//@@
ModelRateLimiter rate_limiter = 6;
//@@ .. cpp:var:: int32 gpus (repeated)
//@@
//@@ GPU(s) where instances should be available. For each GPU listed,
//@@ 'count' instances of the model will be available. Setting 'gpus'
//@@ to empty (or not specifying at all) is equivalent to listing all
//@@ available GPUs.
//@@
repeated int32 gpus = 3;
//@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
//@@
//@@ Secondary devices that are required by instances specified by this
//@@ instance group. Optional.
//@@
repeated SecondaryDevice secondary_devices = 8;
//@@ .. cpp:var:: string profile (repeated)
//@@
//@@ For TensorRT models containing multiple optimization profile, this
//@@ parameter specifies a set of optimization profiles available to this
//@@ instance group. The inference server will choose the optimal profile
//@@ based on the shapes of the input tensors. This field should lie
//@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
//@@ and be specified only for TensorRT backend, otherwise an error will
//@@ be generated. If not specified, the server will select the first
//@@ optimization profile by default.
//@@
repeated string profile = 5;
//@@ .. cpp:var:: bool passive
//@@
//@@ Whether the instances within this instance group will be accepting
//@@ inference requests from the scheduler. If true, the instances will
//@@ not be added to the scheduler. Default value is false.
//@@
bool passive = 7;
//@@ .. cpp:var:: string host_policy
//@@
//@@ The host policy name that the instance to be associated with.
//@@ The default value is set to reflect the device kind of the instance,
//@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
//@@ KIND_GPU is "gpu_<gpu_id>".
//@@
string host_policy = 9;
}
//@@
//@@.. cpp:var:: message ModelTensorReshape
//@@
//@@ Reshape specification for input and output tensors. Referenced by
//@@ the 'reshape' field of both ModelInput and ModelOutput.
//@@
message ModelTensorReshape
{
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The shape to use for reshaping.
//@@
repeated int64 shape = 1;
}
//@@
//@@.. cpp:var:: message ModelInput
//@@
//@@ An input required by the model.
//@@
//@@ Per the field descriptions below, 'is_shape_tensor' and
//@@ 'is_non_linear_format_io' are only supported for TensorRT models.
//@@
message ModelInput
{
//@@
//@@ .. cpp:enum:: Format
//@@
//@@ The format for the input.
//@@
enum Format {
//@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
//@@
//@@ The input has no specific format. This is the default.
//@@
FORMAT_NONE = 0;
//@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
//@@
//@@ HWC image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NHWC = 1;
//@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
//@@
//@@ CHW image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NCHW = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the input.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: Format format
//@@
//@@ The format of the input. Optional.
//@@
Format format = 3;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the input tensor that must be provided
//@@ when invoking the inference API for this model.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape expected for this input by the backend. The input will
//@@ be reshaped to this before being presented to the backend. The
//@@ reshape must have the same number of elements as the input shape
//@@ specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the input is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
//@@ .. cpp:var:: bool allow_ragged_batch
//@@
//@@ Whether or not the input is allowed to be "ragged" in a dynamically
//@@ created batch. Default is false indicating that two requests will
//@@ only be batched if this tensor has the same shape in both requests.
//@@ True indicates that two requests can be batched even if this tensor
//@@ has a different shape in each request.
//@@
bool allow_ragged_batch = 7;
//@@ .. cpp:var:: bool optional
//@@
//@@ Whether or not the input is optional for the model execution.
//@@ If true, the input is not required in the inference request.
//@@ Default value is false.
//@@
bool optional = 8;
//@@ .. cpp:var:: bool is_non_linear_format_io
//@@
//@@ Indicates whether the input tensor uses a non-linear IO format. This
//@@ field is currently supported only for TensorRT models. An error will
//@@ be generated if this specification does not comply with the
//@@ underlying model.
//@@
bool is_non_linear_format_io = 9;
}
//@@
//@@.. cpp:var:: message ModelOutput
//@@
//@@ An output produced by the model.
//@@
message ModelOutput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the output.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the output tensor.
//@@
repeated int64 dims = 3;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape produced for this output by the backend. The output will
//@@ be reshaped from this to the shape specified in 'dims' before being
//@@ returned in the inference response. The reshape must have the same
//@@ number of elements as the output shape specified by 'dims'. Optional.
//@@
// NOTE: the field numbers here are not in declaration order ('reshape' is 5,
// 'label_filename' below is 4). Field numbers identify fields on the wire,
// so they must stay as they are; do not renumber them to match the order of
// declaration.
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: string label_filename
//@@
//@@ The label file associated with this output. Should be specified only
//@@ for outputs that represent classifications. Optional.
//@@
string label_filename = 4;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the output is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with the underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
//@@ .. cpp:var:: bool is_non_linear_format_io
//@@
//@@ Indicates whether the output tensor uses a non-linear IO format. This
//@@ field is currently supported only for TensorRT models. An error will
//@@ be generated if this specification does not comply with the
//@@ underlying model.
//@@
bool is_non_linear_format_io = 7;
}
//@@ .. cpp:var:: message BatchInput
//@@
//@@ A batch input is an additional input that must be added by
//@@ the backend based on all the requests in a batch.
//@@
message BatchInput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch input.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
//@@
//@@ The element count of the 'source_input' will be added as
//@@ input with shape [1].
//@@
BATCH_ELEMENT_COUNT = 0;
//@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1]. For example, if there is a
//@@ batch of two requests, each with 2 elements, an input of value
//@@ 2 will be added to the first request, and an input of value
//@@ 4 will be added to the second request.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT = 1;
//@@ .. cpp:enumerator::
//@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1], except for the first request
//@@ in the batch. For the first request in the batch, the input
//@@ will have shape [2] where the first element is value 0.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;
//@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
//@@
//@@ Among the requests in the batch, the max element count of the
//@@ 'source_input' will be added as input with shape
//@@ [max_element_count] for the first request in the batch.
//@@ For the other requests, the input will have shape [0].
//@@ The data of the tensor will be uninitialized.
//@@
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with shape
//@@ [batch_size, len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and one batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [3, 2] and value [[3, 1], [3, 1], [2, 2]].
//@@
BATCH_ITEM_SHAPE = 4;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with single dimensional
//@@ shape [batch_size * len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and one batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [6] and value [3, 1, 3, 1, 2, 2].
//@@
BATCH_ITEM_SHAPE_FLATTEN = 5;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch input.
//@@
Kind kind = 1;
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The names of the model inputs that the backend will create
//@@ for this batch input.
//@@
repeated string target_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The input's datatype. The data type can be TYPE_INT32 or
//@@ TYPE_FP32.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives the value for each batch input from one or
//@@ more other inputs. 'source_input' gives the names of those
//@@ inputs.
//@@
repeated string source_input = 4;
}
//@@.. cpp:var:: message BatchOutput
//@@
//@@ A batch output is an output produced by the model that must be handled
//@@ differently by the backend based on all the requests in a batch.
//@@
message BatchOutput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch output.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
//@@
//@@ The output should be scattered according to the shape of
//@@ 'source_input'. The dynamic dimension of the output will
//@@ be set to the value of the same dimension in the input.
//@@
BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
}
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The names of the outputs to be produced by this batch output
//@@ specification.
//@@
repeated string target_name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch output.
//@@
Kind kind = 2;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives each batch output from one or more inputs.
//@@ 'source_input' gives the names of those inputs.
//@@
repeated string source_input = 3;
}
//@@
//@@.. cpp:var:: message ModelVersionPolicy
//@@
//@@ Policy indicating which versions of a model should be made
//@@ available by the inference server.
//@@
message ModelVersionPolicy
{
//@@ .. cpp:var:: message Latest
//@@
//@@ Serve only the latest version(s) of a model. This is
//@@ the default policy.
//@@
message Latest
{
//@@ .. cpp:var:: uint32 num_versions
//@@
//@@ Serve only the 'num_versions' highest-numbered versions.
//@@ The default value of 'num_versions' is 1, indicating that by
//@@ default only the single highest-numbered version of a
//@@ model will be served.
//@@
uint32 num_versions = 1;
}
//@@ .. cpp:var:: message All
//@@
//@@ Serve all versions of the model.
//@@
message All {}
//@@ .. cpp:var:: message Specific
//@@
//@@ Serve only specific versions of the model.
//@@
message Specific
{
//@@ .. cpp:var:: int64 versions (repeated)
//@@
//@@ The specific versions of the model that will be served.
//@@
repeated int64 versions = 1;
}
//@@ .. cpp:var:: oneof policy_choice
//@@
//@@ Each model must implement only a single version policy. The
//@@ default policy is 'Latest'.
//@@
oneof policy_choice
{
//@@ .. cpp:var:: Latest latest
//@@
//@@ Serve only the latest version(s) of the model.
//@@
Latest latest = 1;
//@@ .. cpp:var:: All all
//@@
//@@ Serve all versions of the model.
//@@
All all = 2;
//@@ .. cpp:var:: Specific specific
//@@
//@@ Serve only specific version(s) of the model.
//@@
Specific specific = 3;
}
}
//@@
//@@.. cpp:var:: message ModelOptimizationPolicy
//@@
//@@ Optimization settings for a model. These settings control if/how a
//@@ model is optimized and prioritized by the backend framework when
//@@ it is loaded.
//@@
message ModelOptimizationPolicy
{
//@@
//@@ .. cpp:var:: message Graph
//@@
//@@ Enable generic graph optimization of the model. If not specified
//@@ the framework's default level of optimization is used. Supports
//@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow
//@@ causes XLA to be enabled/disabled for the model. For Onnx defaults
//@@ to enabling all optimizations, -1 enables only basic optimizations,
//@@ +1 enables only basic and extended optimizations.
//@@
message Graph
{
//@@ .. cpp:var:: int32 level
//@@
//@@ The optimization level. Defaults to 0 (zero) if not specified.
//@@
//@@ - -1: Disabled
//@@ - 0: Framework default
//@@ - 1+: Enable optimization level (greater values indicate
//@@ higher optimization levels)
//@@
int32 level = 1;
}
//@@
//@@ .. cpp:enum:: ModelPriority
//@@
//@@ Model priorities. A model will be given scheduling and execution
//@@ preference over models at lower priorities. Current model
//@@ priorities only work for TensorRT models.
//@@
enum ModelPriority {
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
//@@
//@@ The default model priority.
//@@
PRIORITY_DEFAULT = 0;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
//@@
//@@ The maximum model priority.
//@@
PRIORITY_MAX = 1;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
//@@
//@@ The minimum model priority.
//@@
PRIORITY_MIN = 2;
}
//@@
//@@ .. cpp:var:: message Cuda
//@@
//@@ CUDA-specific optimization settings.
//@@
message Cuda
{
//@@ .. cpp:var:: message GraphSpec
//@@
//@@ Specification of the CUDA graph to be captured.
//@@
message GraphSpec
{
//@@ .. cpp:var:: message Shape
//@@
//@@ Specification of tensor dimension.
//@@
message Shape
{
//@@ .. cpp:var:: int64 dim (repeated)
//@@
//@@ The dimension.
//@@
repeated int64 dim = 1;
}
//@@ .. cpp:var:: message LowerBound
//@@
//@@ Specification of the lower bound of the CUDA graph. See
//@@ 'graph_lower_bound' for how the lower bound is used.
//@@
message LowerBound
{
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to a value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of
//@@ the input without batching dimension.
//@@
map<string, Shape> input = 2;
}
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to a value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of the
//@@ input without batching dimension.
//@@
map<string, Shape> input = 2;
//@@ .. cpp:var:: LowerBound graph_lower_bound
//@@
//@@ Specify the lower bound of the CUDA graph. Optional.
//@@ If specified, the graph can be used for input shapes and
//@@ batch sizes that are in the closed interval between the lower
//@@ bound specification and the graph specification. For dynamic
//@@ shape models, this allows CUDA graphs to be launched
//@@ frequently without capturing all possible shape combinations.
//@@ However, using a graph for shape combinations different from
//@@ the one used for capturing introduces uninitialized data for
//@@ execution and it may distort the inference result if
//@@ the model is sensitive to uninitialized data.
//@@
LowerBound graph_lower_bound = 3;
}
//@@ .. cpp:var:: bool graphs
//@@
//@@ Use CUDA graphs API to capture model operations and execute
//@@ them more efficiently. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool graphs = 1;
//@@ .. cpp:var:: bool busy_wait_events
//@@
//@@ Use busy-waiting to synchronize CUDA events to achieve minimum
//@@ latency from event complete to host thread to be notified, with
//@@ the cost of high CPU load. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool busy_wait_events = 2;
//@@ .. cpp:var:: GraphSpec graph_spec (repeated)
//@@
//@@ Specification of the CUDA graph to be captured. If not specified
//@@ and 'graphs' is true, the default CUDA graphs will be captured
//@@ based on model settings.
//@@ Currently only recognized by TensorRT backend.
//@@
repeated GraphSpec graph_spec = 3;
//@@ .. cpp:var:: bool output_copy_stream
//@@
//@@ Uses a CUDA stream separate from the inference stream to copy the
//@@ output to host. However, be aware that setting this option to
//@@ true will lead to an increase in the memory consumption of the
//@@ model as Triton will allocate twice as much GPU memory for its
//@@ I/O tensor buffers. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool output_copy_stream = 4;
}
//@@
//@@ .. cpp:var:: message ExecutionAccelerators
//@@
//@@ Specify the preferred execution accelerators to be used to execute
//@@ the model. Currently only recognized by ONNX Runtime backend and
//@@ TensorFlow backend.
//@@
//@@ For ONNX Runtime backend, it will deploy the model with the execution
//@@ accelerators by priority, the priority is determined based on the
//@@ order that they are set, i.e. the provider at the front has highest
//@@ priority. Overall, the priority will be in the following order:
//@@ <gpu_execution_accelerator> (if instance is on GPU)
//@@ CUDA Execution Provider (if instance is on GPU)
//@@ <cpu_execution_accelerator>
//@@ Default CPU Execution Provider
//@@
message ExecutionAccelerators
{
//@@
//@@ .. cpp:var:: message Accelerator
//@@
//@@ Specify the accelerator to be used to execute the model.
//@@ Accelerator with the same name may accept different parameters
//@@ depending on the backends.
//@@
message Accelerator
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the execution accelerator.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ Additional parameters used to configure the accelerator.
//@@
map<string, string> parameters = 2;
}
//@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on GPU.
//@@
//@@ For ONNX Runtime backend, possible value is "tensorrt" as name,
//@@ and no parameters are required.
//@@
//@@ For TensorFlow backend, possible values are "tensorrt",
//@@ "auto_mixed_precision", "gpu_io".
//@@
//@@ For "tensorrt", the following parameters can be specified:
//@@ "precision_mode": The precision used for optimization.
//@@ Allowed values are "FP32" and "FP16". Default value is "FP32".
//@@
//@@ "max_cached_engines": The maximum number of cached TensorRT
//@@ engines in dynamic TensorRT ops. Default value is 100.
//@@
//@@ "minimum_segment_size": The smallest model subgraph that will
//@@ be considered for optimization by TensorRT. Default value is 3.
//@@
//@@ "max_workspace_size_bytes": The maximum GPU memory the model
//@@ can use temporarily during execution. Default value is 1GB.
//@@
//@@ For "auto_mixed_precision", no parameters are required. If set,
//@@ the model will try to use FP16 for better performance.
//@@ This optimization cannot be set together with "tensorrt".
//@@
//@@ For "gpu_io", no parameters are required. If set, the model will
//@@ be executed using TensorFlow Callable API to set input and output
//@@ tensors in GPU memory if possible, which can reduce data transfer
//@@ overhead if the model is used in ensemble. However, the Callable
//@@ object will be created on model creation and it will request all
//@@ outputs for every model execution, which may impact the
//@@ performance if a request does not require all outputs. This
//@@ optimization will only take effect if the model instance is
//@@ created with KIND_GPU.
//@@
repeated Accelerator gpu_execution_accelerator = 1;
//@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on CPU.
//@@
//@@ For ONNX Runtime backend, possible value is "openvino" as name,
//@@ and no parameters are required.
//@@
repeated Accelerator cpu_execution_accelerator = 2;
}
//@@
//@@ .. cpp:var:: message PinnedMemoryBuffer
//@@
//@@ Specify whether to use a pinned memory buffer when transferring data
//@@ between non-pinned system memory and GPU memory. Using a pinned
//@@ memory buffer for system from/to GPU transfers will typically provide
//@@ increased performance. For example, in the common use case where the
//@@ request provides inputs and delivers outputs via non-pinned system
//@@ memory, if the model instance accepts GPU IOs, the inputs will be
//@@ processed by two copies: from non-pinned system memory to pinned
//@@ memory, and from pinned memory to GPU memory. Similarly, pinned
//@@ memory will be used for delivering the outputs.
//@@
message PinnedMemoryBuffer
{
//@@ .. cpp:var:: bool enable
//@@
//@@ Use pinned memory buffer. Default is true.
//@@
bool enable = 1;
}
//@@ .. cpp:var:: Graph graph
//@@
//@@ The graph optimization setting for the model. Optional.
//@@
Graph graph = 1;
//@@ .. cpp:var:: ModelPriority priority
//@@
//@@ The priority setting for the model. Optional.
//@@
ModelPriority priority = 2;
//@@ .. cpp:var:: Cuda cuda
//@@
//@@ CUDA-specific optimization settings. Optional.
//@@
Cuda cuda = 3;
//@@ .. cpp:var:: ExecutionAccelerators execution_accelerators
//@@
//@@ The accelerators used for the model. Optional.
//@@
ExecutionAccelerators execution_accelerators = 4;
//@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for inputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer input_pinned_memory = 5;
//@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for outputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer output_pinned_memory = 6;
//@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
//@@
//@@ The backend may use a gather kernel to gather input data if the
//@@ device has direct access to the source buffer and the destination
//@@ buffer. In such case, the gather kernel will be used only if the
//@@ number of buffers to be gathered is greater than or equal to
//@@ the specified value. If 0, the gather kernel will be disabled.
//@@ Default value is 0.
//@@ Currently only recognized by TensorRT backend.
//@@
uint32 gather_kernel_buffer_threshold = 7;
//@@ .. cpp:var:: bool eager_batching
//@@
//@@ Start preparing the next batch before the model instance is ready
//@@ for the next inference. This option can be used to overlap the
//@@ batch preparation with model execution, with the trade-off that
//@@ the next batch might be smaller than what it could have been.
//@@ Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool eager_batching = 8;
}
//@@
//@@.. cpp:var:: message ModelQueuePolicy
//@@
//@@ Queue policy for inference requests.
//@@
message ModelQueuePolicy
{
//@@
//@@ .. cpp:enum:: TimeoutAction
//@@
//@@ The action applied to timed-out requests.
//@@
enum TimeoutAction {
//@@ .. cpp:enumerator:: TimeoutAction::REJECT = 0
//@@
//@@ Reject the request and return error message accordingly.
//@@
REJECT = 0;
//@@ .. cpp:enumerator:: TimeoutAction::DELAY = 1
//@@
//@@ Delay the request until all other requests at the same
//@@ (or higher) priority levels that have not reached their timeouts
//@@ are processed. A delayed request will eventually be processed,
//@@ but may be delayed indefinitely due to newly arriving requests.
//@@
DELAY = 1;
}
//@@
//@@ .. cpp:var:: TimeoutAction timeout_action
//@@
//@@ The action applied to timed-out requests.
//@@ The default action is REJECT.
//@@
TimeoutAction timeout_action = 1;
//@@
//@@ .. cpp:var:: uint64 default_timeout_microseconds
//@@
//@@ The default timeout for every request, in microseconds.
//@@ The default value is 0 which indicates that no timeout is set.
//@@
uint64 default_timeout_microseconds = 2;
//@@
//@@ .. cpp:var:: bool allow_timeout_override
//@@
//@@ Whether an individual request can override the default timeout value.
//@@ When true, individual requests can set a timeout that is less than
//@@ the default timeout value but may not increase the timeout.
//@@ The default value is false.
//@@
bool allow_timeout_override = 3;
//@@
//@@ .. cpp:var:: uint32 max_queue_size
//@@
//@@ The maximum queue size for holding requests. A request will be
//@@ rejected immediately if it can't be enqueued because the queue is
//@@ full. The default value is 0 which indicates that no maximum
//@@ queue size is enforced.
//@@
uint32 max_queue_size = 4;
}
//@@
//@@.. cpp:var:: message ModelDynamicBatching
//@@
//@@ Dynamic batching configuration. These settings control how dynamic
//@@ batching operates for the model.
//@@
message ModelDynamicBatching
{
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching. If a batch of one of
//@@ these sizes can be formed it will be executed immediately. If
//@@ not specified a preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 1;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a request will be delayed in
//@@ the scheduling queue to wait for additional requests for
//@@ batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 2;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order as
//@@ the order of requests sent to the scheduler. If false, the responses
//@@ may be returned in arbitrary order. This option is specifically
//@@ needed when a sequence of related inference requests (i.e. inference
//@@ requests with the same correlation ID) are sent to the dynamic
//@@ batcher to ensure that the sequence responses are in the correct
//@@ order.
//@@
bool preserve_ordering = 3;
//@@ .. cpp:var:: uint64 priority_levels
//@@
//@@ The number of priority levels to be enabled for the model.
//@@ Priority levels start from 1, and 1 is the highest priority.
//@@ Requests are handled in priority order with all priority 1 requests
//@@ processed before priority 2, all priority 2 requests processed before
//@@ priority 3, etc. Requests with the same priority level will be
//@@ handled in the order that they are received.
//@@
uint64 priority_levels = 4;
//@@ .. cpp:var:: uint64 default_priority_level
//@@
//@@ The priority level used for requests that don't specify their
//@@ priority. The value must be in the range [ 1, 'priority_levels' ].
//@@
uint64 default_priority_level = 5;
//@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
//@@
//@@ The default queue policy used for requests that don't require
//@@ priority handling and requests that specify priority levels where
//@@ there is no specific policy given. If not specified, a policy with
//@@ default field values will be used.
//@@
ModelQueuePolicy default_queue_policy = 6;
//@@ .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
//@@
//@@ Specify the queue policy for the priority level. The default queue
//@@ policy will be used if a priority level doesn't specify a queue
//@@ policy.
//@@
map<uint64, ModelQueuePolicy> priority_queue_policy = 7;
}
//@@
//@@.. cpp:var:: message ModelSequenceBatching
//@@
//@@ Sequence batching configuration. These settings control how sequence
//@@ batching operates for the model.
//@@
message ModelSequenceBatching
{
//@@ .. cpp:var:: message Control
//@@
//@@ A control is a signal that the sequence batcher uses to
//@@ communicate with a backend.
//@@
message Control
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the control.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
//@@
//@@ A new sequence is/is-not starting. If true a sequence is
//@@ starting, if false a sequence is continuing. Must
//@@ specify either int32_false_true, fp32_false_true or
//@@ bool_false_true for this control. This control is optional.
//@@
CONTROL_SEQUENCE_START = 0;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
//@@
//@@ A sequence is/is-not ready for inference. If true the
//@@ input tensor data is valid and should be used. If false
//@@ the input tensor data is invalid and inferencing should
//@@ be "skipped". Must specify either int32_false_true,
//@@ fp32_false_true or bool_false_true for this control. This
//@@ control is optional.
//@@
CONTROL_SEQUENCE_READY = 1;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
//@@
//@@ A sequence is/is-not ending. If true a sequence is
//@@ ending, if false a sequence is continuing. Must specify
//@@ either int32_false_true, fp32_false_true or bool_false_true
//@@ for this control. This control is optional.
//@@
CONTROL_SEQUENCE_END = 2;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
//@@
//@@ The correlation ID of the sequence. The correlation ID
//@@ is a uint64_t value that is communicated in whole or
//@@ in part by the tensor. The tensor's datatype must be
//@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
//@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
//@@ the correlation ID will be truncated to the low-order 32
//@@ bits. This control is optional.
//@@
CONTROL_SEQUENCE_CORRID = 3;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this control.
//@@
Kind kind = 1;
//@@ .. cpp:var:: int32 int32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in an int32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'int32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated int32 int32_false_true = 2;
//@@ .. cpp:var:: float fp32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a fp32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'fp32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated float fp32_false_true = 3;
//@@ .. cpp:var:: bool bool_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a bool tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'bool_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
// NOTE: 'bool_false_true' (5) is declared ahead of 'data_type' (4). Field
// numbers identify fields on the wire, so keep them as they are; do not
// renumber them to match the order of declaration.
repeated bool bool_false_true = 5;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The control's datatype.
//@@
DataType data_type = 4;
}
//@@ .. cpp:var:: message ControlInput
//@@
//@@ The sequence control values to communicate through a model input.
//@@
message ControlInput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model input.
//@@
string name = 1;
//@@ .. cpp:var:: Control control (repeated)
//@@
//@@ The control value(s) that should be communicated to the
//@@ model using this model input.
//@@
repeated Control control = 2;
}
//@@
//@@ .. cpp:var:: message InitialState
//@@
//@@ Settings used to initialize data for implicit state.
//@@
message InitialState
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the state tensor, not including the batch
//@@ dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof state_data
//@@
//@@ Specify how the initial state data is generated.
//@@
oneof state_data
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as initial state data.
//@@ Note that the value of 'zero_data' will not be checked;
//@@ instead, zero data will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@ .. cpp:var:: string data_file
//@@
//@@ The file whose content will be used as the initial data for
//@@ the state in row-major order. The file must be provided in
//@@ the sub-directory 'initial_state' under the model directory.
//@@
string data_file = 4;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the state initialization.
//@@
string name = 5;
}
//@@ .. cpp:var:: message State
//@@
//@@ An input / output pair of tensors that carry state for the sequence.
//@@
message State
{
//@@ .. cpp:var:: string input_name
//@@
//@@ The name of the model state input.
//@@
string input_name = 1;
//@@ .. cpp:var:: string output_name
//@@
//@@ The name of the model state output.
//@@
string output_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions of the state.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: InitialState initial_state (repeated)
//@@
//@@ The optional field to specify the initial state for the model.
//@@
repeated InitialState initial_state = 5;
//@@ .. cpp:var:: bool use_same_buffer_for_input_output
//@@
//@@ The optional field to use a single buffer for both input and output
//@@ state. Without this option, Triton allocates separate buffers
//@@ for input and output state, which can be problematic if the state
//@@ size is large. This option reduces the memory usage by allocating
//@@ a single buffer. Enabling this option is recommended whenever
//@@ the input state is processed before the output state is written.
//@@ When enabled, the state will always be updated independent of
//@@ whether TRITONBACKEND_StateUpdate is called (however,
//@@ TRITONBACKEND_StateUpdate should still be called for
//@@ completeness).
//@@
//@@ The default value is false.
//@@
bool use_same_buffer_for_input_output = 6;
//@@ .. cpp:var:: bool use_growable_memory
//@@
//@@ The optional field to enable an implicit state buffer to grow
//@@ without reallocating or copying existing memory.
//@@ Additional memory will be appended to the end of the buffer and
//@@ existing data will be preserved.
//@@ This option is only available for CUDA memory and requires enabling
//@@ use_same_buffer_for_input_output. When using this option,
//@@ StateBuffer call will always return CUDA memory even if CPU memory
//@@ is requested.
//@@
//@@ The default value is false.
//@@
bool use_growable_memory = 7;
}
//@@ .. cpp:var:: message StrategyDirect
//@@
//@@ The sequence batcher uses a specific, unique batch
//@@ slot for each sequence. All inference requests in a
//@@ sequence are directed to the same batch slot in the same
//@@ model instance over the lifetime of the sequence. This
//@@ is the default strategy.
//@@
message StrategyDirect
{
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the sequence batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 1;
//@@ .. cpp:var:: float minimum_slot_utilization
//@@
//@@ The minimum slot utilization that must be satisfied to
//@@ execute the batch before 'max_queue_delay_microseconds' expires.
//@@ For example, a value of 0.5 indicates that the batch should be
//@@ executed as soon as 50% or more of the slots are ready even if
//@@ the 'max_queue_delay_microseconds' timeout has not expired.
//@@ The default is 0.0, indicating that a batch will be executed
//@@ before 'max_queue_delay_microseconds' timeout expires if at least
//@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
//@@ ignored unless 'minimum_slot_utilization' is set to a non-zero
//@@ value.
//@@
float minimum_slot_utilization = 2;
}
//@@ .. cpp:var:: message StrategyOldest
//@@
//@@ The sequence batcher maintains up to 'max_candidate_sequences'
//@@ candidate sequences. 'max_candidate_sequences' can be greater
//@@ than the model's 'max_batch_size'. For inferencing the batcher
//@@ chooses from the candidate sequences up to 'max_batch_size'
//@@ inference requests. Requests are chosen in an oldest-first
//@@ manner across all candidate sequences. A given sequence is
//@@ not guaranteed to be assigned to the same batch slot for
//@@ all inference requests of that sequence.
//@@
message StrategyOldest
{
//@@ .. cpp:var:: int32 max_candidate_sequences
//@@
//@@ Maximum number of candidate sequences that the batcher
//@@ maintains. Excess sequences are kept in an ordered backlog
//@@ and become candidates when existing candidate sequences
//@@ complete.
//@@
int32 max_candidate_sequences = 1;
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching of candidate
//@@ sequences. If a batch of one of these sizes can be formed
//@@ it will be executed immediately. If not specified a
//@@ preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 2;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the dynamic batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 3;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order
//@@ as the order of requests sent to the scheduler. If false, the
//@@ responses may be returned in arbitrary order. This option is
//@@ specifically needed when a sequence of related inference requests
//@@ (i.e. inference requests with the same correlation ID) are sent
//@@ to the dynamic batcher to ensure that the sequence responses are
//@@ in the correct order.
//@@
//@@ When using decoupled models, setting this to true may block the
//@@ responses from independent sequences from being returned to the
//@@ client until the previous request completes, hurting overall
//@@ performance. If using GRPC streaming protocol, the stream
//@@ ordering guarantee may be sufficient alone to ensure the
//@@ responses for each sequence are returned in sequence-order
//@@ without blocking based on independent requests, depending on the
//@@ use case.
//@@
bool preserve_ordering = 4;
}
//@@ .. cpp:var:: oneof strategy_choice
//@@
//@@ The strategy used by the sequence batcher. Default strategy
//@@ is 'direct'.
//@@
oneof strategy_choice
{
//@@ .. cpp:var:: StrategyDirect direct
//@@
//@@ StrategyDirect scheduling strategy.
//@@
StrategyDirect direct = 3;
//@@ .. cpp:var:: StrategyOldest oldest
//@@
//@@ StrategyOldest scheduling strategy.
//@@
StrategyOldest oldest = 4;
}
//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
//@@
//@@ The maximum time, in microseconds, that a sequence is allowed to
//@@ be idle before it is aborted. The inference server considers a
//@@ sequence idle when it does not have any inference request queued
//@@ for the sequence. If this limit is exceeded, the inference server
//@@ will free the sequence slot allocated by the sequence and make it
//@@ available for another sequence. If not specified (or specified as
//@@ zero) a default value of 1000000 (1 second) is used.
//@@
uint64 max_sequence_idle_microseconds = 1;
//@@ .. cpp:var:: ControlInput control_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ sequence start, stop, ready and similar control values to the
//@@ model.
//@@
repeated ControlInput control_input = 2;
//@@ .. cpp:var:: State state (repeated)
//@@
//@@ The optional state that can be stored in Triton for performing
//@@ inference requests on a sequence. Each sequence holds an implicit
//@@ state local to itself. The output state tensor provided by the
//@@ model in 'output_name' field of the current inference request will
//@@ be transferred as an input tensor named 'input_name' in the next
//@@ request of the same sequence. The input state of the first request
//@@ in the sequence contains garbage data.
//@@
repeated State state = 5;
//@@ .. cpp:var:: bool iterative_sequence
//@@
//@@ Requests for iterative sequences are processed over a number
//@@ of iterations. An iterative sequence is initiated by a single
//@@ request and is "rescheduled" by the model until completion.
//@@ Requests for inflight requests will be batched together
//@@ and can complete independently. Note this feature
//@@ requires backend support. Default value is false.
bool iterative_sequence = 6;
}
//@@
//@@.. cpp:var:: message ModelEnsembling
//@@
//@@ Model ensembling configuration. These settings specify the models that
//@@ compose the ensemble and how data flows between the models.
//@@
message ModelEnsembling
{
//@@ .. cpp:var:: message Step
//@@
//@@ Each step specifies a model included in the ensemble,
//@@ maps ensemble tensor names to the model input tensors,
//@@ and maps model output tensors to ensemble tensor names.
//@@
message Step
{
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to execute for this step of the ensemble.
//@@
string model_name = 1;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model to use for inference. If -1
//@@ the latest/most-recent version of the model is used.
//@@
int64 model_version = 2;
//@@ .. cpp:var:: map<string,string> input_map
//@@
//@@ Map from name of an input tensor on this step's model to ensemble
//@@ tensor name. The ensemble tensor must have the same data type and
//@@ shape as the model input. Each model input must be assigned to
//@@ one ensemble tensor, but the same ensemble tensor can be assigned
//@@ to multiple model inputs.
//@@
map<string, string> input_map = 3;
//@@ .. cpp:var:: map<string,string> output_map
//@@
//@@ Map from name of an output tensor on this step's model to ensemble
//@@ tensor name. The data type and shape of the ensemble tensor will
//@@ be inferred from the model output. It is optional to assign all
//@@ model outputs to ensemble tensors. One ensemble tensor name
//@@ can appear in an output map only once.
//@@
map<string, string> output_map = 4;
//@@ .. cpp:var:: string model_namespace
//@@
//@@ [RESERVED] currently this field is reserved for internal use, users
//@@ must not set any value to this field to avoid unexpected behavior.
//@@
string model_namespace = 5;
}
//@@ .. cpp:var:: Step step (repeated)
//@@
//@@ The models and the input / output mappings used within the ensemble.
//@@
repeated Step step = 1;
}
//@@
//@@.. cpp:var:: message ModelParameter
//@@
//@@ A model parameter. Used as the value type of the 'parameters'
//@@ map in ModelConfig.
//@@
message ModelParameter
{
//@@ .. cpp:var:: string string_value
//@@
//@@ The string value of the parameter.
//@@
string string_value = 1;
}
//@@
//@@.. cpp:var:: message ModelWarmup
//@@
//@@ Settings used to construct the request sample for model warmup.
//@@
message ModelWarmup
{
//@@
//@@ .. cpp:var:: message Input
//@@
//@@ Meta data associated with an input.
//@@
message Input
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof input_data_type
//@@
//@@ Specify how the input data is generated. If the input has STRING
//@@ data type and 'random_data' is set, the data generation will fall
//@@ back to 'zero_data'.
//@@
oneof input_data_type
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as input data. Note that the
//@@ value of 'zero_data' will not be checked, instead, zero data
//@@ will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@
//@@ .. cpp:var:: bool random_data
//@@
//@@ The identifier for using random data as input data. Note that
//@@ the value of 'random_data' will not be checked, instead,
//@@ random data will be used as long as the field is set.
//@@
bool random_data = 4;
//@@ .. cpp:var:: string input_data_file
//@@
//@@ The file whose content will be used as raw input data in
//@@ row-major order. The file must be provided in a sub-directory
//@@ 'warmup' under the model directory. The file contents should be
//@@ in binary format. For TYPE_STRING data-type, an element is
//@@ represented by a 4-byte unsigned integer giving the length
//@@ followed by the actual bytes.
//@@
string input_data_file = 5;
}
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the request sample.
//@@
string name = 1;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1. If
//@@ batch_size > 1, the 'inputs' specified below will be duplicated to
//@@ match the batch size requested.
//@@
uint32 batch_size = 2;
//@@ .. cpp:var:: map<string, Input> inputs
//@@
//@@ The warmup meta data associated with every model input, including
//@@ control tensors.
//@@
map<string, Input> inputs = 3;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of iterations that this warmup sample will be executed.
//@@ For example, if this field is set to 2, 2 model executions using this
//@@ sample will be scheduled for warmup. Default value is 0 which
//@@ indicates that this sample will be used only once.
//@@ Note that for sequence models, 'count' may not work well
//@@ because the model often expects a valid sequence of requests which
//@@ should be represented by a series of warmup samples. 'count > 1'
//@@ essentially "resends" one of the samples, which may invalidate the
//@@ sequence and result in unexpected warmup failure.
//@@
uint32 count = 4;
}
//@@
//@@ .. cpp:var:: message ModelOperations
//@@
//@@ The metadata of libraries providing custom operations for this model.
//@@ Referenced by the 'model_operations' field of ModelConfig.
//@@
message ModelOperations
{
//@@ .. cpp:var:: string op_library_filename (repeated)
//@@
//@@ Optional paths of the libraries providing custom operations for
//@@ this model. Valid only for ONNX models.
//@@
repeated string op_library_filename = 1;
}
//@@
//@@ .. cpp:var:: message ModelTransactionPolicy
//@@
//@@ The specification that describes the nature of transactions
//@@ to be expected from the model. Referenced by the
//@@ 'model_transaction_policy' field of ModelConfig.
//@@
message ModelTransactionPolicy
{
//@@ .. cpp:var:: bool decoupled
//@@
//@@ Indicates whether responses generated by the model are decoupled with
//@@ the requests issued to it, which means the number of responses
//@@ generated by model may differ from number of requests issued, and
//@@ that the responses may be out of order relative to the order of
//@@ requests. The default is false, which means the model will generate
//@@ exactly one response for each request.
//@@
bool decoupled = 1;
}
//@@
//@@.. cpp:var:: message ModelRepositoryAgents
//@@
//@@ The repository agents for the model. Referenced by the
//@@ 'model_repository_agents' field of ModelConfig.
//@@
message ModelRepositoryAgents
{
//@@
//@@ .. cpp:var:: message Agent
//@@
//@@ A repository agent that should be invoked for the specified
//@@ repository actions for this model.
//@@
message Agent
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the agent.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ The parameters for the agent.
//@@
map<string, string> parameters = 2;
}
//@@
//@@ .. cpp:var:: Agent agents (repeated)
//@@
//@@ The ordered list of agents for the model. These agents will be
//@@ invoked in order to respond to repository actions occurring for the
//@@ model.
//@@
repeated Agent agents = 1;
}
//@@
//@@.. cpp:var:: message ModelResponseCache
//@@
//@@ The response cache setting for the model.
//@@
message ModelResponseCache
{
//@@
//@@ .. cpp:var:: bool enable
//@@
//@@ Whether or not to use response cache for the model. If True, the
//@@ responses from the model are cached and when an identical request
//@@ is encountered, instead of going through the model execution,
//@@ the response from the cache is utilized. By default, response
//@@ cache is disabled for the models.
//@@
bool enable = 1;
}
//@@
//@@ .. cpp:var:: message ModelMetrics
//@@
//@@ The metrics setting of this model.
//@@ NOTE: Consider reusing this message body for backend metric custom
//@@ configuration.
//@@
message ModelMetrics
{
//@@
//@@ .. cpp:var:: message MetricControl
//@@
//@@ Override metrics settings of this model.
//@@
message MetricControl
{
//@@
//@@ .. cpp:var:: message MetricIdentifier
//@@
//@@ Specify metrics to be overridden with metric_options.
//@@
message MetricIdentifier
{
//@@ .. cpp:var:: string family
//@@
//@@ The name of the metric family to override with the custom value.
//@@ All core histogram metrics reported by Triton are customizable.
//@@
//@@ See https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md#histograms
//@@
string family = 1;
}
//@@ .. cpp:var:: message HistogramOptions
//@@
//@@ Histogram metrics options.
//@@
message HistogramOptions
{
//@@ .. cpp:var:: double buckets (repeated)
//@@
//@@ Repeated double type in ascending order for histogram bucket
//@@ boundaries. Each bucket value represents a range less than or
//@@ equal to itself. The range greater than the largest bucket value
//@@ is allocated implicitly.
//@@ For example, [ -5.0, -2, 0, 3.5, 5 ].
//@@
repeated double buckets = 1;
}
//@@ .. cpp:var:: MetricIdentifier metric_identifier
//@@
//@@ The identifier defining metrics to be overridden with the
//@@ metric_options.
//@@
MetricIdentifier metric_identifier = 1;
//@@ .. cpp:var:: oneof metric_options
//@@
//@@ The value to override the metrics defined in metric_identifier.
//@@
oneof metric_options
{
//@@ .. cpp:var:: HistogramOptions histogram_options
//@@
//@@ Histogram options.
//@@
HistogramOptions histogram_options = 2;
}
}
//@@
//@@ .. cpp:var:: MetricControl metric_control (repeated)
//@@
//@@ Optional custom configuration for selected metrics.
//@@
repeated MetricControl metric_control = 1;
}
//@@
//@@.. cpp:var:: message ModelConfig
//@@
//@@ A model configuration.
//@@
message ModelConfig
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string platform
//@@
//@@ Additional backend-specific configuration for the model.
//@@ Please refer to the backend documentation on whether this field
//@@ should be specified.
//@@
string platform = 2;
//@@ .. cpp:var:: string backend
//@@
//@@ The backend used by the model.
//@@
string backend = 17;
//@@ .. cpp:var:: string runtime
//@@
//@@ The name of the backend library file used by the model.
//@@
string runtime = 25;
//@@ .. cpp:var:: ModelVersionPolicy version_policy
//@@
//@@ Policy indicating which version(s) of the model will be served.
//@@
ModelVersionPolicy version_policy = 3;
//@@ .. cpp:var:: int32 max_batch_size
//@@
//@@ Maximum batch size allowed for inference. This can only decrease
//@@ what is allowed by the model itself. A max_batch_size value of 0
//@@ indicates that batching is not allowed for the model and the
//@@ dimension/shape of the input and output tensors must exactly
//@@ match what is specified in the input and output configuration. A
//@@ max_batch_size value > 0 indicates that batching is allowed and
//@@ so the model expects the input tensors to have an additional
//@@ initial dimension for the batching that is not specified in the
//@@ input (for example, if the model supports batched inputs of
//@@ 2-dimensional tensors then the model configuration will specify
//@@ the input shape as [ X, Y ] but the model will expect the actual
//@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
//@@ returned outputs will also have an additional initial dimension
//@@ for the batch.
//@@
int32 max_batch_size = 4;
//@@ .. cpp:var:: ModelInput input (repeated)
//@@
//@@ The inputs requested by the model.
//@@
repeated ModelInput input = 5;
//@@ .. cpp:var:: ModelOutput output (repeated)
//@@
//@@ The outputs produced by the model.
//@@
repeated ModelOutput output = 6;
//@@ .. cpp:var:: BatchInput batch_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ batch related values to the model.
//@@
repeated BatchInput batch_input = 20;
//@@ .. cpp:var:: BatchOutput batch_output (repeated)
//@@
//@@ The outputs produced by the model that requires special handling
//@@ by the model backend.
//@@
repeated BatchOutput batch_output = 21;
//@@ .. cpp:var:: ModelOptimizationPolicy optimization
//@@
//@@ Optimization configuration for the model. If not specified
//@@ then default optimization policy is used.
//@@
ModelOptimizationPolicy optimization = 12;
//@@ .. cpp:var:: oneof scheduling_choice
//@@
//@@ The scheduling policy for the model. If not specified the
//@@ default scheduling policy is used for the model. The default
//@@ policy is to execute each inference request independently.
//@@
oneof scheduling_choice
{
//@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
//@@
//@@ If specified, enables the dynamic-batching scheduling
//@@ policy. With dynamic-batching the scheduler may group
//@@ together independent requests into a single batch to
//@@ improve inference throughput.
//@@
ModelDynamicBatching dynamic_batching = 11;
//@@ .. cpp:var:: ModelSequenceBatching sequence_batching
//@@
//@@ If specified, enables the sequence-batching scheduling
//@@ policy. With sequence-batching, inference requests
//@@ with the same correlation ID are routed to the same
//@@ model instance. Multiple sequences of inference requests
//@@ may be batched together into a single batch to
//@@ improve inference throughput.
//@@
ModelSequenceBatching sequence_batching = 13;
//@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
//@@
//@@ If specified, enables the model-ensembling scheduling
//@@ policy. With model-ensembling, inference requests
//@@ will be processed according to the specification, such as an
//@@ execution sequence of models. The input specified in this model
//@@ config will be the input for the ensemble, and the output
//@@ specified will be the output of the ensemble.
//@@
ModelEnsembling ensemble_scheduling = 15;
}
//@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
//@@
//@@ Instances of this model. If not specified, one instance
//@@ of the model will be instantiated on each available GPU.
//@@
repeated ModelInstanceGroup instance_group = 7;
//@@ .. cpp:var:: string default_model_filename
//@@
//@@ Optional filename of the model file to use if a
//@@ compute-capability specific model is not specified in
//@@ :cpp:var:`cc_model_filenames`. If not specified the default name
//@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
//@@ 'model.pt' depending on the model type.
//@@
string default_model_filename = 8;
//@@ .. cpp:var:: map<string,string> cc_model_filenames
//@@
//@@ Optional map from CUDA compute capability to the filename of
//@@ the model that supports that compute capability. The filename
//@@ refers to a file within the model version directory.
//@@
map<string, string> cc_model_filenames = 9;
//@@ .. cpp:var:: map<string,string> metric_tags
//@@
//@@ Optional metric tags. User-specific key-value pairs for metrics
//@@ reported for this model. These tags are applied to the metrics
//@@ reported on the HTTP metrics port.
//@@
map<string, string> metric_tags = 10;
//@@ .. cpp:var:: map<string,ModelParameter> parameters
//@@
//@@ Optional model parameters. User-specified parameter values.
//@@
map<string, ModelParameter> parameters = 14;
//@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
//@@
//@@ Warmup setting of this model. If specified, all instances
//@@ will be run with the request samples in sequence before
//@@ serving the model.
//@@ This field can only be specified if the model is not an ensemble
//@@ model.
//@@
repeated ModelWarmup model_warmup = 16;
//@@ .. cpp:var:: ModelOperations model_operations
//@@
//@@ Optional metadata of the libraries providing custom operations for
//@@ this model.
//@@
ModelOperations model_operations = 18;
//@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
//@@
//@@ Optional specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
ModelTransactionPolicy model_transaction_policy = 19;
//@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
//@@
//@@ Optional specification of the agent(s) that should be invoked
//@@ when repository actions are performed for this model.
//@@
ModelRepositoryAgents model_repository_agents = 23;
//@@ .. cpp:var:: ModelResponseCache response_cache
//@@
//@@ Optional setting for utilizing the response cache for this
//@@ model.
//@@
ModelResponseCache response_cache = 24;
//@@ .. cpp:var:: ModelMetrics model_metrics
//@@
//@@ Optional setting for custom metrics configuration for this model.
//@@ Application default is applied to metrics that are not specified.
//@@
ModelMetrics model_metrics = 26;
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub mod kserve;
pub mod openai;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::pin::Pin;
use std::sync::Arc;
use crate::grpc::service::kserve::inference::DataType;
use crate::grpc::service::kserve::inference::ModelInput;
use crate::grpc::service::kserve::inference::ModelOutput;
use crate::http::service::Metrics;
use crate::http::service::metrics;
use crate::discovery::ModelManager;
use crate::request_template::RequestTemplate;
use anyhow::Result;
use derive_builder::Builder;
use dynamo_async_openai::types::{CompletionFinishReason, CreateCompletionRequest, Prompt};
use dynamo_runtime::transports::etcd;
use futures::pin_mut;
use tokio::task::JoinHandle;
use tokio_stream::{Stream, StreamExt};
use tokio_util::sync::CancellationToken;
use crate::grpc::service::openai::{completion_response_stream, get_parsing_options};
use tonic::{Request, Response, Status, transport::Server};
use crate::protocols::openai::completions::{
NvCreateCompletionRequest, NvCreateCompletionResponse,
};
/// Types and service traits generated by tonic for the `inference` proto
/// package (the KServe gRPC messages such as `ModelInferRequest` used below).
pub mod inference {
tonic::include_proto!("inference");
}
use inference::grpc_inference_service_server::{GrpcInferenceService, GrpcInferenceServiceServer};
use inference::{
InferParameter, ModelConfig, ModelConfigRequest, ModelConfigResponse, ModelInferRequest,
ModelInferResponse, ModelMetadataRequest, ModelMetadataResponse, ModelStreamInferResponse,
};
/// Shared state handed to every KServe gRPC request handler.
///
/// [gluo TODO] 'metrics' are for HTTP service and there is HTTP endpoint
/// for it as part of HTTP service. Should we always start HTTP service up
/// for non-inference?
pub struct State {
// Prometheus metrics object; handed out via `metrics_clone`.
metrics: Arc<Metrics>,
// Model registry consulted by the handlers (model listing, parsing options).
manager: Arc<ModelManager>,
// Optional etcd client; `None` when the state is built with `State::new`.
etcd_client: Option<etcd::Client>,
}
impl State {
    /// Creates a state around `manager` with fresh default metrics and no etcd client.
    pub fn new(manager: Arc<ModelManager>) -> Self {
        Self::new_with_etcd(manager, None)
    }

    /// Creates a state around `manager` with fresh default metrics, keeping the
    /// supplied `etcd_client` (which may be `None`).
    pub fn new_with_etcd(manager: Arc<ModelManager>, etcd_client: Option<etcd::Client>) -> Self {
        let metrics = Arc::new(Metrics::default());
        Self {
            metrics,
            manager,
            etcd_client,
        }
    }

    /// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests
    pub fn metrics_clone(&self) -> Arc<Metrics> {
        Arc::clone(&self.metrics)
    }

    /// Borrows the model manager without touching the reference count.
    pub fn manager(&self) -> &ModelManager {
        &self.manager
    }

    /// Returns another shared handle to the model manager.
    pub fn manager_clone(&self) -> Arc<ModelManager> {
        Arc::clone(&self.manager)
    }

    /// Borrows the etcd client, if one was configured.
    pub fn etcd_client(&self) -> Option<&etcd::Client> {
        self.etcd_client.as_ref()
    }
}
/// KServe gRPC front-end: implements `GrpcInferenceService` and serves it on
/// `host:port`.
#[derive(Clone)]
pub struct KserveService {
// The state we share with every request handler
state: Arc<State>,
// TCP port the gRPC server listens on.
port: u16,
// Host part of the listen address; combined with `port` as "{host}:{port}".
host: String,
// Optional defaults (model, temperature, max tokens) applied to requests
// that leave those values unset.
request_template: Option<RequestTemplate>,
}
/// Configuration collected by `KserveServiceConfigBuilder`.
///
/// The derived build function is private and renamed to `build_internal`; the
/// public `build` on the builder turns this config into a [`KserveService`].
#[derive(Clone, Builder)]
#[builder(pattern = "owned", build_fn(private, name = "build_internal"))]
pub struct KserveServiceConfig {
// Listen port; defaults to 8787.
#[builder(default = "8787")]
port: u16,
// Listen host; defaults to "0.0.0.0". The setter accepts anything `Into<String>`.
#[builder(setter(into), default = "String::from(\"0.0.0.0\")")]
host: String,
// Optional request defaults; none unless set.
#[builder(default = "None")]
request_template: Option<RequestTemplate>,
// Optional etcd client forwarded to the shared `State`; none unless set.
#[builder(default = "None")]
etcd_client: Option<etcd::Client>,
}
impl KserveService {
    /// Starts a builder populated with the default configuration values.
    pub fn builder() -> KserveServiceConfigBuilder {
        KserveServiceConfigBuilder::default()
    }

    /// Returns another shared handle to the handler state.
    pub fn state_clone(&self) -> Arc<State> {
        Arc::clone(&self.state)
    }

    /// Borrows the handler state.
    pub fn state(&self) -> &State {
        &self.state
    }

    /// Borrows the model manager held by the handler state.
    pub fn model_manager(&self) -> &ModelManager {
        self.state.manager()
    }

    /// Runs the service on a spawned tokio task and returns its join handle.
    pub async fn spawn(&self, cancel_token: CancellationToken) -> JoinHandle<Result<()>> {
        let service = self.clone();
        tokio::spawn(async move { service.run(cancel_token).await })
    }

    /// Serves the gRPC API on `host:port` until `cancel_token` is cancelled.
    ///
    /// # Errors
    /// Returns an error if the address cannot be parsed or the server fails;
    /// on a server failure `cancel_token` is cancelled before returning.
    pub async fn run(&self, cancel_token: CancellationToken) -> Result<()> {
        let address = format!("{}:{}", self.host, self.port);
        tracing::info!(address, "Starting KServe gRPC service on: {address}");

        // Shutdown is driven by a child token so the server stops when the
        // caller's token is cancelled.
        let shutdown = cancel_token.child_token().cancelled_owned();
        let grpc_service = GrpcInferenceServiceServer::new(self.clone());

        let outcome = Server::builder()
            .add_service(grpc_service)
            .serve_with_shutdown(address.parse()?, shutdown)
            .await;

        if let Err(err) = outcome {
            cancel_token.cancel();
            return Err(err.into());
        }
        Ok(())
    }
}
impl KserveServiceConfigBuilder {
pub fn build(self) -> Result<KserveService, anyhow::Error> {
let config: KserveServiceConfig = self.build_internal()?;
let model_manager = Arc::new(ModelManager::new());
let state = Arc::new(State::new_with_etcd(model_manager, config.etcd_client));
// enable prometheus metrics
let registry = metrics::Registry::new();
state.metrics_clone().register(&registry)?;
Ok(KserveService {
state,
port: config.port,
host: config.host,
request_template: config.request_template,
})
}
pub fn with_request_template(mut self, request_template: Option<RequestTemplate>) -> Self {
self.request_template = Some(request_template);
self
}
pub fn with_etcd_client(mut self, etcd_client: Option<etcd::Client>) -> Self {
self.etcd_client = Some(etcd_client);
self
}
}
#[tonic::async_trait]
impl GrpcInferenceService for KserveService {
/// Unary inference: converts the KServe request into a completion request,
/// runs it to completion and returns the folded result as a single response
/// carrying the id of the originating request.
///
/// # Errors
/// `invalid_argument` if the request cannot be converted, asks for streaming,
/// or the response cannot be converted; `internal` if the completion stream
/// cannot be folded; otherwise whatever `completion_response_stream` returns.
async fn model_infer(
    &self,
    request: Request<ModelInferRequest>,
) -> Result<Response<ModelInferResponse>, Status> {
    let infer_request = request.into_inner();
    // Remember the id now; the conversion below consumes the request.
    let request_id = infer_request.id.clone();
    let mut completion_request: NvCreateCompletionRequest = infer_request
        .try_into()
        .map_err(|e| Status::invalid_argument(format!("Failed to parse request: {}", e)))?;

    let wants_stream = completion_request.inner.stream.unwrap_or(false);
    if wants_stream {
        // return error that streaming is not supported
        return Err(Status::invalid_argument(
            "Streaming is not supported for this endpoint",
        ));
    }

    // Apply template values if present
    if let Some(template) = &self.request_template {
        let inner = &mut completion_request.inner;
        if inner.model.is_empty() {
            inner.model = template.model.clone();
        }
        if inner.temperature.map_or(true, |t| t == 0.0) {
            inner.temperature = Some(template.temperature);
        }
        if inner.max_tokens.map_or(true, |n| n == 0) {
            inner.max_tokens = Some(template.max_completion_tokens);
        }
    }

    let model = completion_request.inner.model.clone();
    let parsing_options = get_parsing_options(self.state.manager(), &model);
    let stream = completion_response_stream(self.state_clone(), completion_request).await?;

    let folded = NvCreateCompletionResponse::from_annotated_stream(stream, parsing_options).await;
    let completion_response = match folded {
        Ok(response) => response,
        Err(e) => {
            tracing::error!("Failed to fold completions stream: {:?}", e);
            return Err(Status::internal("Failed to fold completions stream"));
        }
    };

    let mut reply: ModelInferResponse = completion_response
        .try_into()
        .map_err(|e| Status::invalid_argument(format!("Failed to parse response: {}", e)))?;
    reply.id = request_id;
    Ok(Response::new(reply))
}
/// Boxed, pinned response stream returned by `model_stream_infer`; each item
/// is either a stream response or a gRPC `Status` error.
type ModelStreamInferStream =
Pin<Box<dyn Stream<Item = Result<ModelStreamInferResponse, Status>> + Send + 'static>>;
/// Bidirectional streaming inference.
///
/// Requests are read from the incoming stream and handled one at a time, in
/// order. For each request:
/// * a transport-level read error is reported as a response with
///   `error_message` set, and processing moves on to the next item;
/// * a request that cannot be converted, or a response that cannot be
///   converted, ends the output stream with an `invalid_argument` status;
/// * if the request asks for streaming, one response is emitted per data
///   chunk (annotation-only chunks are skipped); otherwise the chunks are
///   folded into a single response.
///
/// Every emitted `infer_response` carries the id of the request it answers.
async fn model_stream_infer(
    &self,
    request: Request<tonic::Streaming<ModelInferRequest>>,
) -> Result<Response<Self::ModelStreamInferStream>, Status> {
    let mut request_stream = request.into_inner();
    let state = self.state_clone();
    let template = self.request_template.clone();
    let output = async_stream::try_stream! {
        // [gluo FIXME] should be able to demux request / response streaming
        // await requests in a separate task until cancellation / completion,
        // and passing AsyncEngineStream for each request to the response stream
        // which will be collectively polling.
        while let Some(request) = request_stream.next().await {
            let request = match request {
                Ok(request) => request,
                Err(e) => {
                    tracing::error!("Unexpected gRPC failed to read request: {}", e);
                    yield ModelStreamInferResponse {
                        error_message: e.to_string(),
                        infer_response: None
                    };
                    continue;
                }
            };

            // Must keep track of 'request_id' which will be returned in corresponding response
            let request_id = request.id.clone();
            let mut completion_request: NvCreateCompletionRequest =
                request.try_into().map_err(|e| {
                    Status::invalid_argument(format!("Failed to parse request: {}", e))
                })?;

            // Apply template values if present
            if let Some(template) = &template {
                if completion_request.inner.model.is_empty() {
                    completion_request.inner.model = template.model.clone();
                }
                if completion_request.inner.temperature.unwrap_or(0.0) == 0.0 {
                    completion_request.inner.temperature = Some(template.temperature);
                }
                if completion_request.inner.max_tokens.unwrap_or(0) == 0 {
                    completion_request.inner.max_tokens = Some(template.max_completion_tokens);
                }
            }

            let model = completion_request.inner.model.clone();
            let parsing_options = get_parsing_options(state.manager(), &model);

            // Read the flag before the request is moved into the engine call.
            let streaming = completion_request.inner.stream.unwrap_or(false);

            let stream = completion_response_stream(state.clone(), completion_request).await?;

            if streaming {
                pin_mut!(stream);
                while let Some(response) = stream.next().await {
                    // Skip if no data is present, the response is for annotation
                    let Some(data) = response.data else {
                        continue;
                    };
                    let mut reply = ModelStreamInferResponse::try_from(data).map_err(|e| {
                        Status::invalid_argument(format!("Failed to parse response: {}", e))
                    })?;
                    if let Some(infer_response) = reply.infer_response.as_mut() {
                        infer_response.id = request_id.clone();
                    }
                    yield reply;
                }
            } else {
                let completion_response = NvCreateCompletionResponse::from_annotated_stream(stream, parsing_options)
                    .await
                    .map_err(|e| {
                        tracing::error!(
                            "Failed to fold completions stream: {:?}",
                            e
                        );
                        Status::internal("Failed to fold completions stream")
                    })?;

                let mut response: ModelStreamInferResponse = completion_response.try_into().map_err(|e| {
                    Status::invalid_argument(format!("Failed to parse response: {}", e))
                })?;
                if let Some(infer_response) = response.infer_response.as_mut() {
                    // Last use of the id for this request, so it can be moved.
                    infer_response.id = request_id;
                }
                yield response;
            }
        }
    };

    Ok(Response::new(
        Box::pin(output) as Self::ModelStreamInferStream
    ))
}
/// Returns the fixed tensor metadata advertised for a served completions
/// model, or `not_found` if the requested name is not among the models the
/// manager lists.
async fn model_metadata(
    &self,
    request: Request<ModelMetadataRequest>,
) -> Result<Response<ModelMetadataResponse>, Status> {
    let known_models = self.state.manager().list_completions_models();
    let requested = request.into_inner().name;

    let Some(model_name) = known_models
        .into_iter()
        .find(|candidate| *candidate == requested)
    else {
        return Err(Status::not_found(format!(
            "Model '{}' not found",
            requested
        )));
    };

    // Small builder so each tensor is described on a single line below.
    let tensor = |name: &str, datatype: &str, shape| {
        inference::model_metadata_response::TensorMetadata {
            name: name.to_string(),
            datatype: datatype.to_string(),
            shape,
        }
    };

    let inputs = vec![
        tensor("text_input", "BYTES", vec![1]),
        tensor("streaming", "BOOL", vec![1]),
    ];
    let outputs = vec![
        tensor("text_output", "BYTES", vec![-1]),
        tensor("finish_reason", "BYTES", vec![-1]),
    ];

    Ok(Response::new(ModelMetadataResponse {
        name: model_name,
        versions: vec![String::from("1")],
        platform: String::from("dynamo"),
        inputs,
        outputs,
    }))
}
/// Returns the fixed model configuration advertised for a served completions
/// model, or `not_found` if the requested name is not among the models the
/// manager lists.
async fn model_config(
    &self,
    request: Request<ModelConfigRequest>,
) -> Result<Response<ModelConfigResponse>, Status> {
    let known_models = self.state.manager().list_completions_models();
    let requested = request.into_inner().name;

    let Some(model_name) = known_models
        .into_iter()
        .find(|candidate| *candidate == requested)
    else {
        return Err(Status::not_found(format!(
            "Model '{}' not found",
            requested
        )));
    };

    // Inputs: the prompt text plus an optional flag selecting streaming.
    let input = vec![
        ModelInput {
            name: String::from("text_input"),
            data_type: DataType::TypeString as i32,
            dims: vec![1],
            ..Default::default()
        },
        ModelInput {
            name: String::from("streaming"),
            data_type: DataType::TypeBool as i32,
            dims: vec![1],
            optional: true,
            ..Default::default()
        },
    ];

    // Outputs: generated text and the finish reason, both variable-length.
    let output = vec![
        ModelOutput {
            name: String::from("text_output"),
            data_type: DataType::TypeString as i32,
            dims: vec![-1],
            ..Default::default()
        },
        ModelOutput {
            name: String::from("finish_reason"),
            data_type: DataType::TypeString as i32,
            dims: vec![-1],
            ..Default::default()
        },
    ];

    let config = ModelConfig {
        name: model_name,
        platform: String::from("dynamo"),
        backend: String::from("dynamo"),
        input,
        output,
        ..Default::default()
    };

    Ok(Response::new(ModelConfigResponse {
        config: Some(config),
    }))
}
}
impl TryFrom<ModelInferRequest> for NvCreateCompletionRequest {
    type Error = Status;

    /// Converts a KServe `ModelInferRequest` into a completions request.
    ///
    /// Recognized inputs:
    /// - `text_input` (required): `BYTES`, shape `[1]` or `[1, 1]`; becomes the prompt.
    /// - `streaming` / `stream` (optional): `BOOL`, shape `[1]`; defaults to `false`.
    ///
    /// Input data is read from the tensor's `contents` when present, otherwise
    /// from the entry of `raw_input_contents` at the same index.
    ///
    /// # Errors
    ///
    /// Returns `Status::invalid_argument` when an input has an unexpected name,
    /// datatype or shape, when its data is missing or empty, when
    /// `raw_input_contents` is used for only some of the inputs, or when
    /// `text_input` is absent.
    fn try_from(request: ModelInferRequest) -> Result<Self, Self::Error> {
        // Protocol requires if `raw_input_contents` is used to hold input data,
        // it must be used for all inputs.
        if !request.raw_input_contents.is_empty()
            && request.inputs.len() != request.raw_input_contents.len()
        {
            return Err(Status::invalid_argument(
                "`raw_input_contents` must be used for all inputs",
            ));
        }

        let mut text_input = None;
        let mut stream = false;
        for (idx, input) in request.inputs.iter().enumerate() {
            match input.name.as_str() {
                "text_input" => {
                    if input.datatype != "BYTES" {
                        return Err(Status::invalid_argument(format!(
                            "Expected 'text_input' to be of type BYTES for string input, got {:?}",
                            input.datatype
                        )));
                    }
                    if input.shape != [1] && input.shape != [1, 1] {
                        return Err(Status::invalid_argument(format!(
                            "Expected 'text_input' to have shape [1], got {:?}",
                            input.shape
                        )));
                    }
                    let text = match &input.contents {
                        Some(content) => {
                            // `contents` comes from the client and may be present but
                            // empty; reject it instead of panicking on an index.
                            let bytes = content.bytes_contents.first().ok_or_else(|| {
                                Status::invalid_argument(
                                    "'text_input' contents must hold one BYTES element",
                                )
                            })?;
                            String::from_utf8_lossy(bytes).into_owned()
                        }
                        None => {
                            let raw_input =
                                request.raw_input_contents.get(idx).ok_or_else(|| {
                                    Status::invalid_argument("Missing raw input for 'text_input'")
                                })?;
                            if raw_input.len() < 4 {
                                return Err(Status::invalid_argument(
                                    "'text_input' raw input must be length-prefixed (>= 4 bytes)",
                                ));
                            }
                            // 'text_input' is restricted to a single element, so only the
                            // first element is parsed: skip the four bytes that store its
                            // length and treat the remainder as the text.
                            String::from_utf8_lossy(&raw_input[4..]).into_owned()
                        }
                    };
                    text_input = Some(text);
                }
                "streaming" | "stream" => {
                    if input.datatype != "BOOL" {
                        return Err(Status::invalid_argument(format!(
                            "Expected '{}' to be of type BOOL, got {:?}",
                            input.name, input.datatype
                        )));
                    }
                    if input.shape != [1] {
                        return Err(Status::invalid_argument(format!(
                            "Expected 'stream' to have shape [1], got {:?}",
                            input.shape
                        )));
                    }
                    stream = match &input.contents {
                        // Same as above: an empty `bool_contents` is a client error,
                        // not a reason to panic.
                        Some(content) => {
                            content.bool_contents.first().copied().ok_or_else(|| {
                                Status::invalid_argument(
                                    "'stream' contents must hold one BOOL element",
                                )
                            })?
                        }
                        None => {
                            let raw_input =
                                request.raw_input_contents.get(idx).ok_or_else(|| {
                                    Status::invalid_argument("Missing raw input for 'stream'")
                                })?;
                            let flag = raw_input.first().ok_or_else(|| {
                                Status::invalid_argument(
                                    "'stream' raw input must contain at least one byte",
                                )
                            })?;
                            *flag != 0
                        }
                    };
                }
                _ => {
                    return Err(Status::invalid_argument(format!(
                        "Invalid input name: {}, supported inputs are 'text_input', 'stream'",
                        input.name
                    )));
                }
            }
        }

        let Some(text_input) = text_input else {
            return Err(Status::invalid_argument(
                "Missing required input: 'text_input'",
            ));
        };

        // The request ID, when provided, is carried in the `user` field.
        let user = if request.id.is_empty() {
            None
        } else {
            Some(request.id)
        };

        Ok(NvCreateCompletionRequest {
            inner: CreateCompletionRequest {
                model: request.model_name,
                prompt: Prompt::String(text_input),
                stream: Some(stream),
                user,
                ..Default::default()
            },
            common: Default::default(),
            nvext: None,
        })
    }
}
impl TryFrom<NvCreateCompletionResponse> for ModelInferResponse {
    type Error = anyhow::Error;

    /// Packs a completions response into a KServe `ModelInferResponse`.
    ///
    /// Produces two `BYTES` output tensors: `text_output` with one element per
    /// choice, and `finish_reason` with one element per choice that reports a
    /// finish reason (so it may be shorter than `text_output`).
    fn try_from(response: NvCreateCompletionResponse) -> Result<Self, Self::Error> {
        let choices = &response.inner.choices;

        let texts: Vec<Vec<u8>> = choices
            .iter()
            .map(|choice| choice.text.as_bytes().to_vec())
            .collect();

        let reasons: Vec<Vec<u8>> = choices
            .iter()
            .filter_map(|choice| choice.finish_reason.as_ref())
            .map(|reason| {
                let label = match reason {
                    CompletionFinishReason::Stop => "stop",
                    CompletionFinishReason::Length => "length",
                    CompletionFinishReason::ContentFilter => "content_filter",
                };
                label.as_bytes().to_vec()
            })
            .collect();

        // Both outputs are one-dimensional BYTES tensors sized by their element count.
        let bytes_tensor = |name: &str, elements: Vec<Vec<u8>>| {
            inference::model_infer_response::InferOutputTensor {
                name: name.to_string(),
                datatype: "BYTES".to_string(),
                shape: vec![elements.len() as i64],
                contents: Some(inference::InferTensorContents {
                    bytes_contents: elements,
                    ..Default::default()
                }),
                ..Default::default()
            }
        };
        let outputs = vec![
            bytes_tensor("text_output", texts),
            bytes_tensor("finish_reason", reasons),
        ];

        Ok(ModelInferResponse {
            model_name: response.inner.model,
            model_version: "1".to_string(),
            id: response.inner.id,
            outputs,
            parameters: ::std::collections::HashMap::<String, InferParameter>::new(),
            raw_output_contents: vec![],
        })
    }
}
impl TryFrom<NvCreateCompletionResponse> for ModelStreamInferResponse {
    type Error = anyhow::Error;

    /// Wraps a completions response into a streaming KServe response.
    ///
    /// This conversion always returns `Ok`: when the inner conversion to
    /// `ModelInferResponse` fails, the failure is reported through
    /// `error_message` and `infer_response` is left unset.
    fn try_from(response: NvCreateCompletionResponse) -> Result<Self, Self::Error> {
        let stream_response = ModelInferResponse::try_from(response)
            .map(|infer_response| ModelStreamInferResponse {
                infer_response: Some(infer_response),
                ..Default::default()
            })
            .unwrap_or_else(|e| ModelStreamInferResponse {
                infer_response: None,
                error_message: format!("Failed to convert response: {}", e),
            });
        Ok(stream_response)
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use dynamo_runtime::{
engine::AsyncEngineContext,
pipeline::{AsyncEngineContextProvider, Context},
protocols::annotated::AnnotationsProvider,
};
use futures::{Stream, StreamExt, stream};
use std::sync::Arc;
use crate::discovery::ModelManager;
use crate::protocols::openai::{
ParsingOptions,
completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},
};
use crate::types::Annotated;
use super::kserve;
// [gluo NOTE] These are common utilities that should be shared between frontends
use crate::http::service::{
disconnect::{ConnectionHandle, create_connection_monitor},
metrics::{Endpoint, ResponseMetricCollector},
};
use crate::{http::service::metrics::InflightGuard, preprocessor::LLMMetricAnnotation};
use tonic::Status;
/// Name of the Dynamo annotation for the request ID.
///
/// When a request lists this annotation, `completion_response_stream` puts an
/// annotation event carrying the request ID at the front of the response stream.
pub const ANNOTATION_REQUEST_ID: &str = "request_id";
// [gluo NOTE] Stripped-down version of lib/llm/src/http/service/openai.rs,
// duplicated here because the original file is coupled with HTTP objects.
/// Completions request handler for the gRPC frontend.
///
/// Looks up the completions engine for the requested model, issues the generate call and
/// returns the resulting stream of annotated responses.
///
/// Note: For all requests, streaming or non-streaming, the engine is always called with
/// streaming enabled. This function never folds the stream itself; a caller that needs a
/// single response has to aggregate the returned stream.
///
/// # Errors
///
/// - `Status::not_found` when no completions engine is registered for the model.
/// - `Status::internal` when the engine's `generate` call fails.
pub async fn completion_response_stream(
state: Arc<kserve::State>,
request: NvCreateCompletionRequest,
) -> Result<impl Stream<Item = Annotated<NvCreateCompletionResponse>>, Status> {
// create the context for the request
// [WIP] The request ID is taken from the `user` field when it parses as a UUID;
// otherwise a fresh one is generated.
let request_id = get_or_create_request_id(request.inner.user.as_deref());
let request = Context::with_id(request, request_id.clone());
let context = request.context();
// create the connection handles
let (mut connection_handle, stream_handle) = create_connection_monitor(context.clone()).await;
// remember what the client asked for before forcing streaming below; the original
// value is what gets reported to the inflight metrics guard
let streaming = request.inner.stream.unwrap_or(false);
// update the request to always stream
let request = request.map(|mut req| {
req.inner.stream = Some(true);
req
});
// todo - make the protocols be optional for model name
// todo - when optional, if none, apply a default
let model = &request.inner.model;
// todo - error handling should be more robust
let engine = state
.manager()
.get_completions_engine(model)
.map_err(|_| Status::not_found("model not found"))?;
let inflight_guard =
state
.metrics_clone()
.create_inflight_guard(model, Endpoint::Completions, streaming);
let mut response_collector = state.metrics_clone().create_response_collector(model);
// prepare to process any annotations
let annotations = request.annotations();
// issue the generate call on the engine
let stream = engine
.generate(request)
.await
.map_err(|e| Status::internal(format!("Failed to generate completions: {}", e)))?;
// capture the context to cancel the stream if the client disconnects
let ctx = stream.context();
// prepare any requested annotations; only the request ID annotation is handled here,
// every other requested annotation is dropped
let annotations = annotations.map_or(Vec::new(), |annotations| {
annotations
.iter()
.filter_map(|annotation| {
if annotation == ANNOTATION_REQUEST_ID {
Annotated::<NvCreateCompletionResponse>::from_annotation(
ANNOTATION_REQUEST_ID,
&request_id,
)
.ok()
} else {
None
}
})
.collect::<Vec<_>>()
});
// apply any annotations to the front of the stream
let stream = stream::iter(annotations).chain(stream);
// Tap on the stream to collect response metrics
let stream = stream.inspect(move |response| {
process_metrics_only(response, &mut response_collector);
});
// from here on the stream handle and the inflight guard live inside the returned stream
let stream = grpc_monitor_for_disconnects(stream, ctx, inflight_guard, stream_handle);
// if we got here, then we will return a response and the potentially long running task has completed successfully
// without need to be cancelled.
connection_handle.disarm();
Ok(stream)
}
/// This method will consume an AsyncEngineStream and monitor for disconnects or context cancellation.
/// This is the gRPC variant of `monitor_for_disconnects`, as that implementation has SSE specific handling.
/// Should decouple and reuse `monitor_for_disconnects`.
///
/// Uses `tokio::select!` to choose between receiving responses from the source stream or detecting when
/// the context is stopped. Responses are forwarded unchanged. If the source stream ends naturally, the
/// inflight guard is marked OK and the stream handle is disarmed. If the context is stopped first, the
/// stream ends early without either of those steps, leaving the stream handle armed.
pub fn grpc_monitor_for_disconnects<T>(
stream: impl Stream<Item = Annotated<T>>,
context: Arc<dyn AsyncEngineContext>,
mut inflight_guard: InflightGuard,
mut stream_handle: ConnectionHandle,
) -> impl Stream<Item = Annotated<T>> {
// Armed eagerly, before the returned stream is first polled.
stream_handle.arm();
async_stream::stream! {
tokio::pin!(stream);
loop {
tokio::select! {
event = stream.next() => {
match event {
Some(response) => {
yield response;
}
None => {
// Stream ended normally
inflight_guard.mark_ok();
stream_handle.disarm();
break;
}
}
}
_ = context.stopped() => {
// Early exit: the guard is not marked OK and the handle stays armed.
tracing::trace!("Context stopped; breaking stream");
break;
}
}
}
}
}
/// Feeds the LLM metrics carried by `annotated`, if any, into `response_collector`.
///
/// Responses that carry no metrics annotation, or whose annotation cannot be
/// decoded, are ignored.
fn process_metrics_only<T>(
    annotated: &Annotated<T>,
    response_collector: &mut ResponseMetricCollector,
) {
    let Ok(Some(metrics)) = LLMMetricAnnotation::from_annotation(annotated) else {
        return;
    };
    response_collector.observe_current_osl(metrics.output_tokens);
    response_collector.observe_response(metrics.input_tokens, metrics.chunk_tokens);
}
/// Get the request ID from a primary source, or lastly create a new one if not present
fn get_or_create_request_id(primary: Option<&str>) -> String {
// Try to get the request ID from the primary source
if let Some(primary) = primary
&& let Ok(uuid) = uuid::Uuid::parse_str(primary)
{
return uuid.to_string();
}
// Try to parse the request ID as a UUID, or generate a new one if missing/invalid
let uuid = uuid::Uuid::new_v4();
uuid.to_string()
}
/// Builds the `ParsingOptions` for `model` from the tool-call parser registered
/// with `manager`.
pub fn get_parsing_options(manager: &ModelManager, model: &str) -> ParsingOptions {
    // TODO: Implement reasoning parser; until then none is configured.
    ParsingOptions::new(manager.get_model_tool_call_parser(model), None)
}
...@@ -31,7 +31,6 @@ use super::{ ...@@ -31,7 +31,6 @@ use super::{
metrics::{Endpoint, ResponseMetricCollector}, metrics::{Endpoint, ResponseMetricCollector},
service_v2, service_v2,
}; };
use crate::preprocessor::LLMMetricAnnotation;
use crate::protocols::openai::chat_completions::aggregator::ChatCompletionAggregator; use crate::protocols::openai::chat_completions::aggregator::ChatCompletionAggregator;
use crate::protocols::openai::{ use crate::protocols::openai::{
ParsingOptions, ParsingOptions,
...@@ -42,6 +41,7 @@ use crate::protocols::openai::{ ...@@ -42,6 +41,7 @@ use crate::protocols::openai::{
}; };
use crate::request_template::RequestTemplate; use crate::request_template::RequestTemplate;
use crate::types::Annotated; use crate::types::Annotated;
use crate::{discovery::ModelManager, preprocessor::LLMMetricAnnotation};
use dynamo_runtime::logging::get_distributed_tracing_context; use dynamo_runtime::logging::get_distributed_tracing_context;
use tracing::Instrument; use tracing::Instrument;
...@@ -195,8 +195,8 @@ fn get_or_create_request_id(primary: Option<&str>, headers: &HeaderMap) -> Strin ...@@ -195,8 +195,8 @@ fn get_or_create_request_id(primary: Option<&str>, headers: &HeaderMap) -> Strin
uuid.to_string() uuid.to_string()
} }
fn get_parsing_options(state: &Arc<service_v2::State>, model: &str) -> ParsingOptions { fn get_parsing_options(manager: &ModelManager, model: &str) -> ParsingOptions {
let tool_call_parser = state.manager().get_model_tool_call_parser(model); let tool_call_parser = manager.get_model_tool_call_parser(model);
let reasoning_parser = None; // TODO: Implement reasoning parser let reasoning_parser = None; // TODO: Implement reasoning parser
ParsingOptions::new(tool_call_parser, reasoning_parser) ParsingOptions::new(tool_call_parser, reasoning_parser)
...@@ -274,7 +274,7 @@ async fn completions( ...@@ -274,7 +274,7 @@ async fn completions(
.get_completions_engine(model) .get_completions_engine(model)
.map_err(|_| ErrorMessage::model_not_found())?; .map_err(|_| ErrorMessage::model_not_found())?;
let parsing_options = get_parsing_options(&state, model); let parsing_options = get_parsing_options(state.manager(), model);
let mut inflight_guard = let mut inflight_guard =
state state
...@@ -501,7 +501,7 @@ async fn chat_completions( ...@@ -501,7 +501,7 @@ async fn chat_completions(
.get_chat_completions_engine(model) .get_chat_completions_engine(model)
.map_err(|_| ErrorMessage::model_not_found())?; .map_err(|_| ErrorMessage::model_not_found())?;
let parsing_options = get_parsing_options(&state, model); let parsing_options = get_parsing_options(state.manager(), model);
let mut inflight_guard = let mut inflight_guard =
state state
...@@ -736,7 +736,7 @@ async fn responses( ...@@ -736,7 +736,7 @@ async fn responses(
.get_chat_completions_engine(model) .get_chat_completions_engine(model)
.map_err(|_| ErrorMessage::model_not_found())?; .map_err(|_| ErrorMessage::model_not_found())?;
let parsing_options = get_parsing_options(&state, model); let parsing_options = get_parsing_options(state.manager(), model);
let mut inflight_guard = let mut inflight_guard =
state state
......
...@@ -18,6 +18,7 @@ pub mod endpoint_type; ...@@ -18,6 +18,7 @@ pub mod endpoint_type;
pub mod engines; pub mod engines;
pub mod entrypoint; pub mod entrypoint;
pub mod gguf; pub mod gguf;
pub mod grpc;
pub mod http; pub mod http;
pub mod hub; pub mod hub;
// pub mod key_value_store; // pub mod key_value_store;
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub mod kserve_test {
// Bindings generated from the `inference` proto package, included here so the tests
// can use the gRPC client and message types.
pub mod inference {
tonic::include_proto!("inference");
}
use inference::grpc_inference_service_client::GrpcInferenceServiceClient;
use inference::{
DataType, ModelConfigRequest, ModelInferRequest, ModelInferResponse, ModelMetadataRequest,
};
use anyhow::Error;
use async_stream::stream;
use dynamo_llm::grpc::service::kserve::KserveService;
use dynamo_llm::protocols::{
Annotated,
openai::{
chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
},
completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},
},
};
use dynamo_runtime::{
CancellationToken,
pipeline::{
AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream, SingleIn, async_trait,
},
};
use rstest::*;
use std::sync::Arc;
use std::time::Duration;
use tokio::time::timeout;
use tonic::{Request, Response, transport::Channel};
use dynamo_async_openai::types::Prompt;
/// Test engine for completions: its `generate` splits a string prompt on spaces and
/// streams one response per word.
struct SplitEngine {}
/// Long-running test engine used to observe request cancellation.
struct LongRunningEngine {
// How long a generation runs before it completes on its own, in milliseconds.
delay_ms: u64,
// Set by the generated stream; shared so the test can inspect it after the request.
cancelled: Arc<std::sync::atomic::AtomicBool>,
}
impl LongRunningEngine {
    /// Creates an engine whose generations take `delay_ms` milliseconds; the
    /// cancellation flag starts out cleared.
    fn new(delay_ms: u64) -> Self {
        let cancelled = Arc::new(std::sync::atomic::AtomicBool::new(false));
        Self {
            delay_ms,
            cancelled,
        }
    }

    /// Reports the current value of the cancellation flag.
    fn was_cancelled(&self) -> bool {
        use std::sync::atomic::Ordering;
        self.cancelled.load(Ordering::Acquire)
    }

    /// Sleeps for the full generation delay. After this, a flag that is still set
    /// means the generate stream was terminated early rather than running to
    /// completion.
    async fn wait_for_delay(&self) {
        let delay = std::time::Duration::from_millis(self.delay_ms);
        tokio::time::sleep(delay).await;
    }
}
#[async_trait]
impl
AsyncEngine<
SingleIn<NvCreateCompletionRequest>,
ManyOut<Annotated<NvCreateCompletionResponse>>,
Error,
> for SplitEngine
{
async fn generate(
&self,
request: SingleIn<NvCreateCompletionRequest>,
) -> Result<ManyOut<Annotated<NvCreateCompletionResponse>>, Error> {
let (request, context) = request.transfer(());
let ctx = context.context();
// let generator = NvCreateChatCompletionStreamResponse::generator(request.model.clone());
let generator = request.response_generator(ctx.id().to_string());
let word_list: Vec<String> = match request.inner.prompt {
Prompt::String(str) => str.split(' ').map(|s| s.to_string()).collect(),
_ => {
return Err(Error::msg("SplitEngine only support prompt type String"))?;
}
};
let stream = stream! {
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
for word in word_list {
yield Annotated::from_data(generator.create_choice(0, Some(word.to_string()), None, None));
}
};
Ok(ResponseStream::new(Box::pin(stream), ctx))
}
}
// Completions engine that takes `delay_ms` to finish and records in `cancelled`
// whether the generation ran to completion.
#[async_trait]
impl
AsyncEngine<
SingleIn<NvCreateCompletionRequest>,
ManyOut<Annotated<NvCreateCompletionResponse>>,
Error,
> for LongRunningEngine
{
async fn generate(
&self,
request: SingleIn<NvCreateCompletionRequest>,
) -> Result<ManyOut<Annotated<NvCreateCompletionResponse>>, Error> {
// The request payload is ignored; only its context is used.
let (_request, context) = request.transfer(());
let ctx = context.context();
tracing::info!(
"LongRunningEngine: Starting generation with {}ms delay",
self.delay_ms
);
let cancelled_flag = self.cancelled.clone();
let delay_ms = self.delay_ms;
let ctx_clone = ctx.clone();
let stream = async_stream::stream! {
// the stream can be dropped or it can be cancelled
// either way we consider this a cancellation
// The flag is set up front so that a stream dropped while waiting leaves it set.
cancelled_flag.store(true, std::sync::atomic::Ordering::SeqCst);
tokio::select! {
_ = tokio::time::sleep(std::time::Duration::from_millis(delay_ms)) => {
// the stream went to completion
cancelled_flag.store(false, std::sync::atomic::Ordering::SeqCst);
}
_ = ctx_clone.stopped() => {
cancelled_flag.store(true, std::sync::atomic::Ordering::SeqCst);
}
}
// A single sentinel annotation is emitted whichever branch finished.
yield Annotated::<NvCreateCompletionResponse>::from_annotation("event.dynamo.test.sentinel", &"DONE".to_string()).expect("Failed to create annotated response");
};
Ok(ResponseStream::new(Box::pin(stream), ctx))
}
}
/// Test engine whose `generate` always returns an "Always fail" error.
struct AlwaysFailEngine {}
#[async_trait]
impl
AsyncEngine<
SingleIn<NvCreateChatCompletionRequest>,
ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>,
Error,
> for AlwaysFailEngine
{
async fn generate(
&self,
_request: SingleIn<NvCreateChatCompletionRequest>,
) -> Result<ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>, Error> {
Err(Error::msg("Always fail"))?
}
}
#[async_trait]
impl
AsyncEngine<
SingleIn<NvCreateCompletionRequest>,
ManyOut<Annotated<NvCreateCompletionResponse>>,
Error,
> for AlwaysFailEngine
{
async fn generate(
&self,
_request: SingleIn<NvCreateCompletionRequest>,
) -> Result<ManyOut<Annotated<NvCreateCompletionResponse>>, Error> {
Err(Error::msg("Always fail"))?
}
}
/// Waits for the gRPC service on `port` to accept connections and returns a
/// connected client.
///
/// Connection attempts are retried every 10ms. Panics with the last connection
/// error once `timeout_secs` seconds have elapsed without success.
async fn get_ready_client(port: u16, timeout_secs: u64) -> GrpcInferenceServiceClient<Channel> {
    let max_wait = tokio::time::Duration::from_secs(timeout_secs);
    let started = tokio::time::Instant::now();
    loop {
        let endpoint = format!("http://0.0.0.0:{}", port);
        let err = match GrpcInferenceServiceClient::connect(endpoint).await {
            Ok(client) => return client,
            Err(err) => err,
        };
        if started.elapsed() >= max_wait {
            panic!("Service failed to start within timeout: {}", err);
        }
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
    }
}
/// Fixture producing a `text_input` tensor (`BYTES`, shape `[1]`) whose inline
/// contents hold `text`.
#[fixture]
fn text_input(
    #[default("dummy input")] text: &str,
) -> inference::model_infer_request::InferInputTensor {
    let contents = inference::InferTensorContents {
        bytes_contents: vec![text.as_bytes().to_vec()],
        ..Default::default()
    };
    inference::model_infer_request::InferInputTensor {
        name: "text_input".to_string(),
        datatype: "BYTES".to_string(),
        shape: vec![1],
        contents: Some(contents),
        ..Default::default()
    }
}
/// Fixture building a `KserveService` on `port` with the test engines registered.
///
/// Registered models: `split` (completions), `failure` (chat completions and
/// completions) and `long_running` (completions, 1000ms delay). The engines are
/// returned alongside the service so tests can inspect them.
#[fixture]
fn service_with_engines(
    #[default(8990)] port: u16,
) -> (
    KserveService,
    Arc<SplitEngine>,
    Arc<AlwaysFailEngine>,
    Arc<LongRunningEngine>,
) {
    let service = KserveService::builder().port(port).build().unwrap();

    let split = Arc::new(SplitEngine {});
    let failure = Arc::new(AlwaysFailEngine {});
    let long_running = Arc::new(LongRunningEngine::new(1_000));

    {
        let registry = service.model_manager();
        registry
            .add_completions_model("split", split.clone())
            .unwrap();
        registry
            .add_chat_completions_model("failure", failure.clone())
            .unwrap();
        registry
            .add_completions_model("failure", failure.clone())
            .unwrap();
        registry
            .add_completions_model("long_running", long_running.clone())
            .unwrap();
    }

    (service, split, failure, long_running)
}
/// Guard for a service running on a background task; dropping it cancels the
/// token the service was started with.
struct RunningService {
    token: CancellationToken,
}

impl RunningService {
    /// Spawns `service.run` on the Tokio runtime with a clone of a fresh
    /// cancellation token and keeps the original for shutdown.
    fn spawn(service: KserveService) -> Self {
        let token = CancellationToken::new();
        let server_token = token.clone();
        tokio::spawn(async move { service.run(server_token).await });
        Self { token }
    }
}

impl Drop for RunningService {
    /// Signals the spawned service to stop.
    fn drop(&mut self) {
        self.token.cancel();
    }
}
// Tests may run in parallel, so each test case gets its own port. This enum keeps
// track of which port belongs to which test; the discriminant is the port number
// and is used via `as u16`.
enum TestPort {
InferFailure = 8988,
InferSuccess = 8989,
StreamInferFailure = 8990,
StreamInferSuccess = 8991,
InferCancellation = 8992,
StreamInferCancellation = 8993,
ModelInfo = 8994,
}
/// Verifies the error codes returned by unary `ModelInfer` for an unregistered
/// model, a missing required input, a streaming request, and an engine failure.
#[rstest]
#[tokio::test]
async fn test_infer_failure(
    #[with(TestPort::InferFailure as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);
    let mut client = get_ready_client(TestPort::InferFailure as u16, 5).await;

    // Every request in this test shares the version and id; only the model name
    // and the inputs vary.
    let build_request =
        |model: &str, inputs: Vec<inference::model_infer_request::InferInputTensor>| {
            tonic::Request::new(ModelInferRequest {
                model_name: model.into(),
                model_version: "1".into(),
                id: "1234".into(),
                inputs,
                ..Default::default()
            })
        };

    // unknown model
    let response = client
        .model_infer(build_request("Tonic", vec![text_input.clone()]))
        .await;
    assert!(response.is_err());
    let err = response.unwrap_err();
    assert_eq!(
        err.code(),
        tonic::Code::NotFound,
        "Expected NotFound error for unregistered model, got {}",
        err
    );

    // missing input
    let response = client.model_infer(build_request("split", vec![])).await;
    assert!(response.is_err());
    let err = response.unwrap_err();
    assert_eq!(
        err.code(),
        tonic::Code::InvalidArgument,
        "Expected InvalidArgument error for missing input, got {}",
        err
    );

    // streaming requested on the unary endpoint
    let streaming_input = inference::model_infer_request::InferInputTensor {
        name: "streaming".into(),
        datatype: "BOOL".into(),
        shape: vec![1],
        contents: Some(inference::InferTensorContents {
            bool_contents: vec![true],
            ..Default::default()
        }),
        ..Default::default()
    };
    let response = client
        .model_infer(build_request(
            "split",
            vec![text_input.clone(), streaming_input],
        ))
        .await;
    assert!(response.is_err());
    let err = response.unwrap_err();
    assert_eq!(
        err.code(),
        tonic::Code::InvalidArgument,
        "Expected InvalidArgument error for streaming, got {}",
        err
    );
    assert!(
        err.message().contains("Streaming is not supported"),
        "Expected error message to contain 'Streaming is not supported', got: {}",
        err.message()
    );

    // AlwaysFailEngine
    let response = client
        .model_infer(build_request("failure", vec![text_input]))
        .await;
    assert!(response.is_err());
    let err = response.unwrap_err();
    assert_eq!(
        err.code(),
        tonic::Code::Internal,
        "Expected Internal error for failing engine, got {}",
        err
    );
    assert!(
        err.message().contains("Failed to generate completions:"),
        "Expected error message to contain 'Failed to generate completions:', got: {}",
        err.message()
    );
}
/// Verifies unary `ModelInfer` against the `split` model, with the prompt sent
/// first through inline tensor contents and then through `raw_input_contents`.
#[rstest]
#[tokio::test]
async fn test_infer_success(
    #[with(TestPort::InferSuccess as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);
    let mut client = get_ready_client(TestPort::InferSuccess as u16, 5).await;
    let model_name = "split";

    // Input data carried inline in the tensor contents
    let inline_request = tonic::Request::new(ModelInferRequest {
        model_name: model_name.into(),
        model_version: "1".into(),
        id: "1234".into(),
        inputs: vec![text_input.clone()],
        ..Default::default()
    });
    let response = client.model_infer(inline_request).await.unwrap();
    validate_infer_response(response, model_name);

    // Input data in raw_input_contents: a little-endian u32 length prefix
    // followed by the text bytes.
    let payload = "dummy input";
    let mut serialized_input = (payload.len() as u32).to_le_bytes().to_vec();
    serialized_input.extend_from_slice(payload.as_bytes());
    // Clear contents so the server reads from raw_input_contents
    let raw_tensor = inference::model_infer_request::InferInputTensor {
        contents: None,
        ..text_input
    };
    let raw_request = tonic::Request::new(ModelInferRequest {
        model_name: model_name.into(),
        model_version: "1".into(),
        id: "1234".into(),
        inputs: vec![raw_tensor],
        raw_input_contents: vec![serialized_input],
        ..Default::default()
    });
    let response = client.model_infer(raw_request).await.unwrap();
    validate_infer_response(response, model_name);
}
/// Asserts that `response` is the expected reply for the "dummy input" prompt:
/// matching model name, a single `text_output` element `dummyinput`, and an
/// empty `finish_reason` tensor. Panics on any other output tensor.
fn validate_infer_response(response: Response<ModelInferResponse>, model_name: &str) {
    let reply = response.get_ref();
    assert_eq!(
        reply.model_name, model_name,
        "Expected response of the same model name",
    );
    for output in &reply.outputs {
        let tensor_name = output.name.as_str();
        if tensor_name == "text_output" {
            assert_eq!(
                output.datatype, "BYTES",
                "Expected 'text_output' to have datatype 'BYTES'"
            );
            assert_eq!(
                output.shape,
                vec![1],
                "Expected 'text_output' to have shape [1]"
            );
            let expected_output: Vec<Vec<u8>> = vec!["dummyinput".into()];
            assert_eq!(
                output.contents.as_ref().unwrap().bytes_contents,
                expected_output,
                "Expected 'text_output' to contain 'dummy input'"
            );
        } else if tensor_name == "finish_reason" {
            assert_eq!(
                output.datatype, "BYTES",
                "Expected 'finish_reason' to have datatype 'BYTES'"
            );
            assert_eq!(
                output.shape,
                vec![0],
                "Expected 'finish_reason' to have shape [0]"
            );
        } else {
            panic!("Unexpected output name: {}", output.name);
        }
    }
}
/// Verifies that abandoning a unary `ModelInfer` call on the client side cancels
/// the generation in the long-running engine.
#[rstest]
#[tokio::test]
async fn test_infer_cancellation(
    #[with(TestPort::InferCancellation as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    let (service, _, _, long_running) = service_with_engines;
    // start server
    let _running = RunningService::spawn(service);
    // create client and send request to the long running model
    let mut client = get_ready_client(TestPort::InferCancellation as u16, 5).await;
    let request = tonic::Request::new(ModelInferRequest {
        model_name: "long_running".into(),
        model_version: "1".into(),
        id: "1234".into(),
        inputs: vec![text_input],
        ..Default::default()
    });
    assert!(
        !long_running.was_cancelled(),
        "Expected long running engine is not cancelled"
    );

    // Cancel the request by dropping the request future after 500ms, which is
    // shorter than the engine's generation delay.
    let timed_out = timeout(Duration::from_millis(500), client.model_infer(request))
        .await
        .is_err();
    if timed_out {
        println!("Cancelled request after 500ms");
    }
    assert!(timed_out, "Expected client timed out",);

    // Wait out the generation delay so an uncancelled run would have cleared the flag.
    long_running.wait_for_delay().await;
    assert!(
        long_running.was_cancelled(),
        "Expected long running engine to be cancelled"
    );
}
// Exercises `ModelStreamInfer` against the `split` model in two modes:
// 1. the request sets `streaming = true`: one response per word of the prompt;
// 2. the request omits `streaming`: one complete response per request.
#[rstest]
#[tokio::test]
async fn test_stream_infer_success(
#[with(TestPort::StreamInferSuccess as u16)] service_with_engines: (
KserveService,
Arc<SplitEngine>,
Arc<AlwaysFailEngine>,
Arc<LongRunningEngine>,
),
text_input: inference::model_infer_request::InferInputTensor,
) {
// start server
let _running = RunningService::spawn(service_with_engines.0);
// create client; all requests below target the registered `split` model
let mut client = get_ready_client(TestPort::StreamInferSuccess as u16, 5).await;
let model_name = "split";
let request_id = "1234";
// Response streaming true
{
let text_input = text_input.clone();
// Outbound request stream carrying a single request with `streaming = true`
let outbound = async_stream::stream! {
let request_count = 1;
for _ in 0..request_count {
let request = ModelInferRequest {
model_name: model_name.into(),
model_version: "1".into(),
id: request_id.into(),
inputs: vec![text_input.clone(),
inference::model_infer_request::InferInputTensor{
name: "streaming".into(),
datatype: "BOOL".into(),
shape: vec![1],
contents: Some(inference::InferTensorContents {
bool_contents: vec![true],
..Default::default()
}),
..Default::default()
}],
..Default::default()
};
yield request;
}
};
let response = client
.model_stream_infer(Request::new(outbound))
.await
.unwrap();
let mut inbound = response.into_inner();
// Counts received responses and indexes into the expected per-word output
let mut response_idx = 0;
while let Some(response) = inbound.message().await.unwrap() {
assert!(
response.error_message.is_empty(),
"Expected successful inference"
);
assert!(
response.infer_response.is_some(),
"Expected successful inference"
);
if let Some(response) = &response.infer_response {
assert_eq!(
response.model_name, model_name,
"Expected response of the same model name",
);
assert_eq!(
response.id, request_id,
"Expected response ID to match request ID"
);
// The prompt "dummy input" is split on spaces: one word per response, in order
let expected_output: Vec<Vec<u8>> = vec!["dummy".into(), "input".into()];
for output in &response.outputs {
match output.name.as_str() {
"text_output" => {
assert_eq!(
output.datatype, "BYTES",
"Expected 'text_output' to have datatype 'BYTES'"
);
assert_eq!(
output.shape,
vec![1],
"Expected 'text_output' to have shape [1]"
);
assert_eq!(
output.contents.as_ref().unwrap().bytes_contents,
vec![expected_output[response_idx].clone()],
"Expected 'text_output' to contain 'dummy input'"
);
}
"finish_reason" => {
assert_eq!(
output.datatype, "BYTES",
"Expected 'finish_reason' to have datatype 'BYTES'"
);
assert_eq!(
output.shape,
vec![0],
"Expected 'finish_reason' to have shape [0]"
);
}
_ => panic!("Unexpected output name: {}", output.name),
}
}
}
response_idx += 1;
}
assert_eq!(response_idx, 2, "Expected 2 responses")
}
// Response streaming false
{
let text_input = text_input.clone();
// Outbound request stream carrying two requests without the `streaming` input;
// each request uses its index as the request ID
let outbound = async_stream::stream! {
let request_count = 2;
for idx in 0..request_count {
let request = ModelInferRequest {
model_name: model_name.into(),
model_version: "1".into(),
id: format!("{idx}"),
inputs: vec![text_input.clone()],
..Default::default()
};
yield request;
}
};
let response = client
.model_stream_infer(Request::new(outbound))
.await
.unwrap();
let mut inbound = response.into_inner();
let mut response_idx = 0;
while let Some(response) = inbound.message().await.unwrap() {
assert!(
response.error_message.is_empty(),
"Expected successful inference"
);
assert!(
response.infer_response.is_some(),
"Expected successful inference"
);
// Each response is the complete inference
if let Some(response) = &response.infer_response {
assert_eq!(
response.model_name, model_name,
"Expected response of the same model name",
);
// [gluo NOTE] Here we assume the responses across requests are
// processed in the order of receiving requests, which is not true
// if we improve stream handling in gRPC frontend. Consider:
// time 0: request 0 -> long running -> response 0 (time 5)
// time 1: request 1 -> short running -> response 1 (time 2)
// We expect response 1 to be received before response 0 as their
// requests are independent from each other.
assert_eq!(
response.id,
format!("{response_idx}"),
"Expected response ID to match request ID"
);
for output in &response.outputs {
match output.name.as_str() {
"text_output" => {
assert_eq!(
output.datatype, "BYTES",
"Expected 'text_output' to have datatype 'BYTES'"
);
assert_eq!(
output.shape,
vec![1],
"Expected 'text_output' to have shape [1]"
);
// The per-word chunks are folded into one text without separators
let expected_output: Vec<Vec<u8>> = vec!["dummyinput".into()];
assert_eq!(
output.contents.as_ref().unwrap().bytes_contents,
expected_output,
"Expected 'text_output' to contain 'dummyinput'"
);
}
"finish_reason" => {
assert_eq!(
output.datatype, "BYTES",
"Expected 'finish_reason' to have datatype 'BYTES'"
);
assert_eq!(
output.shape,
vec![0],
"Expected 'finish_reason' to have shape [0]"
);
}
_ => panic!("Unexpected output name: {}", output.name),
}
}
}
response_idx += 1;
}
assert_eq!(
response_idx, 2,
"Expected 2 responses, each for one of the two requests"
)
}
}
/// Verifies that a streaming inference request to the "failure" model is
/// reported as an error on the response stream.
///
/// The stream must yield a `tonic::Code::Internal` status whose message
/// contains "Failed to generate completions:" and must never yield a
/// successful message. A stream that ends without reporting any error fails
/// the test, so a silently closed stream cannot pass vacuously.
#[rstest]
#[tokio::test]
async fn test_stream_infer_failure(
    #[with(TestPort::StreamInferFailure as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);

    // create client and send a streaming request to the "failure" model
    // NOTE(review): presumably this model is backed by `AlwaysFailEngine` in
    // the fixture; confirm against the fixture definition.
    let mut client = get_ready_client(TestPort::StreamInferFailure as u16, 5).await;
    let model_name = "failure";
    let outbound = async_stream::stream! {
        let request_count = 1;
        for _ in 0..request_count {
            let request = ModelInferRequest {
                model_name: model_name.into(),
                model_version: "1".into(),
                id: "1234".into(),
                inputs: vec![text_input.clone()],
                ..Default::default()
            };
            yield request;
        }
    };

    // Opening the stream itself is expected to succeed; the failure is
    // delivered through the stream.
    let response = client
        .model_stream_infer(Request::new(outbound))
        .await
        .unwrap();
    let mut inbound = response.into_inner();

    // Tracks whether the error arm ran, so that a stream which simply ends
    // is not mistaken for a correctly reported failure.
    let mut saw_error = false;
    loop {
        match inbound.message().await {
            Ok(Some(_)) => {
                panic!("Expecting failure in the stream");
            }
            Err(err) => {
                assert_eq!(
                    err.code(),
                    tonic::Code::Internal,
                    "Expected Internal error for streaming, get {}",
                    err
                );
                assert!(
                    err.message().contains("Failed to generate completions:"),
                    "Expected error message to contain 'Failed to generate completions:', got: {}",
                    err.message()
                );
                saw_error = true;
                // Keep polling: the loop only exits once the stream reports
                // its end.
            }
            Ok(None) => {
                // End of stream
                break;
            }
        }
    }
    assert!(
        saw_error,
        "Expected the stream to report an error before it ended"
    );
}
/// Checks that dropping the response of a streaming inference request is
/// observed as a cancellation by the long-running engine.
///
/// Sequence: the engine must not be flagged as cancelled before the call, the
/// streaming call must return its response within 500 ms, and after the
/// response is dropped and `wait_for_delay` completes the engine must report
/// `was_cancelled()`.
#[rstest]
#[tokio::test]
async fn test_stream_infer_cancellation(
    #[with(TestPort::StreamInferCancellation as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    let (service, _, _, engine) = service_with_engines;

    // Start the server; the handle stays bound until the end of the test.
    let _running = RunningService::spawn(service);

    // Connect a client; the request below targets the "long_running" model.
    let mut client = get_ready_client(TestPort::StreamInferCancellation as u16, 5).await;

    // Request stream carrying exactly one inference request.
    let requests = async_stream::stream! {
        yield ModelInferRequest {
            model_name: "long_running".into(),
            model_version: "1".into(),
            id: "1234".into(),
            inputs: vec![text_input.clone()],
            ..Default::default()
        };
    };

    assert!(
        !engine.was_cancelled(),
        "Expected long running engine is still running"
    );

    // The call must hand back the response stream within 500 ms; hitting the
    // timeout is treated as a test failure.
    let Ok(call_result) = timeout(
        Duration::from_millis(500),
        client.model_stream_infer(Request::new(requests)),
    )
    .await
    else {
        panic!("Expected response stream is returned immediately");
    };
    let stream_response = call_result.unwrap();

    // Cancel the request by dropping the response stream.
    drop(stream_response);

    engine.wait_for_delay().await;
    assert!(
        engine.was_cancelled(),
        "Expected long running engine to be cancelled"
    );
}
/// Exercises the `model_metadata` and `model_config` endpoints.
///
/// Covers two scenarios:
/// * an unregistered model name ("Tonic") must produce `tonic::Code::NotFound`
///   from both endpoints;
/// * the "split" model must report the expected name, and every listed
///   input/output tensor must carry the expected datatype and shape in both
///   the metadata and the config representation.
#[rstest]
#[tokio::test]
async fn test_model_info(
    #[with(TestPort::ModelInfo as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);

    // create client
    let mut client = get_ready_client(TestPort::ModelInfo as u16, 5).await;

    // Failure: metadata of an unregistered model
    let request = tonic::Request::new(ModelMetadataRequest {
        name: "Tonic".into(),
        version: "".into(),
    });
    let err = client
        .model_metadata(request)
        .await
        .expect_err("Expected model_metadata to fail for unregistered model");
    assert_eq!(
        err.code(),
        tonic::Code::NotFound,
        "Expected NotFound error for unregistered model, get {}",
        err
    );

    // Failure: config of an unregistered model
    let request = tonic::Request::new(ModelConfigRequest {
        name: "Tonic".into(),
        version: "".into(),
    });
    let err = client
        .model_config(request)
        .await
        .expect_err("Expected model_config to fail for unregistered model");
    assert_eq!(
        err.code(),
        tonic::Code::NotFound,
        "Expected NotFound error for unregistered model, get {}",
        err
    );

    // Success metadata
    let model_name = "split";
    let request = tonic::Request::new(ModelMetadataRequest {
        name: model_name.into(),
        version: "1".into(),
    });
    let response = client.model_metadata(request).await.unwrap();
    let metadata = response.get_ref();
    assert_eq!(
        metadata.name, model_name,
        "Expected response of the same model name",
    );
    // input
    for io in &metadata.inputs {
        match io.name.as_str() {
            "text_input" => {
                assert_eq!(
                    io.datatype, "BYTES",
                    "Expected 'text_input' to have datatype 'BYTES'"
                );
                assert_eq!(
                    io.shape,
                    vec![1],
                    "Expected 'text_input' to have shape [1]"
                );
            }
            "streaming" => {
                assert_eq!(
                    io.datatype, "BOOL",
                    "Expected 'streaming' to have datatype 'BOOL'"
                );
                assert_eq!(io.shape, vec![1], "Expected 'streaming' to have shape [1]");
            }
            _ => panic!("Unexpected input name: {}", io.name),
        }
    }
    // output
    for io in &metadata.outputs {
        match io.name.as_str() {
            "text_output" => {
                assert_eq!(
                    io.datatype, "BYTES",
                    "Expected 'text_output' to have datatype 'BYTES'"
                );
                assert_eq!(
                    io.shape,
                    vec![-1],
                    "Expected 'text_output' to have shape [-1]"
                );
            }
            "finish_reason" => {
                assert_eq!(
                    io.datatype, "BYTES",
                    "Expected 'finish_reason' to have datatype 'BYTES'"
                );
                assert_eq!(
                    io.shape,
                    vec![-1],
                    "Expected 'finish_reason' to have shape [-1]"
                );
            }
            _ => panic!("Unexpected output name: {}", io.name),
        }
    }

    // success config
    let request = tonic::Request::new(ModelConfigRequest {
        name: model_name.into(),
        version: "1".into(),
    });
    let response = client
        .model_config(request)
        .await
        .unwrap()
        .into_inner()
        .config;
    let Some(config) = response else {
        panic!("Expected Some(config), got None");
    };
    assert_eq!(
        config.name, model_name,
        "Expected response of the same model name",
    );
    // input
    for io in &config.input {
        match io.name.as_str() {
            "text_input" => {
                assert_eq!(
                    io.data_type,
                    DataType::TypeString as i32,
                    "Expected 'text_input' to have datatype 'TYPE_STRING'"
                );
                assert_eq!(io.dims, vec![1], "Expected 'text_input' to have shape [1]");
            }
            "streaming" => {
                assert_eq!(
                    io.data_type,
                    DataType::TypeBool as i32,
                    "Expected 'streaming' to have datatype 'TYPE_BOOL'"
                );
                assert_eq!(io.dims, vec![1], "Expected 'streaming' to have shape [1]");
            }
            _ => panic!("Unexpected input name: {}", io.name),
        }
    }
    // output
    for io in &config.output {
        match io.name.as_str() {
            "text_output" => {
                assert_eq!(
                    io.data_type,
                    DataType::TypeString as i32,
                    "Expected 'text_output' to have datatype 'TYPE_STRING'"
                );
                assert_eq!(
                    io.dims,
                    vec![-1],
                    "Expected 'text_output' to have shape [-1]"
                );
            }
            "finish_reason" => {
                assert_eq!(
                    io.data_type,
                    DataType::TypeString as i32,
                    "Expected 'finish_reason' to have datatype 'TYPE_STRING'"
                );
                assert_eq!(
                    io.dims,
                    vec![-1],
                    "Expected 'finish_reason' to have shape [-1]"
                );
            }
            _ => panic!("Unexpected output name: {}", io.name),
        }
    }
}
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment