Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
91a459c0
Unverified
Commit
91a459c0
authored
Aug 26, 2025
by
GuanLuo
Committed by
GitHub
Aug 26, 2025
Browse files
feat: KServe gRPC support (#2638)
parent
ef535edb
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
5374 additions
and
503 deletions
+5374
-503
.github/workflows/pre-merge-rust.yml
.github/workflows/pre-merge-rust.yml
+2
-2
Cargo.lock
Cargo.lock
+379
-366
components/frontend/src/dynamo/frontend/main.py
components/frontend/src/dynamo/frontend/main.py
+8
-0
launch/dynamo-run/src/lib.rs
launch/dynamo-run/src/lib.rs
+0
-1
launch/dynamo-run/src/main.rs
launch/dynamo-run/src/main.rs
+1
-1
lib/bindings/python/Cargo.lock
lib/bindings/python/Cargo.lock
+130
-126
lib/llm/Cargo.toml
lib/llm/Cargo.toml
+10
-0
lib/llm/build.rs
lib/llm/build.rs
+7
-1
lib/llm/src/entrypoint/input.rs
lib/llm/src/entrypoint/input.rs
+9
-0
lib/llm/src/entrypoint/input/grpc.rs
lib/llm/src/entrypoint/input/grpc.rs
+170
-0
lib/llm/src/grpc.rs
lib/llm/src/grpc.rs
+4
-0
lib/llm/src/grpc/protos/kserve.proto
lib/llm/src/grpc/protos/kserve.proto
+624
-0
lib/llm/src/grpc/protos/model_config.proto
lib/llm/src/grpc/protos/model_config.proto
+2140
-0
lib/llm/src/grpc/service.rs
lib/llm/src/grpc/service.rs
+5
-0
lib/llm/src/grpc/service/kserve.rs
lib/llm/src/grpc/service/kserve.rs
+625
-0
lib/llm/src/grpc/service/openai.rs
lib/llm/src/grpc/service/openai.rs
+198
-0
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+6
-6
lib/llm/src/lib.rs
lib/llm/src/lib.rs
+1
-0
lib/llm/tests/kserve_service.rs
lib/llm/tests/kserve_service.rs
+1055
-0
No files found.
.github/workflows/pre-merge-rust.yml
View file @
91a459c0
...
@@ -100,11 +100,11 @@ jobs:
...
@@ -100,11 +100,11 @@ jobs:
# Have an explicit step to build tests first to separate time spent on build vs execution.
# Have an explicit step to build tests first to separate time spent on build vs execution.
-
name
:
Compile Tests
-
name
:
Compile Tests
working-directory
:
${{ matrix.dir }}
working-directory
:
${{ matrix.dir }}
run
:
cargo test --locked --no-run
run
:
cargo test --locked --no-run
--target-dir ${HOME}/tmp || df -h
-
name
:
Run Doc Tests
-
name
:
Run Doc Tests
working-directory
:
${{ matrix.dir }}
working-directory
:
${{ matrix.dir }}
run
:
cargo doc --no-deps && cargo test --locked --doc
run
:
cargo doc --no-deps && cargo test --locked --doc
-
name
:
Run Unit Tests
-
name
:
Run Unit Tests
working-directory
:
${{ matrix.dir }}
working-directory
:
${{ matrix.dir }}
# NOTE: --all-targets doesn't run doc tests
# NOTE: --all-targets doesn't run doc tests
run
:
cargo test --locked --all-targets
run
:
cargo test --locked --all-targets
--target-dir ${HOME}/tmp
Cargo.lock
View file @
91a459c0
...
@@ -33,7 +33,7 @@ version = "0.8.12"
...
@@ -33,7 +33,7 @@ version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"getrandom 0.3.3",
"getrandom 0.3.3",
"once_cell",
"once_cell",
"serde",
"serde",
...
@@ -88,9 +88,9 @@ dependencies = [
...
@@ -88,9 +88,9 @@ dependencies = [
[[package]]
[[package]]
name = "anstream"
name = "anstream"
version = "0.6.
19
"
version = "0.6.
20
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3
01af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933
"
checksum = "3
ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192
"
dependencies = [
dependencies = [
"anstyle",
"anstyle",
"anstyle-parse",
"anstyle-parse",
...
@@ -118,29 +118,29 @@ dependencies = [
...
@@ -118,29 +118,29 @@ dependencies = [
[[package]]
[[package]]
name = "anstyle-query"
name = "anstyle-query"
version = "1.1.
3
"
version = "1.1.
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9
"
checksum = "
9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2
"
dependencies = [
dependencies = [
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
name = "anstyle-wincon"
name = "anstyle-wincon"
version = "3.0.
9
"
version = "3.0.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882
"
checksum = "
3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a
"
dependencies = [
dependencies = [
"anstyle",
"anstyle",
"once_cell_polyfill",
"once_cell_polyfill",
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
name = "anyhow"
name = "anyhow"
version = "1.0.9
8
"
version = "1.0.9
9
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487
"
checksum = "
b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100
"
[[package]]
[[package]]
name = "apodize"
name = "apodize"
...
@@ -159,9 +159,9 @@ dependencies = [
...
@@ -159,9 +159,9 @@ dependencies = [
[[package]]
[[package]]
name = "arbitrary"
name = "arbitrary"
version = "1.4.
1
"
version = "1.4.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223
"
checksum = "
c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1
"
dependencies = [
dependencies = [
"derive_arbitrary",
"derive_arbitrary",
]
]
...
@@ -180,7 +180,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
...
@@ -180,7 +180,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -257,7 +257,7 @@ checksum = "0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398"
...
@@ -257,7 +257,7 @@ checksum = "0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -279,18 +279,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
...
@@ -279,18 +279,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "async-trait"
name = "async-trait"
version = "0.1.8
8
"
version = "0.1.8
9
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedff
db
5
"
checksum = "
9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5
db
b
"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -439,7 +439,7 @@ dependencies = [
...
@@ -439,7 +439,7 @@ dependencies = [
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"http-body-util",
"http-body-util",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-util",
"hyper-util",
"itoa",
"itoa",
"matchit 0.8.4",
"matchit 0.8.4",
...
@@ -508,7 +508,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
...
@@ -508,7 +508,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -522,7 +522,7 @@ dependencies = [
...
@@ -522,7 +522,7 @@ dependencies = [
"fs-err",
"fs-err",
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-util",
"hyper-util",
"pin-project-lite",
"pin-project-lite",
"rustls",
"rustls",
...
@@ -554,7 +554,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -554,7 +554,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
dependencies = [
dependencies = [
"addr2line",
"addr2line",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
"miniz_oxide",
"miniz_oxide",
"object",
"object",
...
@@ -601,10 +601,10 @@ version = "0.69.5"
...
@@ -601,10 +601,10 @@ version = "0.69.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cexpr",
"cexpr",
"clang-sys",
"clang-sys",
"itertools 0.1
0.5
",
"itertools 0.1
2.1
",
"lazy_static",
"lazy_static",
"lazycell",
"lazycell",
"log",
"log",
...
@@ -614,7 +614,7 @@ dependencies = [
...
@@ -614,7 +614,7 @@ dependencies = [
"regex",
"regex",
"rustc-hash 1.1.0",
"rustc-hash 1.1.0",
"shlex",
"shlex",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"which",
"which",
]
]
...
@@ -624,10 +624,10 @@ version = "0.71.1"
...
@@ -624,10 +624,10 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cexpr",
"cexpr",
"clang-sys",
"clang-sys",
"itertools 0.1
0.5
",
"itertools 0.1
3.0
",
"log",
"log",
"prettyplease",
"prettyplease",
"proc-macro2",
"proc-macro2",
...
@@ -635,7 +635,7 @@ dependencies = [
...
@@ -635,7 +635,7 @@ dependencies = [
"regex",
"regex",
"rustc-hash 2.1.1",
"rustc-hash 2.1.1",
"shlex",
"shlex",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -644,10 +644,10 @@ version = "0.72.0"
...
@@ -644,10 +644,10 @@ version = "0.72.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f72209734318d0b619a5e0f5129918b848c416e122a3c4ce054e03cb87b726f"
checksum = "4f72209734318d0b619a5e0f5129918b848c416e122a3c4ce054e03cb87b726f"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cexpr",
"cexpr",
"clang-sys",
"clang-sys",
"itertools 0.1
0.5
",
"itertools 0.1
3.0
",
"log",
"log",
"prettyplease",
"prettyplease",
"proc-macro2",
"proc-macro2",
...
@@ -655,7 +655,7 @@ dependencies = [
...
@@ -655,7 +655,7 @@ dependencies = [
"regex",
"regex",
"rustc-hash 2.1.1",
"rustc-hash 2.1.1",
"shlex",
"shlex",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -723,9 +723,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
...
@@ -723,9 +723,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
[[package]]
name = "bitflags"
name = "bitflags"
version = "2.9.
1
"
version = "2.9.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967
"
checksum = "
34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d
"
[[package]]
[[package]]
name = "bitstream-io"
name = "bitstream-io"
...
@@ -742,7 +742,7 @@ dependencies = [
...
@@ -742,7 +742,7 @@ dependencies = [
"arrayref",
"arrayref",
"arrayvec",
"arrayvec",
"cc",
"cc",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"constant_time_eq",
"constant_time_eq",
]
]
...
@@ -793,7 +793,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -793,7 +793,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
dependencies = [
"memchr",
"memchr",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"serde",
"serde",
]
]
...
@@ -817,22 +817,22 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
...
@@ -817,22 +817,22 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
[[package]]
[[package]]
name = "bytemuck"
name = "bytemuck"
version = "1.23.
1
"
version = "1.23.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422
"
checksum = "
3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677
"
dependencies = [
dependencies = [
"bytemuck_derive",
"bytemuck_derive",
]
]
[[package]]
[[package]]
name = "bytemuck_derive"
name = "bytemuck_derive"
version = "1.10.
0
"
version = "1.10.
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4
41473f2b4b0459a68628c744bc61d23e730fb00128b841d30fa4bb3972257e4
"
checksum = "4
f154e572231cb6ba2bd1176980827e3d5dc04cc183a75dea38109fbdd672d29
"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -865,9 +865,9 @@ dependencies = [
...
@@ -865,9 +865,9 @@ dependencies = [
"ahash",
"ahash",
"cached_proc_macro",
"cached_proc_macro",
"cached_proc_macro_types",
"cached_proc_macro_types",
"hashbrown 0.15.
4
",
"hashbrown 0.15.
5
",
"once_cell",
"once_cell",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"web-time",
"web-time",
]
]
...
@@ -880,7 +880,7 @@ dependencies = [
...
@@ -880,7 +880,7 @@ dependencies = [
"darling 0.20.11",
"darling 0.20.11",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -930,7 +930,7 @@ dependencies = [
...
@@ -930,7 +930,7 @@ dependencies = [
"rand 0.9.2",
"rand 0.9.2",
"rand_distr 0.5.1",
"rand_distr 0.5.1",
"rayon",
"rayon",
"safetensors 0.6.
1
",
"safetensors 0.6.
2
",
"thiserror 1.0.69",
"thiserror 1.0.69",
"ug",
"ug",
"ug-cuda",
"ug-cuda",
...
@@ -970,7 +970,7 @@ dependencies = [
...
@@ -970,7 +970,7 @@ dependencies = [
"metal 0.27.0",
"metal 0.27.0",
"num-traits",
"num-traits",
"rayon",
"rayon",
"safetensors 0.6.
1
",
"safetensors 0.6.
2
",
"serde",
"serde",
"thiserror 1.0.69",
"thiserror 1.0.69",
]
]
...
@@ -996,24 +996,24 @@ version = "0.27.0"
...
@@ -996,24 +996,24 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb"
checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb"
dependencies = [
dependencies = [
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"heck 0.4.1",
"heck 0.4.1",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"log",
"log",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"serde",
"serde",
"serde_json",
"serde_json",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"tempfile",
"tempfile",
"toml",
"toml",
]
]
[[package]]
[[package]]
name = "cc"
name = "cc"
version = "1.2.3
3
"
version = "1.2.3
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f
"
checksum = "
42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc
"
dependencies = [
dependencies = [
"jobserver",
"jobserver",
"libc",
"libc",
...
@@ -1047,9 +1047,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
...
@@ -1047,9 +1047,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
[[package]]
name = "cfg-if"
name = "cfg-if"
version = "1.0.
1
"
version = "1.0.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268
"
checksum = "
2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9
"
[[package]]
[[package]]
name = "cfg_aliases"
name = "cfg_aliases"
...
@@ -1063,7 +1063,7 @@ version = "0.13.10"
...
@@ -1063,7 +1063,7 @@ version = "0.13.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf"
checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"lazy_static",
"lazy_static",
"num-traits",
"num-traits",
"regex",
"regex",
...
@@ -1110,9 +1110,9 @@ dependencies = [
...
@@ -1110,9 +1110,9 @@ dependencies = [
[[package]]
[[package]]
name = "clap"
name = "clap"
version = "4.5.4
2
"
version = "4.5.4
5
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882
"
checksum = "
1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318
"
dependencies = [
dependencies = [
"clap_builder",
"clap_builder",
"clap_derive",
"clap_derive",
...
@@ -1120,9 +1120,9 @@ dependencies = [
...
@@ -1120,9 +1120,9 @@ dependencies = [
[[package]]
[[package]]
name = "clap_builder"
name = "clap_builder"
version = "4.5.4
2
"
version = "4.5.4
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966
"
checksum = "
b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8
"
dependencies = [
dependencies = [
"anstream",
"anstream",
"anstyle",
"anstyle",
...
@@ -1133,14 +1133,14 @@ dependencies = [
...
@@ -1133,14 +1133,14 @@ dependencies = [
[[package]]
[[package]]
name = "clap_derive"
name = "clap_derive"
version = "4.5.4
1
"
version = "4.5.4
5
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491
"
checksum = "
14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6
"
dependencies = [
dependencies = [
"heck 0.5.0",
"heck 0.5.0",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1177,7 +1177,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -1177,7 +1177,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
dependencies = [
"castaway",
"castaway",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"itoa",
"itoa",
"rustversion",
"rustversion",
"ryu",
"ryu",
...
@@ -1301,7 +1301,7 @@ version = "1.5.0"
...
@@ -1301,7 +1301,7 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -1457,7 +1457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -1457,7 +1457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
dependencies = [
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1506,7 +1506,7 @@ version = "4.1.3"
...
@@ -1506,7 +1506,7 @@ version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cpufeatures",
"cpufeatures",
"curve25519-dalek-derive",
"curve25519-dalek-derive",
"digest",
"digest",
...
@@ -1523,7 +1523,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
...
@@ -1523,7 +1523,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1571,7 +1571,7 @@ dependencies = [
...
@@ -1571,7 +1571,7 @@ dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"strsim 0.11.1",
"strsim 0.11.1",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1593,7 +1593,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
...
@@ -1593,7 +1593,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
dependencies = [
"darling_core 0.20.11",
"darling_core 0.20.11",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1611,7 +1611,7 @@ version = "5.5.3"
...
@@ -1611,7 +1611,7 @@ version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"hashbrown 0.14.5",
"hashbrown 0.14.5",
"lock_api",
"lock_api",
"once_cell",
"once_cell",
...
@@ -1659,7 +1659,7 @@ checksum = "74ef43543e701c01ad77d3a5922755c6a1d71b22d942cb8042be4994b380caff"
...
@@ -1659,7 +1659,7 @@ checksum = "74ef43543e701c01ad77d3a5922755c6a1d71b22d942cb8042be4994b380caff"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1670,18 +1670,18 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc"
...
@@ -1670,18 +1670,18 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "derive_arbitrary"
name = "derive_arbitrary"
version = "1.4.
1
"
version = "1.4.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800
"
checksum = "
1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a
"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1702,7 +1702,7 @@ dependencies = [
...
@@ -1702,7 +1702,7 @@ dependencies = [
"darling 0.20.11",
"darling 0.20.11",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1712,7 +1712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -1712,7 +1712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
dependencies = [
"derive_builder_core",
"derive_builder_core",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1723,7 +1723,7 @@ checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
...
@@ -1723,7 +1723,7 @@ checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1743,7 +1743,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
...
@@ -1743,7 +1743,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1755,8 +1755,8 @@ dependencies = [
...
@@ -1755,8 +1755,8 @@ dependencies = [
"anyhow",
"anyhow",
"bytemuck",
"bytemuck",
"bytemuck_derive",
"bytemuck_derive",
"hashbrown 0.15.
4
",
"hashbrown 0.15.
5
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
"strum",
"strum",
]
]
...
@@ -1837,7 +1837,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
...
@@ -1837,7 +1837,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -1904,12 +1904,12 @@ dependencies = [
...
@@ -1904,12 +1904,12 @@ dependencies = [
"eventsource-stream",
"eventsource-stream",
"futures",
"futures",
"rand 0.9.2",
"rand 0.9.2",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"reqwest-eventsource",
"reqwest-eventsource",
"secrecy",
"secrecy",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-test",
"tokio-test",
...
@@ -1941,7 +1941,7 @@ dependencies = [
...
@@ -1941,7 +1941,7 @@ dependencies = [
"dynamo-llm",
"dynamo-llm",
"dynamo-runtime",
"dynamo-runtime",
"either",
"either",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"mistralrs",
"mistralrs",
"serde_json",
"serde_json",
"tokio",
"tokio",
...
@@ -2002,10 +2002,11 @@ dependencies = [
...
@@ -2002,10 +2002,11 @@ dependencies = [
"parking_lot",
"parking_lot",
"prometheus",
"prometheus",
"proptest",
"proptest",
"prost",
"rand 0.9.2",
"rand 0.9.2",
"rayon",
"rayon",
"regex",
"regex",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"rmp-serde",
"rmp-serde",
"rstest 0.18.2",
"rstest 0.18.2",
"rstest_reuse",
"rstest_reuse",
...
@@ -2015,19 +2016,21 @@ dependencies = [
...
@@ -2015,19 +2016,21 @@ dependencies = [
"serial_test",
"serial_test",
"strum",
"strum",
"tempfile",
"tempfile",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tmq",
"tmq",
"tokenizers",
"tokenizers",
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-util",
"tokio-util",
"toktrie 1.1.1",
"toktrie 1.2.0",
"toktrie_hf_tokenizers 1.1.1",
"toktrie_hf_tokenizers 1.2.0",
"tonic 0.13.1",
"tonic-build",
"tower-http",
"tower-http",
"tracing",
"tracing",
"unicode-segmentation",
"unicode-segmentation",
"url",
"url",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
"validator",
"validator",
"xxhash-rust",
"xxhash-rust",
"zeromq",
"zeromq",
...
@@ -2045,7 +2048,7 @@ dependencies = [
...
@@ -2045,7 +2048,7 @@ dependencies = [
"serde",
"serde",
"serde_json",
"serde_json",
"tracing",
"tracing",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
]
]
[[package]]
[[package]]
...
@@ -2055,7 +2058,7 @@ dependencies = [
...
@@ -2055,7 +2058,7 @@ dependencies = [
"anyhow",
"anyhow",
"async-stream",
"async-stream",
"async-trait",
"async-trait",
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"dynamo-async-openai",
"dynamo-async-openai",
"dynamo-engine-llamacpp",
"dynamo-engine-llamacpp",
"dynamo-engine-mistralrs",
"dynamo-engine-mistralrs",
...
@@ -2073,7 +2076,7 @@ dependencies = [
...
@@ -2073,7 +2076,7 @@ dependencies = [
"tokio-util",
"tokio-util",
"tracing",
"tracing",
"tracing-subscriber",
"tracing-subscriber",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
"vergen-gitcl",
"vergen-gitcl",
]
]
...
@@ -2114,7 +2117,7 @@ dependencies = [
...
@@ -2114,7 +2117,7 @@ dependencies = [
"prometheus",
"prometheus",
"rand 0.9.2",
"rand 0.9.2",
"regex",
"regex",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"rstest 0.23.0",
"rstest 0.23.0",
"serde",
"serde",
"serde_json",
"serde_json",
...
@@ -2122,7 +2125,7 @@ dependencies = [
...
@@ -2122,7 +2125,7 @@ dependencies = [
"stdio-override",
"stdio-override",
"temp-env",
"temp-env",
"tempfile",
"tempfile",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-util",
"tokio-util",
...
@@ -2130,7 +2133,7 @@ dependencies = [
...
@@ -2130,7 +2133,7 @@ dependencies = [
"tracing",
"tracing",
"tracing-subscriber",
"tracing-subscriber",
"url",
"url",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
"validator",
"validator",
"xxhash-rust",
"xxhash-rust",
]
]
...
@@ -2176,7 +2179,7 @@ dependencies = [
...
@@ -2176,7 +2179,7 @@ dependencies = [
"enum-ordinalize",
"enum-ordinalize",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2206,7 +2209,7 @@ version = "0.8.35"
...
@@ -2206,7 +2209,7 @@ version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -2224,7 +2227,7 @@ dependencies = [
...
@@ -2224,7 +2227,7 @@ dependencies = [
"heck 0.5.0",
"heck 0.5.0",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2244,7 +2247,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff"
...
@@ -2244,7 +2247,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2264,7 +2267,7 @@ checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827"
...
@@ -2264,7 +2267,7 @@ checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2307,7 +2310,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
...
@@ -2307,7 +2310,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2410,8 +2413,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -2410,8 +2413,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
dependencies = [
"bit-set 0.5.3",
"bit-set 0.5.3",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -2421,8 +2424,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -2421,8 +2424,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
dependencies = [
"bit-set 0.8.0",
"bit-set 0.8.0",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -2538,7 +2541,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
...
@@ -2538,7 +2541,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2555,9 +2558,9 @@ checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
...
@@ -2555,9 +2558,9 @@ checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
[[package]]
[[package]]
name = "form_urlencoded"
name = "form_urlencoded"
version = "1.2.
1
"
version = "1.2.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456
"
checksum = "
cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf
"
dependencies = [
dependencies = [
"percent-encoding",
"percent-encoding",
]
]
...
@@ -2670,7 +2673,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
...
@@ -2670,7 +2673,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -2990,7 +2993,7 @@ version = "0.2.16"
...
@@ -2990,7 +2993,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"js-sys",
"js-sys",
"libc",
"libc",
"wasi 0.11.1+wasi-snapshot-preview1",
"wasi 0.11.1+wasi-snapshot-preview1",
...
@@ -3003,7 +3006,7 @@ version = "0.3.3"
...
@@ -3003,7 +3006,7 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"js-sys",
"js-sys",
"libc",
"libc",
"r-efi",
"r-efi",
...
@@ -3030,7 +3033,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
...
@@ -3030,7 +3033,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
dependencies = [
dependencies = [
"fancy-regex 0.14.0",
"fancy-regex 0.14.0",
"ggml-quants",
"ggml-quants",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"log",
"log",
"num_enum",
"num_enum",
]
]
...
@@ -3053,9 +3056,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
...
@@ -3053,9 +3056,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
[[package]]
name = "glob"
name = "glob"
version = "0.3.
2
"
version = "0.3.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2
"
checksum = "
0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280
"
[[package]]
[[package]]
name = "globset"
name = "globset"
...
@@ -3066,8 +3069,8 @@ dependencies = [
...
@@ -3066,8 +3069,8 @@ dependencies = [
"aho-corasick",
"aho-corasick",
"bstr",
"bstr",
"log",
"log",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -3082,7 +3085,7 @@ dependencies = [
...
@@ -3082,7 +3085,7 @@ dependencies = [
"futures-sink",
"futures-sink",
"futures-util",
"futures-util",
"http 0.2.12",
"http 0.2.12",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"slab",
"slab",
"tokio",
"tokio",
"tokio-util",
"tokio-util",
...
@@ -3091,9 +3094,9 @@ dependencies = [
...
@@ -3091,9 +3094,9 @@ dependencies = [
[[package]]
[[package]]
name = "h2"
name = "h2"
version = "0.4.1
1
"
version = "0.4.1
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785
"
checksum = "
f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386
"
dependencies = [
dependencies = [
"atomic-waker",
"atomic-waker",
"bytes",
"bytes",
...
@@ -3101,7 +3104,7 @@ dependencies = [
...
@@ -3101,7 +3104,7 @@ dependencies = [
"futures-core",
"futures-core",
"futures-sink",
"futures-sink",
"http 1.3.1",
"http 1.3.1",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"slab",
"slab",
"tokio",
"tokio",
"tokio-util",
"tokio-util",
...
@@ -3121,7 +3124,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -3121,7 +3124,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [
dependencies = [
"bytemuck",
"bytemuck",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"crunchy",
"crunchy",
"num-traits",
"num-traits",
"rand 0.9.2",
"rand 0.9.2",
...
@@ -3142,9 +3145,9 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
...
@@ -3142,9 +3145,9 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
[[package]]
name = "hashbrown"
name = "hashbrown"
version = "0.15.
4
"
version = "0.15.
5
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5
"
checksum = "
9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1
"
dependencies = [
dependencies = [
"allocator-api2",
"allocator-api2",
"equivalent",
"equivalent",
...
@@ -3211,10 +3214,10 @@ dependencies = [
...
@@ -3211,10 +3214,10 @@ dependencies = [
"log",
"log",
"num_cpus",
"num_cpus",
"rand 0.9.2",
"rand 0.9.2",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"ureq",
"ureq",
"windows-sys 0.60.2",
"windows-sys 0.60.2",
...
@@ -3243,7 +3246,7 @@ checksum = "c1637acec3b965bab873352189d887b12c87b4f8d7571f4d185e796be5654ad8"
...
@@ -3243,7 +3246,7 @@ checksum = "c1637acec3b965bab873352189d887b12c87b4f8d7571f4d185e796be5654ad8"
dependencies = [
dependencies = [
"html5ever 0.31.0",
"html5ever 0.31.0",
"tendril",
"tendril",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"unicode-width 0.2.1",
"unicode-width 0.2.1",
]
]
...
@@ -3371,20 +3374,22 @@ dependencies = [
...
@@ -3371,20 +3374,22 @@ dependencies = [
[[package]]
[[package]]
name = "hyper"
name = "hyper"
version = "1.
6
.0"
version = "1.
7
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80
"
checksum = "
eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e
"
dependencies = [
dependencies = [
"atomic-waker",
"bytes",
"bytes",
"futures-channel",
"futures-channel",
"futures-
util
",
"futures-
core
",
"h2 0.4.1
1
",
"h2 0.4.1
2
",
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"httparse",
"httparse",
"httpdate",
"httpdate",
"itoa",
"itoa",
"pin-project-lite",
"pin-project-lite",
"pin-utils",
"smallvec",
"smallvec",
"tokio",
"tokio",
"want",
"want",
...
@@ -3397,7 +3402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -3397,7 +3402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
dependencies = [
"http 1.3.1",
"http 1.3.1",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-util",
"hyper-util",
"rustls",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-native-certs 0.8.1",
...
@@ -3414,7 +3419,7 @@ version = "0.5.2"
...
@@ -3414,7 +3419,7 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
dependencies = [
dependencies = [
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-util",
"hyper-util",
"pin-project-lite",
"pin-project-lite",
"tokio",
"tokio",
...
@@ -3429,7 +3434,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
...
@@ -3429,7 +3434,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
dependencies = [
"bytes",
"bytes",
"http-body-util",
"http-body-util",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-util",
"hyper-util",
"native-tls",
"native-tls",
"tokio",
"tokio",
...
@@ -3450,7 +3455,7 @@ dependencies = [
...
@@ -3450,7 +3455,7 @@ dependencies = [
"futures-util",
"futures-util",
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"ipnet",
"ipnet",
"libc",
"libc",
"percent-encoding",
"percent-encoding",
...
@@ -3581,9 +3586,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
...
@@ -3581,9 +3586,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
[[package]]
name = "idna"
name = "idna"
version = "1.
0.3
"
version = "1.
1.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
686f825264d630750a544639377bae737
62
8
04
3f20d38bbc029e8f29ea968a7
e"
checksum = "
3b0875f23caa03898994f6ddc501886a45c7d3d
62
d
04
d2d90788d47be1b1e4d
e"
dependencies = [
dependencies = [
"idna_adapter",
"idna_adapter",
"smallvec",
"smallvec",
...
@@ -3652,12 +3657,12 @@ dependencies = [
...
@@ -3652,12 +3657,12 @@ dependencies = [
[[package]]
[[package]]
name = "indexmap"
name = "indexmap"
version = "2.1
0
.0"
version = "2.1
1
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f
e4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661
"
checksum = "f
2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9
"
dependencies = [
dependencies = [
"equivalent",
"equivalent",
"hashbrown 0.15.
4
",
"hashbrown 0.15.
5
",
"serde",
"serde",
]
]
...
@@ -3704,7 +3709,7 @@ version = "0.1.13"
...
@@ -3704,7 +3709,7 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -3715,7 +3720,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
...
@@ -3715,7 +3720,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -3733,12 +3738,12 @@ dependencies = [
...
@@ -3733,12 +3738,12 @@ dependencies = [
[[package]]
[[package]]
name = "io-uring"
name = "io-uring"
version = "0.7.
9
"
version = "0.7.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4
"
checksum = "
046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b
"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
]
]
...
@@ -3800,6 +3805,15 @@ dependencies = [
...
@@ -3800,6 +3805,15 @@ dependencies = [
"either",
"either",
]
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
[[package]]
name = "itertools"
name = "itertools"
version = "0.14.0"
version = "0.14.0"
...
@@ -3836,14 +3850,14 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
...
@@ -3836,14 +3850,14 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "jobserver"
name = "jobserver"
version = "0.1.3
3
"
version = "0.1.3
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a
"
checksum = "
9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33
"
dependencies = [
dependencies = [
"getrandom 0.3.3",
"getrandom 0.3.3",
"libc",
"libc",
...
@@ -3875,7 +3889,7 @@ dependencies = [
...
@@ -3875,7 +3889,7 @@ dependencies = [
"anyhow",
"anyhow",
"base64 0.21.7",
"base64 0.21.7",
"bytecount",
"bytecount",
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"fancy-regex 0.11.0",
"fancy-regex 0.11.0",
"fraction",
"fraction",
"getrandom 0.2.16",
"getrandom 0.2.16",
...
@@ -3892,7 +3906,7 @@ dependencies = [
...
@@ -3892,7 +3906,7 @@ dependencies = [
"serde_json",
"serde_json",
"time",
"time",
"url",
"url",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
]
]
[[package]]
[[package]]
...
@@ -3935,9 +3949,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
...
@@ -3935,9 +3949,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
[[package]]
[[package]]
name = "libc"
name = "libc"
version = "0.2.17
4
"
version = "0.2.17
5
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776
"
checksum = "
6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543
"
[[package]]
[[package]]
name = "libdynamo_llm"
name = "libdynamo_llm"
...
@@ -3957,7 +3971,7 @@ dependencies = [
...
@@ -3957,7 +3971,7 @@ dependencies = [
"tokio-stream",
"tokio-stream",
"tracing",
"tracing",
"tracing-subscriber",
"tracing-subscriber",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
]
]
[[package]]
[[package]]
...
@@ -3976,7 +3990,7 @@ version = "0.8.8"
...
@@ -3976,7 +3990,7 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"windows-targets 0.53.3",
"windows-targets 0.53.3",
]
]
...
@@ -3992,7 +4006,7 @@ version = "0.1.9"
...
@@ -3992,7 +4006,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"libc",
"libc",
]
]
...
@@ -4048,8 +4062,8 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d
...
@@ -4048,8 +4062,8 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"derivre",
"derivre",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
"serde",
"serde",
"serde_json",
"serde_json",
"toktrie 1.0.0",
"toktrie 1.0.0",
...
@@ -4063,7 +4077,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
...
@@ -4063,7 +4077,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
dependencies = [
dependencies = [
"libc",
"libc",
"neli",
"neli",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"windows-sys 0.59.0",
"windows-sys 0.59.0",
]
]
...
@@ -4175,7 +4189,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
...
@@ -4175,7 +4189,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -4215,7 +4229,7 @@ version = "0.1.1"
...
@@ -4215,7 +4229,7 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"rayon",
"rayon",
]
]
...
@@ -4227,9 +4241,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
...
@@ -4227,9 +4241,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
[[package]]
name = "memmap2"
name = "memmap2"
version = "0.9.
7
"
version = "0.9.
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28
"
checksum = "
843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7
"
dependencies = [
dependencies = [
"libc",
"libc",
"stable_deref_trait",
"stable_deref_trait",
...
@@ -4256,7 +4270,7 @@ version = "0.27.0"
...
@@ -4256,7 +4270,7 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25"
checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"block",
"block",
"core-graphics-types",
"core-graphics-types",
"foreign-types 0.5.0",
"foreign-types 0.5.0",
...
@@ -4271,7 +4285,7 @@ version = "0.29.0"
...
@@ -4271,7 +4285,7 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"block",
"block",
"core-graphics-types",
"core-graphics-types",
"foreign-types 0.5.0",
"foreign-types 0.5.0",
...
@@ -4285,16 +4299,16 @@ name = "metrics"
...
@@ -4285,16 +4299,16 @@ name = "metrics"
version = "0.4.1"
version = "0.4.1"
dependencies = [
dependencies = [
"axum 0.8.4",
"axum 0.8.4",
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"dynamo-llm",
"dynamo-llm",
"dynamo-runtime",
"dynamo-runtime",
"futures",
"futures",
"prometheus",
"prometheus",
"rand 0.9.2",
"rand 0.9.2",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tracing",
"tracing",
]
]
...
@@ -4317,9 +4331,9 @@ dependencies = [
...
@@ -4317,9 +4331,9 @@ dependencies = [
[[package]]
[[package]]
name = "minijinja"
name = "minijinja"
version = "2.1
1
.0"
version = "2.1
2
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
4e60ac08614cc09062820e51d5d94c2fce16b94ea4e5003bb81b99a95f84e876
"
checksum = "
a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990
"
dependencies = [
dependencies = [
"memo-map",
"memo-map",
"self_cell",
"self_cell",
...
@@ -4329,9 +4343,9 @@ dependencies = [
...
@@ -4329,9 +4343,9 @@ dependencies = [
[[package]]
[[package]]
name = "minijinja-contrib"
name = "minijinja-contrib"
version = "2.1
1
.0"
version = "2.1
2
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
f93e5bfa889f16d8c10ec92ac964074a68a7206c0fd9748ff23a31942c85d97c
"
checksum = "
182ba1438db4679ddfa03792c183bdc2b9ce26b58e7d41a749e59b06497cf136
"
dependencies = [
dependencies = [
"minijinja",
"minijinja",
"serde",
"serde",
...
@@ -4415,14 +4429,14 @@ dependencies = [
...
@@ -4415,14 +4429,14 @@ dependencies = [
"anyhow",
"anyhow",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-nn",
"candle-nn",
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"either",
"either",
"futures",
"futures",
"image",
"image",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"mistralrs-core",
"mistralrs-core",
"rand 0.9.2",
"rand 0.9.2",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"serde",
"serde",
"serde_json",
"serde_json",
"tokio",
"tokio",
...
@@ -4460,7 +4474,7 @@ dependencies = [
...
@@ -4460,7 +4474,7 @@ dependencies = [
"candle-nn",
"candle-nn",
"cfgrammar",
"cfgrammar",
"chrono",
"chrono",
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"csv",
"csv",
"derive-new",
"derive-new",
"derive_more 2.0.1",
"derive_more 2.0.1",
...
@@ -4470,13 +4484,13 @@ dependencies = [
...
@@ -4470,13 +4484,13 @@ dependencies = [
"futures",
"futures",
"galil-seiferas",
"galil-seiferas",
"half 2.6.0",
"half 2.6.0",
"hashbrown 0.15.
4
",
"hashbrown 0.15.
5
",
"hf-hub",
"hf-hub",
"hound",
"hound",
"html2text",
"html2text",
"http 1.3.1",
"http 1.3.1",
"image",
"image",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"indicatif",
"indicatif",
"interprocess",
"interprocess",
"itertools 0.14.0",
"itertools 0.14.0",
...
@@ -4502,13 +4516,13 @@ dependencies = [
...
@@ -4502,13 +4516,13 @@ dependencies = [
"rand_isaac",
"rand_isaac",
"rayon",
"rayon",
"regex",
"regex",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"rubato",
"rubato",
"rust-mcp-schema",
"rust-mcp-schema",
"rustc-hash 2.1.1",
"rustc-hash 2.1.1",
"rustfft",
"rustfft",
"safetensors 0.6.
1
",
"safetensors 0.6.
2
",
"schemars 0.8.22",
"schemars 0.8.22",
"scraper",
"scraper",
"serde",
"serde",
...
@@ -4520,7 +4534,7 @@ dependencies = [
...
@@ -4520,7 +4534,7 @@ dependencies = [
"strum",
"strum",
"symphonia",
"symphonia",
"sysinfo",
"sysinfo",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokenizers",
"tokenizers",
"tokio",
"tokio",
"tokio-rayon",
"tokio-rayon",
...
@@ -4531,7 +4545,7 @@ dependencies = [
...
@@ -4531,7 +4545,7 @@ dependencies = [
"tracing",
"tracing",
"tracing-subscriber",
"tracing-subscriber",
"urlencoding",
"urlencoding",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
"variantly",
"variantly",
"vob",
"vob",
]
]
...
@@ -4545,7 +4559,7 @@ dependencies = [
...
@@ -4545,7 +4559,7 @@ dependencies = [
"async-trait",
"async-trait",
"futures-util",
"futures-util",
"http 1.3.1",
"http 1.3.1",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"rust-mcp-schema",
"rust-mcp-schema",
"serde",
"serde",
"serde_json",
"serde_json",
...
@@ -4553,7 +4567,7 @@ dependencies = [
...
@@ -4553,7 +4567,7 @@ dependencies = [
"tokio-tungstenite 0.24.0",
"tokio-tungstenite 0.24.0",
"tracing",
"tracing",
"utoipa",
"utoipa",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
]
]
[[package]]
[[package]]
...
@@ -4568,7 +4582,7 @@ dependencies = [
...
@@ -4568,7 +4582,7 @@ dependencies = [
"half 2.6.0",
"half 2.6.0",
"metal 0.27.0",
"metal 0.27.0",
"once_cell",
"once_cell",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -4590,10 +4604,10 @@ dependencies = [
...
@@ -4590,10 +4604,10 @@ dependencies = [
"paste",
"paste",
"rayon",
"rayon",
"regex",
"regex",
"safetensors 0.6.
1
",
"safetensors 0.6.
2
",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tracing",
"tracing",
"yoke 0.7.5",
"yoke 0.7.5",
...
@@ -4627,7 +4641,7 @@ checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5"
...
@@ -4627,7 +4641,7 @@ checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -4754,7 +4768,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -4754,7 +4768,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [
dependencies = [
"bitflags 1.3.2",
"bitflags 1.3.2",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
"memoffset",
"memoffset",
"pin-utils",
"pin-utils",
...
@@ -4766,8 +4780,8 @@ version = "0.29.0"
...
@@ -4766,8 +4780,8 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cfg_aliases",
"cfg_aliases",
"libc",
"libc",
]
]
...
@@ -4784,7 +4798,7 @@ dependencies = [
...
@@ -4784,7 +4798,7 @@ dependencies = [
"os_info",
"os_info",
"pkg-config",
"pkg-config",
"serde",
"serde",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tracing",
"tracing",
]
]
...
@@ -4916,7 +4930,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
...
@@ -4916,7 +4930,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -4989,7 +5003,7 @@ dependencies = [
...
@@ -4989,7 +5003,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro-crate",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5069,7 +5083,7 @@ version = "6.5.1"
...
@@ -5069,7 +5083,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"libc",
"libc",
"once_cell",
"once_cell",
"onig_sys",
"onig_sys",
...
@@ -5100,19 +5114,19 @@ dependencies = [
...
@@ -5100,19 +5114,19 @@ dependencies = [
"anyhow",
"anyhow",
"base64 0.22.1",
"base64 0.22.1",
"bstr",
"bstr",
"clap 4.5.4
2
",
"clap 4.5.4
5
",
"fancy-regex 0.13.0",
"fancy-regex 0.13.0",
"futures",
"futures",
"image",
"image",
"regex",
"regex",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"rustc-hash 1.1.0",
"rustc-hash 1.1.0",
"serde",
"serde",
"serde_json",
"serde_json",
"serde_with",
"serde_with",
"sha1",
"sha1",
"sha2",
"sha2",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -5121,8 +5135,8 @@ version = "0.10.73"
...
@@ -5121,8 +5135,8 @@ version = "0.10.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"foreign-types 0.3.2",
"foreign-types 0.3.2",
"libc",
"libc",
"once_cell",
"once_cell",
...
@@ -5138,7 +5152,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
...
@@ -5138,7 +5152,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5228,7 +5242,7 @@ version = "0.9.11"
...
@@ -5228,7 +5242,7 @@ version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
"redox_syscall",
"redox_syscall",
"smallvec",
"smallvec",
...
@@ -5261,7 +5275,7 @@ dependencies = [
...
@@ -5261,7 +5275,7 @@ dependencies = [
"proc-macro2",
"proc-macro2",
"proc-macro2-diagnostics",
"proc-macro2-diagnostics",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5275,9 +5289,9 @@ dependencies = [
...
@@ -5275,9 +5289,9 @@ dependencies = [
[[package]]
[[package]]
name = "percent-encoding"
name = "percent-encoding"
version = "2.3.
1
"
version = "2.3.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e
"
checksum = "
9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220
"
[[package]]
[[package]]
name = "pest"
name = "pest"
...
@@ -5286,7 +5300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -5286,7 +5300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
dependencies = [
dependencies = [
"memchr",
"memchr",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"ucd-trie",
"ucd-trie",
]
]
...
@@ -5310,7 +5324,7 @@ dependencies = [
...
@@ -5310,7 +5324,7 @@ dependencies = [
"pest_meta",
"pest_meta",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5330,7 +5344,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -5330,7 +5344,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
dependencies = [
"fixedbitset",
"fixedbitset",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
]
]
[[package]]
[[package]]
...
@@ -5373,7 +5387,7 @@ dependencies = [
...
@@ -5373,7 +5387,7 @@ dependencies = [
"phf_shared",
"phf_shared",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5402,7 +5416,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
...
@@ -5402,7 +5416,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5440,7 +5454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -5440,7 +5454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
dependencies = [
dependencies = [
"base64 0.22.1",
"base64 0.22.1",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"quick-xml",
"quick-xml",
"serde",
"serde",
"time",
"time",
...
@@ -5534,12 +5548,12 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
...
@@ -5534,12 +5548,12 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
[[package]]
name = "prettyplease"
name = "prettyplease"
version = "0.2.3
6
"
version = "0.2.3
7
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2
"
checksum = "
479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b
"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5579,14 +5593,14 @@ dependencies = [
...
@@ -5579,14 +5593,14 @@ dependencies = [
"proc-macro-error-attr2",
"proc-macro-error-attr2",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "proc-macro2"
name = "proc-macro2"
version = "1.0.
95
"
version = "1.0.
101
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778
"
checksum = "
89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de
"
dependencies = [
dependencies = [
"unicode-ident",
"unicode-ident",
]
]
...
@@ -5599,7 +5613,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
...
@@ -5599,7 +5613,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"version_check",
"version_check",
"yansi",
"yansi",
]
]
...
@@ -5620,7 +5634,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -5620,7 +5634,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
dependencies = [
dependencies = [
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5629,13 +5643,13 @@ version = "0.14.0"
...
@@ -5629,13 +5643,13 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"fnv",
"fnv",
"lazy_static",
"lazy_static",
"memchr",
"memchr",
"parking_lot",
"parking_lot",
"protobuf",
"protobuf",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -5646,13 +5660,13 @@ checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
...
@@ -5646,13 +5660,13 @@ checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
dependencies = [
dependencies = [
"bit-set 0.8.0",
"bit-set 0.8.0",
"bit-vec 0.8.0",
"bit-vec 0.8.0",
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"lazy_static",
"lazy_static",
"num-traits",
"num-traits",
"rand 0.9.2",
"rand 0.9.2",
"rand_chacha 0.9.0",
"rand_chacha 0.9.0",
"rand_xorshift",
"rand_xorshift",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
"rusty-fork",
"rusty-fork",
"tempfile",
"tempfile",
"unarray",
"unarray",
...
@@ -5684,7 +5698,7 @@ dependencies = [
...
@@ -5684,7 +5698,7 @@ dependencies = [
"prost",
"prost",
"prost-types",
"prost-types",
"regex",
"regex",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"tempfile",
"tempfile",
]
]
...
@@ -5698,7 +5712,7 @@ dependencies = [
...
@@ -5698,7 +5712,7 @@ dependencies = [
"itertools 0.14.0",
"itertools 0.14.0",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -5749,7 +5763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -5749,7 +5763,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
dependencies = [
dependencies = [
"bytemuck",
"bytemuck",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libm",
"libm",
"num-complex",
"num-complex",
"reborrow",
"reborrow",
...
@@ -5779,9 +5793,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
...
@@ -5779,9 +5793,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
[[package]]
name = "quick-xml"
name = "quick-xml"
version = "0.38.
0
"
version = "0.38.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
8927b0664f5c5a98265138b7e3f90aa19a6b21353182469ace36d4ac527b7b1b
"
checksum = "
42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89
"
dependencies = [
dependencies = [
"memchr",
"memchr",
]
]
...
@@ -5800,7 +5814,7 @@ dependencies = [
...
@@ -5800,7 +5814,7 @@ dependencies = [
"rustc-hash 2.1.1",
"rustc-hash 2.1.1",
"rustls",
"rustls",
"socket2 0.5.10",
"socket2 0.5.10",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tracing",
"tracing",
"web-time",
"web-time",
...
@@ -5821,7 +5835,7 @@ dependencies = [
...
@@ -5821,7 +5835,7 @@ dependencies = [
"rustls",
"rustls",
"rustls-pki-types",
"rustls-pki-types",
"slab",
"slab",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"tinyvec",
"tinyvec",
"tracing",
"tracing",
"web-time",
"web-time",
...
@@ -5975,7 +5989,7 @@ dependencies = [
...
@@ -5975,7 +5989,7 @@ dependencies = [
"av1-grain",
"av1-grain",
"bitstream-io",
"bitstream-io",
"built",
"built",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"interpolate_name",
"interpolate_name",
"itertools 0.12.1",
"itertools 0.12.1",
"libc",
"libc",
...
@@ -6028,7 +6042,7 @@ version = "11.5.0"
...
@@ -6028,7 +6042,7 @@ version = "11.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
]
]
[[package]]
[[package]]
...
@@ -6039,9 +6053,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
...
@@ -6039,9 +6053,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
[[package]]
[[package]]
name = "rayon"
name = "rayon"
version = "1.1
0
.0"
version = "1.1
1
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa
"
checksum = "
368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f
"
dependencies = [
dependencies = [
"either",
"either",
"rayon-core",
"rayon-core",
...
@@ -6060,9 +6074,9 @@ dependencies = [
...
@@ -6060,9 +6074,9 @@ dependencies = [
[[package]]
[[package]]
name = "rayon-core"
name = "rayon-core"
version = "1.1
2.1
"
version = "1.1
3.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2
"
checksum = "
22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91
"
dependencies = [
dependencies = [
"crossbeam-deque",
"crossbeam-deque",
"crossbeam-utils",
"crossbeam-utils",
...
@@ -6095,7 +6109,7 @@ version = "0.5.17"
...
@@ -6095,7 +6109,7 @@ version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
]
]
[[package]]
[[package]]
...
@@ -6106,7 +6120,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
...
@@ -6106,7 +6120,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [
dependencies = [
"getrandom 0.2.16",
"getrandom 0.2.16",
"libredox",
"libredox",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -6126,19 +6140,19 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
...
@@ -6126,19 +6140,19 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "regex"
name = "regex"
version = "1.11.
1
"
version = "1.11.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b2011
91"
checksum = "
23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6
91
2
"
dependencies = [
dependencies = [
"aho-corasick",
"aho-corasick",
"memchr",
"memchr",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -6152,13 +6166,13 @@ dependencies = [
...
@@ -6152,13 +6166,13 @@ dependencies = [
[[package]]
[[package]]
name = "regex-automata"
name = "regex-automata"
version = "0.4.
9
"
version = "0.4.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908
"
checksum = "
6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6
"
dependencies = [
dependencies = [
"aho-corasick",
"aho-corasick",
"memchr",
"memchr",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -6169,9 +6183,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
...
@@ -6169,9 +6183,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
[[package]]
name = "regex-syntax"
name = "regex-syntax"
version = "0.8.
5
"
version = "0.8.
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c
"
checksum = "
caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001
"
[[package]]
[[package]]
name = "relative-path"
name = "relative-path"
...
@@ -6217,9 +6231,9 @@ dependencies = [
...
@@ -6217,9 +6231,9 @@ dependencies = [
[[package]]
[[package]]
name = "reqwest"
name = "reqwest"
version = "0.12.2
2
"
version = "0.12.2
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531
"
checksum = "
d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb
"
dependencies = [
dependencies = [
"base64 0.22.1",
"base64 0.22.1",
"bytes",
"bytes",
...
@@ -6227,11 +6241,11 @@ dependencies = [
...
@@ -6227,11 +6241,11 @@ dependencies = [
"futures-channel",
"futures-channel",
"futures-core",
"futures-core",
"futures-util",
"futures-util",
"h2 0.4.1
1
",
"h2 0.4.1
2
",
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"http-body-util",
"http-body-util",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-rustls",
"hyper-rustls",
"hyper-tls",
"hyper-tls",
"hyper-util",
"hyper-util",
...
@@ -6277,7 +6291,7 @@ dependencies = [
...
@@ -6277,7 +6291,7 @@ dependencies = [
"mime",
"mime",
"nom 7.1.3",
"nom 7.1.3",
"pin-project-lite",
"pin-project-lite",
"reqwest 0.12.2
2
",
"reqwest 0.12.2
3
",
"thiserror 1.0.69",
"thiserror 1.0.69",
]
]
...
@@ -6294,7 +6308,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -6294,7 +6308,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
dependencies = [
"cc",
"cc",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"getrandom 0.2.16",
"getrandom 0.2.16",
"libc",
"libc",
"untrusted",
"untrusted",
...
@@ -6353,14 +6367,14 @@ version = "0.18.2"
...
@@ -6353,14 +6367,14 @@ version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"glob",
"glob",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"regex",
"regex",
"relative-path",
"relative-path",
"rustc_version",
"rustc_version",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"unicode-ident",
"unicode-ident",
]
]
...
@@ -6370,7 +6384,7 @@ version = "0.23.0"
...
@@ -6370,7 +6384,7 @@ version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a"
checksum = "825ea780781b15345a146be27eaefb05085e337e869bff01b4306a4fd4a9ad5a"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"glob",
"glob",
"proc-macro-crate",
"proc-macro-crate",
"proc-macro2",
"proc-macro2",
...
@@ -6378,7 +6392,7 @@ dependencies = [
...
@@ -6378,7 +6392,7 @@ dependencies = [
"regex",
"regex",
"relative-path",
"relative-path",
"rustc_version",
"rustc_version",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"unicode-ident",
"unicode-ident",
]
]
...
@@ -6390,7 +6404,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14"
...
@@ -6390,7 +6404,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14"
dependencies = [
dependencies = [
"quote",
"quote",
"rand 0.8.5",
"rand 0.8.5",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -6472,7 +6486,7 @@ version = "0.38.44"
...
@@ -6472,7 +6486,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"errno",
"errno",
"libc",
"libc",
"linux-raw-sys 0.4.15",
"linux-raw-sys 0.4.15",
...
@@ -6485,7 +6499,7 @@ version = "1.0.8"
...
@@ -6485,7 +6499,7 @@ version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"errno",
"errno",
"libc",
"libc",
"linux-raw-sys 0.9.4",
"linux-raw-sys 0.9.4",
...
@@ -6530,7 +6544,7 @@ dependencies = [
...
@@ -6530,7 +6544,7 @@ dependencies = [
"openssl-probe",
"openssl-probe",
"rustls-pki-types",
"rustls-pki-types",
"schannel",
"schannel",
"security-framework 3.
2
.0",
"security-framework 3.
3
.0",
]
]
[[package]]
[[package]]
...
@@ -6576,9 +6590,9 @@ dependencies = [
...
@@ -6576,9 +6590,9 @@ dependencies = [
[[package]]
[[package]]
name = "rustversion"
name = "rustversion"
version = "1.0.2
1
"
version = "1.0.2
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1
d"
checksum = "
b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46
d"
[[package]]
[[package]]
name = "rusty-fork"
name = "rusty-fork"
...
@@ -6619,9 +6633,9 @@ dependencies = [
...
@@ -6619,9 +6633,9 @@ dependencies = [
[[package]]
[[package]]
name = "safetensors"
name = "safetensors"
version = "0.6.
1
"
version = "0.6.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3acc594eede59bd7facbb5
5c
1
79
d1d67980f412c201544b21fce7f9ac3aab4a5
"
checksum = "
172dd94c5a87b
5c79
f945c863da53b2ebc7ccef4eca24ac63cca66a41aab2178
"
dependencies = [
dependencies = [
"serde",
"serde",
"serde_json",
"serde_json",
...
@@ -6638,9 +6652,9 @@ dependencies = [
...
@@ -6638,9 +6652,9 @@ dependencies = [
[[package]]
[[package]]
name = "scc"
name = "scc"
version = "2.
3.4
"
version = "2.
4.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
22b2d775fb28f245817589471dd49
c5ed
f64237f4a19d10ce9a92ff4651a27f4
"
checksum = "
46e6f046b7fef48e2660
c5
7
ed
794263155d713de679057f2d0c169bfc6e756cc
"
dependencies = [
dependencies = [
"sdd",
"sdd",
]
]
...
@@ -6699,7 +6713,7 @@ dependencies = [
...
@@ -6699,7 +6713,7 @@ dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"serde_derive_internals",
"serde_derive_internals",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -6745,7 +6759,7 @@ version = "2.11.1"
...
@@ -6745,7 +6759,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"core-foundation 0.9.4",
"core-foundation 0.9.4",
"core-foundation-sys",
"core-foundation-sys",
"libc",
"libc",
...
@@ -6754,11 +6768,11 @@ dependencies = [
...
@@ -6754,11 +6768,11 @@ dependencies = [
[[package]]
[[package]]
name = "security-framework"
name = "security-framework"
version = "3.
2
.0"
version = "3.
3
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316
"
checksum = "
80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c
"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"core-foundation 0.10.1",
"core-foundation 0.10.1",
"core-foundation-sys",
"core-foundation-sys",
"libc",
"libc",
...
@@ -6781,7 +6795,7 @@ version = "0.26.0"
...
@@ -6781,7 +6795,7 @@ version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"cssparser",
"cssparser",
"derive_more 0.99.20",
"derive_more 0.99.20",
"fxhash",
"fxhash",
...
@@ -6848,7 +6862,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
...
@@ -6848,7 +6862,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -6859,16 +6873,16 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
...
@@ -6859,16 +6873,16 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "serde_json"
name = "serde_json"
version = "1.0.14
2
"
version = "1.0.14
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7
"
checksum = "
d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a
"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"itoa",
"itoa",
"memchr",
"memchr",
"ryu",
"ryu",
...
@@ -6911,7 +6925,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
...
@@ -6911,7 +6925,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -6945,7 +6959,7 @@ dependencies = [
...
@@ -6945,7 +6959,7 @@ dependencies = [
"chrono",
"chrono",
"hex",
"hex",
"indexmap 1.9.3",
"indexmap 1.9.3",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"schemars 0.9.0",
"schemars 0.9.0",
"schemars 1.0.4",
"schemars 1.0.4",
"serde",
"serde",
...
@@ -6964,7 +6978,7 @@ dependencies = [
...
@@ -6964,7 +6978,7 @@ dependencies = [
"darling 0.20.11",
"darling 0.20.11",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -6973,7 +6987,7 @@ version = "0.9.34+deprecated"
...
@@ -6973,7 +6987,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"itoa",
"itoa",
"ryu",
"ryu",
"serde",
"serde",
...
@@ -7002,7 +7016,7 @@ checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef"
...
@@ -7002,7 +7016,7 @@ checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -7020,7 +7034,7 @@ version = "0.10.6"
...
@@ -7020,7 +7034,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cpufeatures",
"cpufeatures",
"digest",
"digest",
]
]
...
@@ -7031,7 +7045,7 @@ version = "0.10.9"
...
@@ -7031,7 +7045,7 @@ version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cpufeatures",
"cpufeatures",
"digest",
"digest",
]
]
...
@@ -7080,9 +7094,9 @@ dependencies = [
...
@@ -7080,9 +7094,9 @@ dependencies = [
[[package]]
[[package]]
name = "signal-hook-registry"
name = "signal-hook-registry"
version = "1.4.
5
"
version = "1.4.
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410
"
checksum = "
b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b
"
dependencies = [
dependencies = [
"libc",
"libc",
]
]
...
@@ -7151,9 +7165,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
...
@@ -7151,9 +7165,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
[[package]]
name = "slab"
name = "slab"
version = "0.4.1
0
"
version = "0.4.1
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d
"
checksum = "
7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589
"
[[package]]
[[package]]
name = "smallvec"
name = "smallvec"
...
@@ -7330,7 +7344,7 @@ dependencies = [
...
@@ -7330,7 +7344,7 @@ dependencies = [
"heck 0.5.0",
"heck 0.5.0",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -7487,9 +7501,9 @@ dependencies = [
...
@@ -7487,9 +7501,9 @@ dependencies = [
[[package]]
[[package]]
name = "syn"
name = "syn"
version = "2.0.10
4
"
version = "2.0.10
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40
"
checksum = "
ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6
"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
...
@@ -7519,7 +7533,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
...
@@ -7519,7 +7533,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -7528,7 +7542,7 @@ version = "0.5.5"
...
@@ -7528,7 +7542,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"byteorder",
"byteorder",
"enum-as-inner",
"enum-as-inner",
"libc",
"libc",
...
@@ -7542,7 +7556,7 @@ version = "0.6.0"
...
@@ -7542,7 +7556,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"byteorder",
"byteorder",
"enum-as-inner",
"enum-as-inner",
"libc",
"libc",
...
@@ -7556,7 +7570,7 @@ version = "0.30.13"
...
@@ -7556,7 +7570,7 @@ version = "0.30.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"core-foundation-sys",
"core-foundation-sys",
"libc",
"libc",
"ntapi",
"ntapi",
...
@@ -7582,7 +7596,7 @@ version = "0.6.1"
...
@@ -7582,7 +7596,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"core-foundation 0.9.4",
"core-foundation 0.9.4",
"system-configuration-sys 0.6.0",
"system-configuration-sys 0.6.0",
]
]
...
@@ -7638,15 +7652,15 @@ dependencies = [
...
@@ -7638,15 +7652,15 @@ dependencies = [
[[package]]
[[package]]
name = "tempfile"
name = "tempfile"
version = "3.2
0
.0"
version = "3.2
1
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1
"
checksum = "
15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e
"
dependencies = [
dependencies = [
"fastrand",
"fastrand",
"getrandom 0.3.3",
"getrandom 0.3.3",
"once_cell",
"once_cell",
"rustix 1.0.8",
"rustix 1.0.8",
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
...
@@ -7662,12 +7676,12 @@ dependencies = [
...
@@ -7662,12 +7676,12 @@ dependencies = [
[[package]]
[[package]]
name = "terminal_size"
name = "terminal_size"
version = "0.4.
2
"
version = "0.4.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed
"
checksum = "
60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0
"
dependencies = [
dependencies = [
"rustix 1.0.8",
"rustix 1.0.8",
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
...
@@ -7690,11 +7704,11 @@ dependencies = [
...
@@ -7690,11 +7704,11 @@ dependencies = [
[[package]]
[[package]]
name = "thiserror"
name = "thiserror"
version = "2.0.1
2
"
version = "2.0.1
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708
"
checksum = "
3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0
"
dependencies = [
dependencies = [
"thiserror-impl 2.0.1
2
",
"thiserror-impl 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -7705,18 +7719,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
...
@@ -7705,18 +7719,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
name = "thiserror-impl"
name = "thiserror-impl"
version = "2.0.1
2
"
version = "2.0.1
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d
"
checksum = "
6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960
"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -7725,7 +7739,7 @@ version = "1.1.9"
...
@@ -7725,7 +7739,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -7794,9 +7808,9 @@ dependencies = [
...
@@ -7794,9 +7808,9 @@ dependencies = [
[[package]]
[[package]]
name = "tinyvec"
name = "tinyvec"
version = "1.
9
.0"
version = "1.
10
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71
"
checksum = "
bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa
"
dependencies = [
dependencies = [
"tinyvec_macros",
"tinyvec_macros",
]
]
...
@@ -7845,11 +7859,11 @@ dependencies = [
...
@@ -7845,11 +7859,11 @@ dependencies = [
"rayon",
"rayon",
"rayon-cond",
"rayon-cond",
"regex",
"regex",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
"serde",
"serde",
"serde_json",
"serde_json",
"spm_precompiled",
"spm_precompiled",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"unicode-normalization-alignments",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode-segmentation",
"unicode_categories",
"unicode_categories",
...
@@ -7884,7 +7898,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
...
@@ -7884,7 +7898,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -7967,16 +7981,15 @@ dependencies = [
...
@@ -7967,16 +7981,15 @@ dependencies = [
[[package]]
[[package]]
name = "tokio-util"
name = "tokio-util"
version = "0.7.1
5
"
version = "0.7.1
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b9
50696
ad7df
"
checksum = "
14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea
50
1
696
195e6e3ce5
"
dependencies = [
dependencies = [
"bytes",
"bytes",
"futures-core",
"futures-core",
"futures-io",
"futures-io",
"futures-sink",
"futures-sink",
"futures-util",
"futures-util",
"hashbrown 0.15.4",
"pin-project-lite",
"pin-project-lite",
"tokio",
"tokio",
]
]
...
@@ -8016,9 +8029,9 @@ dependencies = [
...
@@ -8016,9 +8029,9 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie"
name = "toktrie"
version = "1.
1.1
"
version = "1.
2.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
1c01fe70e9a91498c029fb6d5aacf9a648bb1bf30a5db4c344d3effb6367b1f3
"
checksum = "
804459e7818d5cc8920f80d555526454353117a4f2fcda38e4b38666639d7e81
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bytemuck",
"bytemuck",
...
@@ -8042,16 +8055,16 @@ dependencies = [
...
@@ -8042,16 +8055,16 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie_hf_tokenizers"
name = "toktrie_hf_tokenizers"
version = "1.
1.1
"
version = "1.
2.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
759491ad9b56050f817e24d68f48eb7f13bee5f8b48127ba3a9079a579726f40
"
checksum = "
6fc302ed2c53b1d7fcbe3d760a2dc6567def0eb2da7ca09cb21a16b9a9aa4f13
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"log",
"log",
"serde",
"serde",
"serde_json",
"serde_json",
"tokenizers",
"tokenizers",
"toktrie 1.
1.1
",
"toktrie 1.
2.0
",
]
]
[[package]]
[[package]]
...
@@ -8081,7 +8094,7 @@ version = "0.22.27"
...
@@ -8081,7 +8094,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"serde",
"serde",
"serde_spanned",
"serde_spanned",
"toml_datetime",
"toml_datetime",
...
@@ -8106,11 +8119,11 @@ dependencies = [
...
@@ -8106,11 +8119,11 @@ dependencies = [
"axum 0.7.9",
"axum 0.7.9",
"base64 0.22.1",
"base64 0.22.1",
"bytes",
"bytes",
"h2 0.4.1
1
",
"h2 0.4.1
2
",
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"http-body-util",
"http-body-util",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-timeout",
"hyper-timeout",
"hyper-util",
"hyper-util",
"percent-encoding",
"percent-encoding",
...
@@ -8135,11 +8148,11 @@ dependencies = [
...
@@ -8135,11 +8148,11 @@ dependencies = [
"axum 0.8.4",
"axum 0.8.4",
"base64 0.22.1",
"base64 0.22.1",
"bytes",
"bytes",
"h2 0.4.1
1
",
"h2 0.4.1
2
",
"http 1.3.1",
"http 1.3.1",
"http-body 1.0.1",
"http-body 1.0.1",
"http-body-util",
"http-body-util",
"hyper 1.
6
.0",
"hyper 1.
7
.0",
"hyper-timeout",
"hyper-timeout",
"hyper-util",
"hyper-util",
"percent-encoding",
"percent-encoding",
...
@@ -8166,7 +8179,7 @@ dependencies = [
...
@@ -8166,7 +8179,7 @@ dependencies = [
"prost-build",
"prost-build",
"prost-types",
"prost-types",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -8197,7 +8210,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
...
@@ -8197,7 +8210,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
dependencies = [
"futures-core",
"futures-core",
"futures-util",
"futures-util",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"pin-project-lite",
"pin-project-lite",
"slab",
"slab",
"sync_wrapper 1.0.2",
"sync_wrapper 1.0.2",
...
@@ -8214,7 +8227,7 @@ version = "0.6.6"
...
@@ -8214,7 +8227,7 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
"bytes",
"bytes",
"futures-util",
"futures-util",
"http 1.3.1",
"http 1.3.1",
...
@@ -8270,7 +8283,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
...
@@ -8270,7 +8283,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -8379,7 +8392,7 @@ dependencies = [
...
@@ -8379,7 +8392,7 @@ dependencies = [
"bytes",
"bytes",
"log",
"log",
"rand 0.9.2",
"rand 0.9.2",
"thiserror 2.0.1
2
",
"thiserror 2.0.1
6
",
"utf-8",
"utf-8",
]
]
...
@@ -8548,9 +8561,9 @@ dependencies = [
...
@@ -8548,9 +8561,9 @@ dependencies = [
[[package]]
[[package]]
name = "url"
name = "url"
version = "2.5.
4
"
version = "2.5.
7
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60
"
checksum = "
08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b
"
dependencies = [
dependencies = [
"form_urlencoded",
"form_urlencoded",
"idna",
"idna",
...
@@ -8588,7 +8601,7 @@ version = "5.4.0"
...
@@ -8588,7 +8601,7 @@ version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993"
checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"serde",
"serde",
"serde_json",
"serde_json",
"utoipa-gen",
"utoipa-gen",
...
@@ -8602,7 +8615,7 @@ checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b"
...
@@ -8602,7 +8615,7 @@ checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -8616,9 +8629,9 @@ dependencies = [
...
@@ -8616,9 +8629,9 @@ dependencies = [
[[package]]
[[package]]
name = "uuid"
name = "uuid"
version = "1.1
7
.0"
version = "1.1
8
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d
"
checksum = "
f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be
"
dependencies = [
dependencies = [
"getrandom 0.3.3",
"getrandom 0.3.3",
"js-sys",
"js-sys",
...
@@ -8664,7 +8677,7 @@ dependencies = [
...
@@ -8664,7 +8677,7 @@ dependencies = [
"proc-macro-error2",
"proc-macro-error2",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -8744,9 +8757,9 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
...
@@ -8744,9 +8757,9 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
[[package]]
name = "vob"
name = "vob"
version = "3.0.
5
"
version = "3.0.
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
0baa046ba374a7701d98032a468a0bbd968a8cd3a2ae39c94d74e211fac05c81
"
checksum = "
bc936b5a7202a703aeaf7ce05e7931db2e0c8126813f97db3e9e06d867b0bb38
"
dependencies = [
dependencies = [
"num-traits",
"num-traits",
"serde",
"serde",
...
@@ -8801,7 +8814,7 @@ version = "0.2.100"
...
@@ -8801,7 +8814,7 @@ version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"once_cell",
"once_cell",
"rustversion",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-macro",
...
@@ -8817,7 +8830,7 @@ dependencies = [
...
@@ -8817,7 +8830,7 @@ dependencies = [
"log",
"log",
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"wasm-bindgen-shared",
"wasm-bindgen-shared",
]
]
...
@@ -8827,7 +8840,7 @@ version = "0.4.50"
...
@@ -8827,7 +8840,7 @@ version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"js-sys",
"js-sys",
"once_cell",
"once_cell",
"wasm-bindgen",
"wasm-bindgen",
...
@@ -8852,7 +8865,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
...
@@ -8852,7 +8865,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"wasm-bindgen-backend",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
"wasm-bindgen-shared",
]
]
...
@@ -8993,11 +9006,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
...
@@ -8993,11 +9006,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
[[package]]
name = "winapi-util"
name = "winapi-util"
version = "0.1.
9
"
version = "0.1.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb
"
checksum = "
0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22
"
dependencies = [
dependencies = [
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
...
@@ -9046,7 +9059,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
...
@@ -9046,7 +9059,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -9057,7 +9070,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
...
@@ -9057,7 +9070,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -9319,9 +9332,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
...
@@ -9319,9 +9332,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
[[package]]
name = "winnow"
name = "winnow"
version = "0.7.1
2
"
version = "0.7.1
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95
"
checksum = "
21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf
"
dependencies = [
dependencies = [
"memchr",
"memchr",
]
]
...
@@ -9332,7 +9345,7 @@ version = "0.50.0"
...
@@ -9332,7 +9345,7 @@ version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"windows-sys 0.48.0",
"windows-sys 0.48.0",
]
]
...
@@ -9342,7 +9355,7 @@ version = "0.39.0"
...
@@ -9342,7 +9355,7 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
dependencies = [
"bitflags 2.9.
1
",
"bitflags 2.9.
3
",
]
]
[[package]]
[[package]]
...
@@ -9405,7 +9418,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
...
@@ -9405,7 +9418,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"synstructure",
"synstructure",
]
]
...
@@ -9417,7 +9430,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
...
@@ -9417,7 +9430,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"synstructure",
"synstructure",
]
]
...
@@ -9438,7 +9451,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
...
@@ -9438,7 +9451,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -9458,7 +9471,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
...
@@ -9458,7 +9471,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
"synstructure",
"synstructure",
]
]
...
@@ -9492,7 +9505,7 @@ dependencies = [
...
@@ -9492,7 +9505,7 @@ dependencies = [
"thiserror 1.0.69",
"thiserror 1.0.69",
"tokio",
"tokio",
"tokio-util",
"tokio-util",
"uuid 1.1
7
.0",
"uuid 1.1
8
.0",
]
]
[[package]]
[[package]]
...
@@ -9518,9 +9531,9 @@ dependencies = [
...
@@ -9518,9 +9531,9 @@ dependencies = [
[[package]]
[[package]]
name = "zerovec"
name = "zerovec"
version = "0.11.
2
"
version = "0.11.
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428
"
checksum = "
e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b
"
dependencies = [
dependencies = [
"yoke 0.8.0",
"yoke 0.8.0",
"zerofrom",
"zerofrom",
...
@@ -9535,7 +9548,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
...
@@ -9535,7 +9548,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
"syn 2.0.10
4
",
"syn 2.0.10
6
",
]
]
[[package]]
[[package]]
...
@@ -9548,7 +9561,7 @@ dependencies = [
...
@@ -9548,7 +9561,7 @@ dependencies = [
"crc32fast",
"crc32fast",
"crossbeam-utils",
"crossbeam-utils",
"displaydoc",
"displaydoc",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"num_enum",
"num_enum",
"thiserror 1.0.69",
"thiserror 1.0.69",
]
]
...
...
components/frontend/src/dynamo/frontend/main.py
View file @
91a459c0
...
@@ -176,6 +176,12 @@ def parse_args():
...
@@ -176,6 +176,12 @@ def parse_args():
default
=
None
,
default
=
None
,
help
=
"Prefix for Dynamo frontend metrics. If unset, uses DYN_METRICS_PREFIX env var or 'dynamo_frontend'."
,
help
=
"Prefix for Dynamo frontend metrics. If unset, uses DYN_METRICS_PREFIX env var or 'dynamo_frontend'."
,
)
)
parser
.
add_argument
(
"--kserve-grpc-server"
,
action
=
"store_true"
,
default
=
False
,
help
=
"Start KServe gRPC server."
,
)
flags
=
parser
.
parse_args
()
flags
=
parser
.
parse_args
()
...
@@ -246,6 +252,8 @@ async def async_main():
...
@@ -246,6 +252,8 @@ async def async_main():
try
:
try
:
if
flags
.
interactive
:
if
flags
.
interactive
:
await
run_input
(
runtime
,
"text"
,
engine
)
await
run_input
(
runtime
,
"text"
,
engine
)
elif
flags
.
kserve_grpc_server
:
await
run_input
(
runtime
,
"grpc"
,
engine
)
else
:
else
:
await
run_input
(
runtime
,
"http"
,
engine
)
await
run_input
(
runtime
,
"http"
,
engine
)
except
asyncio
.
exceptions
.
CancelledError
:
except
asyncio
.
exceptions
.
CancelledError
:
...
...
launch/dynamo-run/src/lib.rs
View file @
91a459c0
...
@@ -83,7 +83,6 @@ pub async fn run(
...
@@ -83,7 +83,6 @@ pub async fn run(
rt
.clone
(),
rt
.clone
(),
)
)
.await
?
;
.await
?
;
//
//
// Run in from an input
// Run in from an input
//
//
...
...
launch/dynamo-run/src/main.rs
View file @
91a459c0
...
@@ -26,7 +26,7 @@ Example:
...
@@ -26,7 +26,7 @@ Example:
See `docs/guides/dynamo_run.md` in the repo for full details.
See `docs/guides/dynamo_run.md` in the repo for full details.
"#
;
"#
;
const
USAGE
:
&
str
=
"USAGE: dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"
;
const
USAGE
:
&
str
=
"USAGE: dynamo-run in=[http|
grpc|
text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"
;
fn
main
()
->
anyhow
::
Result
<
()
>
{
fn
main
()
->
anyhow
::
Result
<
()
>
{
// Set log level based on verbosity flag
// Set log level based on verbosity flag
...
...
lib/bindings/python/Cargo.lock
View file @
91a459c0
...
@@ -23,7 +23,7 @@ version = "0.8.12"
...
@@ -23,7 +23,7 @@ version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"getrandom 0.3.3",
"getrandom 0.3.3",
"once_cell",
"once_cell",
"serde",
"serde",
...
@@ -465,7 +465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -465,7 +465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
dependencies = [
dependencies = [
"addr2line",
"addr2line",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
"miniz_oxide",
"miniz_oxide",
"object",
"object",
...
@@ -506,7 +506,7 @@ version = "0.69.5"
...
@@ -506,7 +506,7 @@ version = "0.69.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"cexpr",
"cexpr",
"clang-sys",
"clang-sys",
"itertools 0.12.1",
"itertools 0.12.1",
...
@@ -529,7 +529,7 @@ version = "0.71.1"
...
@@ -529,7 +529,7 @@ version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"cexpr",
"cexpr",
"clang-sys",
"clang-sys",
"itertools 0.13.0",
"itertools 0.13.0",
...
@@ -587,9 +587,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
...
@@ -587,9 +587,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
[[package]]
name = "bitflags"
name = "bitflags"
version = "2.9.
2
"
version = "2.9.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29
"
checksum = "
34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d
"
[[package]]
[[package]]
name = "bitstream-io"
name = "bitstream-io"
...
@@ -606,7 +606,7 @@ dependencies = [
...
@@ -606,7 +606,7 @@ dependencies = [
"arrayref",
"arrayref",
"arrayvec",
"arrayvec",
"cc",
"cc",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"constant_time_eq",
"constant_time_eq",
]
]
...
@@ -637,7 +637,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -637,7 +637,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
dependencies = [
"memchr",
"memchr",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"serde",
"serde",
]
]
...
@@ -727,9 +727,9 @@ dependencies = [
...
@@ -727,9 +727,9 @@ dependencies = [
[[package]]
[[package]]
name = "cc"
name = "cc"
version = "1.2.3
3
"
version = "1.2.3
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f
"
checksum = "
42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc
"
dependencies = [
dependencies = [
"jobserver",
"jobserver",
"libc",
"libc",
...
@@ -763,9 +763,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
...
@@ -763,9 +763,9 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
[[package]]
name = "cfg-if"
name = "cfg-if"
version = "1.0.
1
"
version = "1.0.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268
"
checksum = "
2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9
"
[[package]]
[[package]]
name = "cfg_aliases"
name = "cfg_aliases"
...
@@ -865,7 +865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -865,7 +865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
dependencies = [
"castaway",
"castaway",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"itoa",
"itoa",
"rustversion",
"rustversion",
"ryu",
"ryu",
...
@@ -948,7 +948,7 @@ version = "1.5.0"
...
@@ -948,7 +948,7 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -1047,7 +1047,7 @@ version = "4.1.3"
...
@@ -1047,7 +1047,7 @@ version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cpufeatures",
"cpufeatures",
"curve25519-dalek-derive",
"curve25519-dalek-derive",
"digest",
"digest",
...
@@ -1117,7 +1117,7 @@ version = "5.5.3"
...
@@ -1117,7 +1117,7 @@ version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"hashbrown 0.14.5",
"hashbrown 0.14.5",
"lock_api",
"lock_api",
"once_cell",
"once_cell",
...
@@ -1342,7 +1342,7 @@ dependencies = [
...
@@ -1342,7 +1342,7 @@ dependencies = [
"secrecy",
"secrecy",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-util",
"tokio-util",
...
@@ -1394,7 +1394,9 @@ dependencies = [
...
@@ -1394,7 +1394,9 @@ dependencies = [
"nixl-sys",
"nixl-sys",
"offset-allocator",
"offset-allocator",
"oneshot",
"oneshot",
"parking_lot",
"prometheus",
"prometheus",
"prost",
"rand 0.9.2",
"rand 0.9.2",
"rayon",
"rayon",
"regex",
"regex",
...
@@ -1404,7 +1406,7 @@ dependencies = [
...
@@ -1404,7 +1406,7 @@ dependencies = [
"serde_json",
"serde_json",
"strum",
"strum",
"tempfile",
"tempfile",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tmq",
"tmq",
"tokenizers",
"tokenizers",
"tokio",
"tokio",
...
@@ -1412,6 +1414,8 @@ dependencies = [
...
@@ -1412,6 +1414,8 @@ dependencies = [
"tokio-util",
"tokio-util",
"toktrie",
"toktrie",
"toktrie_hf_tokenizers",
"toktrie_hf_tokenizers",
"tonic",
"tonic-build",
"tower-http",
"tower-http",
"tracing",
"tracing",
"unicode-segmentation",
"unicode-segmentation",
...
@@ -1460,7 +1464,7 @@ dependencies = [
...
@@ -1460,7 +1464,7 @@ dependencies = [
"rstest",
"rstest",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-util",
"tokio-util",
...
@@ -1505,7 +1509,7 @@ dependencies = [
...
@@ -1505,7 +1509,7 @@ dependencies = [
"serde",
"serde",
"serde_json",
"serde_json",
"socket2 0.5.10",
"socket2 0.5.10",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-util",
"tokio-util",
...
@@ -1573,7 +1577,7 @@ version = "0.8.35"
...
@@ -1573,7 +1577,7 @@ version = "0.8.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -1733,8 +1737,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -1733,8 +1737,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
dependencies = [
"bit-set 0.5.3",
"bit-set 0.5.3",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -1744,8 +1748,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -1744,8 +1748,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
dependencies = [
"bit-set 0.8.0",
"bit-set 0.8.0",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -1810,9 +1814,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
...
@@ -1810,9 +1814,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
[[package]]
name = "form_urlencoded"
name = "form_urlencoded"
version = "1.2.
1
"
version = "1.2.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456
"
checksum = "
cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf
"
dependencies = [
dependencies = [
"percent-encoding",
"percent-encoding",
]
]
...
@@ -2207,7 +2211,7 @@ version = "0.2.16"
...
@@ -2207,7 +2211,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"js-sys",
"js-sys",
"libc",
"libc",
"wasi 0.11.1+wasi-snapshot-preview1",
"wasi 0.11.1+wasi-snapshot-preview1",
...
@@ -2220,7 +2224,7 @@ version = "0.3.3"
...
@@ -2220,7 +2224,7 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"js-sys",
"js-sys",
"libc",
"libc",
"r-efi",
"r-efi",
...
@@ -2247,7 +2251,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
...
@@ -2247,7 +2251,7 @@ checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
dependencies = [
dependencies = [
"fancy-regex 0.14.0",
"fancy-regex 0.14.0",
"ggml-quants",
"ggml-quants",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"log",
"log",
"num_enum",
"num_enum",
]
]
...
@@ -2286,7 +2290,7 @@ dependencies = [
...
@@ -2286,7 +2290,7 @@ dependencies = [
"futures-core",
"futures-core",
"futures-sink",
"futures-sink",
"http",
"http",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"slab",
"slab",
"tokio",
"tokio",
"tokio-util",
"tokio-util",
...
@@ -2300,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -2300,7 +2304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [
dependencies = [
"bytemuck",
"bytemuck",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"crunchy",
"crunchy",
"num-traits",
"num-traits",
"rand 0.9.2",
"rand 0.9.2",
...
@@ -2360,7 +2364,7 @@ dependencies = [
...
@@ -2360,7 +2364,7 @@ dependencies = [
"reqwest",
"reqwest",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"ureq",
"ureq",
"windows-sys 0.60.2",
"windows-sys 0.60.2",
...
@@ -2625,9 +2629,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
...
@@ -2625,9 +2629,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
[[package]]
name = "idna"
name = "idna"
version = "1.
0.3
"
version = "1.
1.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
686f825264d630750a544639377bae737
62
8
04
3f20d38bbc029e8f29ea968a7
e"
checksum = "
3b0875f23caa03898994f6ddc501886a45c7d3d
62
d
04
d2d90788d47be1b1e4d
e"
dependencies = [
dependencies = [
"idna_adapter",
"idna_adapter",
"smallvec",
"smallvec",
...
@@ -2696,9 +2700,9 @@ dependencies = [
...
@@ -2696,9 +2700,9 @@ dependencies = [
[[package]]
[[package]]
name = "indexmap"
name = "indexmap"
version = "2.1
0
.0"
version = "2.1
1
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f
e4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661
"
checksum = "f
2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9
"
dependencies = [
dependencies = [
"equivalent",
"equivalent",
"hashbrown 0.15.5",
"hashbrown 0.15.5",
...
@@ -2736,7 +2740,7 @@ version = "0.1.13"
...
@@ -2736,7 +2740,7 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -2752,21 +2756,21 @@ dependencies = [
...
@@ -2752,21 +2756,21 @@ dependencies = [
[[package]]
[[package]]
name = "inventory"
name = "inventory"
version = "0.3.2
0
"
version = "0.3.2
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
ab08d7cd2c5897f2c949e5383ea7c7db03fb19130ffcfbf7eda795137ae3cb83
"
checksum = "
bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e
"
dependencies = [
dependencies = [
"rustversion",
"rustversion",
]
]
[[package]]
[[package]]
name = "io-uring"
name = "io-uring"
version = "0.7.
9
"
version = "0.7.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4
"
checksum = "
046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b
"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
]
]
...
@@ -2836,9 +2840,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
...
@@ -2836,9 +2840,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
[[package]]
name = "jobserver"
name = "jobserver"
version = "0.1.3
3
"
version = "0.1.3
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a
"
checksum = "
9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33
"
dependencies = [
dependencies = [
"getrandom 0.3.3",
"getrandom 0.3.3",
"libc",
"libc",
...
@@ -2920,7 +2924,7 @@ version = "0.8.8"
...
@@ -2920,7 +2924,7 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"windows-targets 0.53.3",
"windows-targets 0.53.3",
]
]
...
@@ -2936,7 +2940,7 @@ version = "0.1.9"
...
@@ -2936,7 +2940,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"libc",
"libc",
]
]
...
@@ -2966,7 +2970,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
...
@@ -2966,7 +2970,7 @@ checksum = "656b3b27f8893f7bbf9485148ff9a65f019e3f33bd5cdc87c83cab16b3fd9ec8"
dependencies = [
dependencies = [
"libc",
"libc",
"neli",
"neli",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"windows-sys 0.59.0",
"windows-sys 0.59.0",
]
]
...
@@ -3048,7 +3052,7 @@ version = "0.1.1"
...
@@ -3048,7 +3052,7 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"rayon",
"rayon",
]
]
...
@@ -3060,9 +3064,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
...
@@ -3060,9 +3064,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
[[package]]
name = "memmap2"
name = "memmap2"
version = "0.9.
7
"
version = "0.9.
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28
"
checksum = "
843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7
"
dependencies = [
dependencies = [
"libc",
"libc",
"stable_deref_trait",
"stable_deref_trait",
...
@@ -3110,9 +3114,9 @@ dependencies = [
...
@@ -3110,9 +3114,9 @@ dependencies = [
[[package]]
[[package]]
name = "minijinja"
name = "minijinja"
version = "2.1
1
.0"
version = "2.1
2
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
4e60ac08614cc09062820e51d5d94c2fce16b94ea4e5003bb81b99a95f84e876
"
checksum = "
a9f264d75233323f4b7d2f03aefe8a990690cdebfbfe26ea86bcbaec5e9ac990
"
dependencies = [
dependencies = [
"memo-map",
"memo-map",
"self_cell",
"self_cell",
...
@@ -3121,9 +3125,9 @@ dependencies = [
...
@@ -3121,9 +3125,9 @@ dependencies = [
[[package]]
[[package]]
name = "minijinja-contrib"
name = "minijinja-contrib"
version = "2.1
1
.0"
version = "2.1
2
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
f93e5bfa889f16d8c10ec92ac964074a68a7206c0fd9748ff23a31942c85d97c
"
checksum = "
182ba1438db4679ddfa03792c183bdc2b9ce26b58e7d41a749e59b06497cf136
"
dependencies = [
dependencies = [
"minijinja",
"minijinja",
"serde",
"serde",
...
@@ -3289,7 +3293,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -3289,7 +3293,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
dependencies = [
dependencies = [
"bitflags 1.3.2",
"bitflags 1.3.2",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
"memoffset 0.7.1",
"memoffset 0.7.1",
"pin-utils",
"pin-utils",
...
@@ -3301,8 +3305,8 @@ version = "0.29.0"
...
@@ -3301,8 +3305,8 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cfg_aliases",
"cfg_aliases",
"libc",
"libc",
]
]
...
@@ -3319,7 +3323,7 @@ dependencies = [
...
@@ -3319,7 +3323,7 @@ dependencies = [
"os_info",
"os_info",
"pkg-config",
"pkg-config",
"serde",
"serde",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tracing",
"tracing",
]
]
...
@@ -3561,7 +3565,7 @@ version = "6.5.1"
...
@@ -3561,7 +3565,7 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"libc",
"libc",
"once_cell",
"once_cell",
"onig_sys",
"onig_sys",
...
@@ -3598,7 +3602,7 @@ dependencies = [
...
@@ -3598,7 +3602,7 @@ dependencies = [
"serde_with",
"serde_with",
"sha1",
"sha1",
"sha2",
"sha2",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -3653,7 +3657,7 @@ version = "0.9.11"
...
@@ -3653,7 +3657,7 @@ version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libc",
"libc",
"redox_syscall",
"redox_syscall",
"smallvec",
"smallvec",
...
@@ -3700,9 +3704,9 @@ dependencies = [
...
@@ -3700,9 +3704,9 @@ dependencies = [
[[package]]
[[package]]
name = "percent-encoding"
name = "percent-encoding"
version = "2.3.
1
"
version = "2.3.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e
"
checksum = "
9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220
"
[[package]]
[[package]]
name = "petgraph"
name = "petgraph"
...
@@ -3711,7 +3715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -3711,7 +3715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
dependencies = [
"fixedbitset",
"fixedbitset",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
]
]
[[package]]
[[package]]
...
@@ -3769,7 +3773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -3769,7 +3773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
dependencies = [
dependencies = [
"base64 0.22.1",
"base64 0.22.1",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"quick-xml",
"quick-xml",
"serde",
"serde",
"time",
"time",
...
@@ -3915,13 +3919,13 @@ version = "0.14.0"
...
@@ -3915,13 +3919,13 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"fnv",
"fnv",
"lazy_static",
"lazy_static",
"memchr",
"memchr",
"parking_lot",
"parking_lot",
"protobuf",
"protobuf",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -4015,7 +4019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -4015,7 +4019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
dependencies = [
dependencies = [
"bytemuck",
"bytemuck",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"libm",
"libm",
"num-complex",
"num-complex",
"reborrow",
"reborrow",
...
@@ -4028,7 +4032,7 @@ version = "0.23.5"
...
@@ -4028,7 +4032,7 @@ version = "0.23.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"indoc",
"indoc",
"libc",
"libc",
"memoffset 0.9.1",
"memoffset 0.9.1",
...
@@ -4140,9 +4144,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
...
@@ -4140,9 +4144,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
[[package]]
name = "quick-xml"
name = "quick-xml"
version = "0.38.
1
"
version = "0.38.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4
"
checksum = "
42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89
"
dependencies = [
dependencies = [
"memchr",
"memchr",
]
]
...
@@ -4161,7 +4165,7 @@ dependencies = [
...
@@ -4161,7 +4165,7 @@ dependencies = [
"rustc-hash 2.1.1",
"rustc-hash 2.1.1",
"rustls",
"rustls",
"socket2 0.5.10",
"socket2 0.5.10",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tokio",
"tokio",
"tracing",
"tracing",
"web-time",
"web-time",
...
@@ -4182,7 +4186,7 @@ dependencies = [
...
@@ -4182,7 +4186,7 @@ dependencies = [
"rustls",
"rustls",
"rustls-pki-types",
"rustls-pki-types",
"slab",
"slab",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"tinyvec",
"tinyvec",
"tracing",
"tracing",
"web-time",
"web-time",
...
@@ -4298,7 +4302,7 @@ dependencies = [
...
@@ -4298,7 +4302,7 @@ dependencies = [
"av1-grain",
"av1-grain",
"bitstream-io",
"bitstream-io",
"built",
"built",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"interpolate_name",
"interpolate_name",
"itertools 0.12.1",
"itertools 0.12.1",
"libc",
"libc",
...
@@ -4351,7 +4355,7 @@ version = "11.5.0"
...
@@ -4351,7 +4355,7 @@ version = "11.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
]
]
[[package]]
[[package]]
...
@@ -4403,7 +4407,7 @@ version = "0.5.17"
...
@@ -4403,7 +4407,7 @@ version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
]
]
[[package]]
[[package]]
...
@@ -4414,7 +4418,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
...
@@ -4414,7 +4418,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [
dependencies = [
"getrandom 0.2.16",
"getrandom 0.2.16",
"libredox",
"libredox",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -4439,14 +4443,14 @@ dependencies = [
...
@@ -4439,14 +4443,14 @@ dependencies = [
[[package]]
[[package]]
name = "regex"
name = "regex"
version = "1.11.
1
"
version = "1.11.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b2011
91"
checksum = "
23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6
91
2
"
dependencies = [
dependencies = [
"aho-corasick",
"aho-corasick",
"memchr",
"memchr",
"regex-automata 0.4.
9
",
"regex-automata 0.4.
10
",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -4460,13 +4464,13 @@ dependencies = [
...
@@ -4460,13 +4464,13 @@ dependencies = [
[[package]]
[[package]]
name = "regex-automata"
name = "regex-automata"
version = "0.4.
9
"
version = "0.4.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908
"
checksum = "
6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6
"
dependencies = [
dependencies = [
"aho-corasick",
"aho-corasick",
"memchr",
"memchr",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
]
]
[[package]]
[[package]]
...
@@ -4477,9 +4481,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
...
@@ -4477,9 +4481,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
[[package]]
name = "regex-syntax"
name = "regex-syntax"
version = "0.8.
5
"
version = "0.8.
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c
"
checksum = "
caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001
"
[[package]]
[[package]]
name = "relative-path"
name = "relative-path"
...
@@ -4563,7 +4567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -4563,7 +4567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
dependencies = [
"cc",
"cc",
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"getrandom 0.2.16",
"getrandom 0.2.16",
"libc",
"libc",
"untrusted",
"untrusted",
...
@@ -4610,7 +4614,7 @@ version = "0.25.0"
...
@@ -4610,7 +4614,7 @@ version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746"
checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"glob",
"glob",
"proc-macro-crate",
"proc-macro-crate",
"proc-macro2",
"proc-macro2",
...
@@ -4655,7 +4659,7 @@ version = "0.38.44"
...
@@ -4655,7 +4659,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"errno",
"errno",
"libc",
"libc",
"linux-raw-sys 0.4.15",
"linux-raw-sys 0.4.15",
...
@@ -4668,7 +4672,7 @@ version = "1.0.8"
...
@@ -4668,7 +4672,7 @@ version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"errno",
"errno",
"libc",
"libc",
"linux-raw-sys 0.9.4",
"linux-raw-sys 0.9.4",
...
@@ -4843,7 +4847,7 @@ version = "2.11.1"
...
@@ -4843,7 +4847,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"core-foundation 0.9.4",
"core-foundation 0.9.4",
"core-foundation-sys",
"core-foundation-sys",
"libc",
"libc",
...
@@ -4856,7 +4860,7 @@ version = "3.3.0"
...
@@ -4856,7 +4860,7 @@ version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c"
checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"core-foundation 0.10.1",
"core-foundation 0.10.1",
"core-foundation-sys",
"core-foundation-sys",
"libc",
"libc",
...
@@ -4917,7 +4921,7 @@ version = "1.0.143"
...
@@ -4917,7 +4921,7 @@ version = "1.0.143"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"itoa",
"itoa",
"memchr",
"memchr",
"ryu",
"ryu",
...
@@ -4985,7 +4989,7 @@ dependencies = [
...
@@ -4985,7 +4989,7 @@ dependencies = [
"chrono",
"chrono",
"hex",
"hex",
"indexmap 1.9.3",
"indexmap 1.9.3",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"schemars 0.9.0",
"schemars 0.9.0",
"schemars 1.0.4",
"schemars 1.0.4",
"serde",
"serde",
...
@@ -5013,7 +5017,7 @@ version = "0.10.6"
...
@@ -5013,7 +5017,7 @@ version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cpufeatures",
"cpufeatures",
"digest",
"digest",
]
]
...
@@ -5024,7 +5028,7 @@ version = "0.10.9"
...
@@ -5024,7 +5028,7 @@ version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"cpufeatures",
"cpufeatures",
"digest",
"digest",
]
]
...
@@ -5254,7 +5258,7 @@ version = "0.5.5"
...
@@ -5254,7 +5258,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"byteorder",
"byteorder",
"enum-as-inner",
"enum-as-inner",
"libc",
"libc",
...
@@ -5268,7 +5272,7 @@ version = "0.6.0"
...
@@ -5268,7 +5272,7 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"byteorder",
"byteorder",
"enum-as-inner",
"enum-as-inner",
"libc",
"libc",
...
@@ -5282,7 +5286,7 @@ version = "0.6.1"
...
@@ -5282,7 +5286,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"core-foundation 0.9.4",
"core-foundation 0.9.4",
"system-configuration-sys",
"system-configuration-sys",
]
]
...
@@ -5318,15 +5322,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
...
@@ -5318,15 +5322,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
[[package]]
name = "tempfile"
name = "tempfile"
version = "3.2
0
.0"
version = "3.2
1
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1
"
checksum = "
15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e
"
dependencies = [
dependencies = [
"fastrand",
"fastrand",
"getrandom 0.3.3",
"getrandom 0.3.3",
"once_cell",
"once_cell",
"rustix 1.0.8",
"rustix 1.0.8",
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
...
@@ -5340,11 +5344,11 @@ dependencies = [
...
@@ -5340,11 +5344,11 @@ dependencies = [
[[package]]
[[package]]
name = "thiserror"
name = "thiserror"
version = "2.0.1
5
"
version = "2.0.1
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
80d76d3f064b981389ecb4b6b7f45a0bf9fdac1d5b9204c7bd6714fecc30285
0"
checksum = "
3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c
0"
dependencies = [
dependencies = [
"thiserror-impl 2.0.1
5
",
"thiserror-impl 2.0.1
6
",
]
]
[[package]]
[[package]]
...
@@ -5360,9 +5364,9 @@ dependencies = [
...
@@ -5360,9 +5364,9 @@ dependencies = [
[[package]]
[[package]]
name = "thiserror-impl"
name = "thiserror-impl"
version = "2.0.1
5
"
version = "2.0.1
6
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
44d29feb33e986b6ea906bd9c3559a856983f92371b3eaa5e83782a351623de
0"
checksum = "
6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c796
0"
dependencies = [
dependencies = [
"proc-macro2",
"proc-macro2",
"quote",
"quote",
...
@@ -5375,7 +5379,7 @@ version = "1.1.9"
...
@@ -5375,7 +5379,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
]
]
[[package]]
[[package]]
...
@@ -5485,11 +5489,11 @@ dependencies = [
...
@@ -5485,11 +5489,11 @@ dependencies = [
"rayon",
"rayon",
"rayon-cond",
"rayon-cond",
"regex",
"regex",
"regex-syntax 0.8.
5
",
"regex-syntax 0.8.
6
",
"serde",
"serde",
"serde_json",
"serde_json",
"spm_precompiled",
"spm_precompiled",
"thiserror 2.0.1
5
",
"thiserror 2.0.1
6
",
"unicode-normalization-alignments",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode-segmentation",
"unicode_categories",
"unicode_categories",
...
@@ -5637,7 +5641,7 @@ version = "0.22.27"
...
@@ -5637,7 +5641,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
dependencies = [
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"serde",
"serde",
"serde_spanned",
"serde_spanned",
"toml_datetime",
"toml_datetime",
...
@@ -5703,7 +5707,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
...
@@ -5703,7 +5707,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
dependencies = [
"futures-core",
"futures-core",
"futures-util",
"futures-util",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"pin-project-lite",
"pin-project-lite",
"slab",
"slab",
"sync_wrapper",
"sync_wrapper",
...
@@ -5720,7 +5724,7 @@ version = "0.6.6"
...
@@ -5720,7 +5724,7 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
"bytes",
"bytes",
"futures-util",
"futures-util",
"http",
"http",
...
@@ -5957,9 +5961,9 @@ dependencies = [
...
@@ -5957,9 +5961,9 @@ dependencies = [
[[package]]
[[package]]
name = "url"
name = "url"
version = "2.5.
4
"
version = "2.5.
7
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60
"
checksum = "
08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b
"
dependencies = [
dependencies = [
"form_urlencoded",
"form_urlencoded",
"idna",
"idna",
...
@@ -6090,7 +6094,7 @@ version = "0.2.100"
...
@@ -6090,7 +6094,7 @@ version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"once_cell",
"once_cell",
"rustversion",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-macro",
...
@@ -6116,7 +6120,7 @@ version = "0.4.50"
...
@@ -6116,7 +6120,7 @@ version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
dependencies = [
"cfg-if 1.0.
1
",
"cfg-if 1.0.
3
",
"js-sys",
"js-sys",
"once_cell",
"once_cell",
"wasm-bindgen",
"wasm-bindgen",
...
@@ -6254,11 +6258,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
...
@@ -6254,11 +6258,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
[[package]]
name = "winapi-util"
name = "winapi-util"
version = "0.1.
9
"
version = "0.1.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb
"
checksum = "
0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22
"
dependencies = [
dependencies = [
"windows-sys 0.
59.0
",
"windows-sys 0.
60.2
",
]
]
[[package]]
[[package]]
...
@@ -6495,9 +6499,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
...
@@ -6495,9 +6499,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
[[package]]
name = "winnow"
name = "winnow"
version = "0.7.1
2
"
version = "0.7.1
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95
"
checksum = "
21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf
"
dependencies = [
dependencies = [
"memchr",
"memchr",
]
]
...
@@ -6508,7 +6512,7 @@ version = "0.39.0"
...
@@ -6508,7 +6512,7 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
dependencies = [
"bitflags 2.9.
2
",
"bitflags 2.9.
3
",
]
]
[[package]]
[[package]]
...
@@ -6714,7 +6718,7 @@ dependencies = [
...
@@ -6714,7 +6718,7 @@ dependencies = [
"crc32fast",
"crc32fast",
"crossbeam-utils",
"crossbeam-utils",
"displaydoc",
"displaydoc",
"indexmap 2.1
0
.0",
"indexmap 2.1
1
.0",
"num_enum",
"num_enum",
"thiserror 1.0.69",
"thiserror 1.0.69",
]
]
...
...
lib/llm/Cargo.toml
View file @
91a459c0
...
@@ -110,6 +110,13 @@ tower-http = {workspace = true}
...
@@ -110,6 +110,13 @@ tower-http = {workspace = true}
rustls
=
{
version
=
"0.23"
}
rustls
=
{
version
=
"0.23"
}
# grpc-service
# pin version to 0.13.1 so it depends on prost 0.13.5
# which is used across other libraries
tonic
=
{
version
=
"0.13.1"
}
# Request prost specifically so tonic-build properly compiles protobuf messages
prost
=
{
version
=
"0.13.5"
}
# tokenizers
# tokenizers
tokenizers
=
{
version
=
"0.21.4"
,
default-features
=
false
,
features
=
[
tokenizers
=
{
version
=
"0.21.4"
,
default-features
=
false
,
features
=
[
"onig"
,
"onig"
,
...
@@ -157,3 +164,6 @@ insta = { version = "1.41", features = [
...
@@ -157,3 +164,6 @@ insta = { version = "1.41", features = [
]
}
]
}
aligned-vec
=
"0.6.4"
aligned-vec
=
"0.6.4"
lazy_static
=
"1.4"
lazy_static
=
"1.4"
[build-dependencies]
tonic-build
=
{
version
=
"0.13.1"
}
\ No newline at end of file
lib/llm/build.rs
View file @
91a459c0
...
@@ -13,8 +13,14 @@
...
@@ -13,8 +13,14 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
fn
main
()
{
fn
main
()
->
Result
<
(),
Box
<
dyn
std
::
error
::
Error
>>
{
println!
(
"cargo:warning=Building with CUDA KV off"
);
println!
(
"cargo:warning=Building with CUDA KV off"
);
build_protos
()
}
fn
build_protos
()
->
Result
<
(),
Box
<
dyn
std
::
error
::
Error
>>
{
tonic_build
::
compile_protos
(
"src/grpc/protos/kserve.proto"
)
?
;
Ok
(())
}
}
// NOTE: Preserving this build.rs for reference. We may want to re-enable
// NOTE: Preserving this build.rs for reference. We may want to re-enable
...
...
lib/llm/src/entrypoint/input.rs
View file @
91a459c0
...
@@ -18,6 +18,7 @@ pub mod batch;
...
@@ -18,6 +18,7 @@ pub mod batch;
mod
common
;
mod
common
;
pub
use
common
::
build_routed_pipeline
;
pub
use
common
::
build_routed_pipeline
;
pub
mod
endpoint
;
pub
mod
endpoint
;
pub
mod
grpc
;
pub
mod
http
;
pub
mod
http
;
pub
mod
text
;
pub
mod
text
;
...
@@ -43,6 +44,9 @@ pub enum Input {
...
@@ -43,6 +44,9 @@ pub enum Input {
/// Batch mode. Run all the prompts, write the outputs, exit.
/// Batch mode. Run all the prompts, write the outputs, exit.
Batch
(
PathBuf
),
Batch
(
PathBuf
),
// Run a KServe-compatible gRPC server
Grpc
,
}
}
impl
FromStr
for
Input
{
impl
FromStr
for
Input
{
...
@@ -59,6 +63,7 @@ impl TryFrom<&str> for Input {
...
@@ -59,6 +63,7 @@ impl TryFrom<&str> for Input {
fn
try_from
(
s
:
&
str
)
->
anyhow
::
Result
<
Self
>
{
fn
try_from
(
s
:
&
str
)
->
anyhow
::
Result
<
Self
>
{
match
s
{
match
s
{
"http"
=>
Ok
(
Input
::
Http
),
"http"
=>
Ok
(
Input
::
Http
),
"grpc"
=>
Ok
(
Input
::
Grpc
),
"text"
=>
Ok
(
Input
::
Text
),
"text"
=>
Ok
(
Input
::
Text
),
"stdin"
=>
Ok
(
Input
::
Stdin
),
"stdin"
=>
Ok
(
Input
::
Stdin
),
endpoint_path
if
endpoint_path
.starts_with
(
ENDPOINT_SCHEME
)
=>
{
endpoint_path
if
endpoint_path
.starts_with
(
ENDPOINT_SCHEME
)
=>
{
...
@@ -77,6 +82,7 @@ impl fmt::Display for Input {
...
@@ -77,6 +82,7 @@ impl fmt::Display for Input {
fn
fmt
(
&
self
,
f
:
&
mut
fmt
::
Formatter
)
->
fmt
::
Result
{
fn
fmt
(
&
self
,
f
:
&
mut
fmt
::
Formatter
)
->
fmt
::
Result
{
let
s
=
match
self
{
let
s
=
match
self
{
Input
::
Http
=>
"http"
,
Input
::
Http
=>
"http"
,
Input
::
Grpc
=>
"grpc"
,
Input
::
Text
=>
"text"
,
Input
::
Text
=>
"text"
,
Input
::
Stdin
=>
"stdin"
,
Input
::
Stdin
=>
"stdin"
,
Input
::
Endpoint
(
path
)
=>
path
,
Input
::
Endpoint
(
path
)
=>
path
,
...
@@ -113,6 +119,9 @@ pub async fn run_input(
...
@@ -113,6 +119,9 @@ pub async fn run_input(
Input
::
Http
=>
{
Input
::
Http
=>
{
http
::
run
(
runtime
,
engine_config
)
.await
?
;
http
::
run
(
runtime
,
engine_config
)
.await
?
;
}
}
Input
::
Grpc
=>
{
grpc
::
run
(
runtime
,
engine_config
)
.await
?
;
}
Input
::
Text
=>
{
Input
::
Text
=>
{
text
::
run
(
runtime
,
None
,
engine_config
)
.await
?
;
text
::
run
(
runtime
,
None
,
engine_config
)
.await
?
;
}
}
...
...
lib/llm/src/entrypoint/input/grpc.rs
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
sync
::
Arc
;
use
crate
::{
discovery
::{
MODEL_ROOT_PATH
,
ModelManager
,
ModelWatcher
},
engines
::
StreamingEngineAdapter
,
entrypoint
::{
self
,
EngineConfig
,
input
::
common
},
grpc
::
service
::
kserve
,
kv_router
::
KvRouterConfig
,
types
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
NvCreateCompletionRequest
,
NvCreateCompletionResponse
},
},
};
use
dynamo_runtime
::
transports
::
etcd
;
use
dynamo_runtime
::{
DistributedRuntime
,
Runtime
};
use
dynamo_runtime
::{
distributed
::
DistributedConfig
,
pipeline
::
RouterMode
};
/// Build and run an KServe gRPC service
pub
async
fn
run
(
runtime
:
Runtime
,
engine_config
:
EngineConfig
)
->
anyhow
::
Result
<
()
>
{
let
mut
grpc_service_builder
=
kserve
::
KserveService
::
builder
()
.port
(
engine_config
.local_model
()
.http_port
())
// [WIP] generalize port..
.with_request_template
(
engine_config
.local_model
()
.request_template
());
let
grpc_service
=
match
engine_config
{
EngineConfig
::
Dynamic
(
_
)
=>
{
let
distributed_runtime
=
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
;
let
etcd_client
=
distributed_runtime
.etcd_client
();
// This allows the /health endpoint to query etcd for active instances
grpc_service_builder
=
grpc_service_builder
.with_etcd_client
(
etcd_client
.clone
());
let
grpc_service
=
grpc_service_builder
.build
()
?
;
match
etcd_client
{
Some
(
ref
etcd_client
)
=>
{
let
router_config
=
engine_config
.local_model
()
.router_config
();
// Listen for models registering themselves in etcd, add them to gRPC service
run_watcher
(
distributed_runtime
,
grpc_service
.state
()
.manager_clone
(),
etcd_client
.clone
(),
MODEL_ROOT_PATH
,
router_config
.router_mode
,
Some
(
router_config
.kv_router_config
),
router_config
.busy_threshold
,
)
.await
?
;
}
None
=>
{
// Static endpoints don't need discovery
}
}
grpc_service
}
EngineConfig
::
StaticRemote
(
local_model
)
=>
{
let
card
=
local_model
.card
();
let
router_mode
=
local_model
.router_config
()
.router_mode
;
let
dst_config
=
DistributedConfig
::
from_settings
(
true
);
// true means static
let
distributed_runtime
=
DistributedRuntime
::
new
(
runtime
.clone
(),
dst_config
)
.await
?
;
let
grpc_service
=
grpc_service_builder
.build
()
?
;
let
manager
=
grpc_service
.model_manager
();
let
endpoint_id
=
local_model
.endpoint_id
();
let
component
=
distributed_runtime
.namespace
(
&
endpoint_id
.namespace
)
?
.component
(
&
endpoint_id
.component
)
?
;
let
client
=
component
.endpoint
(
&
endpoint_id
.name
)
.client
()
.await
?
;
let
kv_chooser
=
if
router_mode
==
RouterMode
::
KV
{
Some
(
manager
.kv_chooser_for
(
local_model
.display_name
(),
&
component
,
card
.kv_cache_block_size
,
Some
(
local_model
.router_config
()
.kv_router_config
),
)
.await
?
,
)
}
else
{
None
};
let
chat_engine
=
entrypoint
::
build_routed_pipeline
::
<
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
>
(
card
,
&
client
,
router_mode
,
None
,
kv_chooser
.clone
())
.await
?
;
manager
.add_chat_completions_model
(
local_model
.display_name
(),
chat_engine
)
?
;
let
completions_engine
=
entrypoint
::
build_routed_pipeline
::
<
NvCreateCompletionRequest
,
NvCreateCompletionResponse
,
>
(
card
,
&
client
,
router_mode
,
None
,
kv_chooser
)
.await
?
;
manager
.add_completions_model
(
local_model
.display_name
(),
completions_engine
)
?
;
grpc_service
}
EngineConfig
::
StaticFull
{
engine
,
model
,
..
}
=>
{
let
grpc_service
=
grpc_service_builder
.build
()
?
;
let
engine
=
Arc
::
new
(
StreamingEngineAdapter
::
new
(
engine
));
let
manager
=
grpc_service
.model_manager
();
manager
.add_completions_model
(
model
.service_name
(),
engine
.clone
())
?
;
manager
.add_chat_completions_model
(
model
.service_name
(),
engine
)
?
;
grpc_service
}
EngineConfig
::
StaticCore
{
engine
:
inner_engine
,
model
,
..
}
=>
{
let
grpc_service
=
grpc_service_builder
.build
()
?
;
let
manager
=
grpc_service
.model_manager
();
let
chat_pipeline
=
common
::
build_pipeline
::
<
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
>
(
model
.card
(),
inner_engine
.clone
())
.await
?
;
manager
.add_chat_completions_model
(
model
.service_name
(),
chat_pipeline
)
?
;
let
cmpl_pipeline
=
common
::
build_pipeline
::
<
NvCreateCompletionRequest
,
NvCreateCompletionResponse
,
>
(
model
.card
(),
inner_engine
)
.await
?
;
manager
.add_completions_model
(
model
.service_name
(),
cmpl_pipeline
)
?
;
grpc_service
}
};
grpc_service
.run
(
runtime
.primary_token
())
.await
?
;
runtime
.shutdown
();
// Cancel primary token
Ok
(())
}
/// Spawns a task that watches etcd for models appearing under
/// `network_prefix` and registers them with the `ModelManager`, so that the
/// gRPC service can use them.
///
/// The spawned task is detached: this function returns as soon as the etcd
/// watch has been established and the task has been started.
///
/// # Errors
///
/// Returns an error if the etcd prefix watch cannot be created.
async fn run_watcher(
    runtime: DistributedRuntime,
    model_manager: Arc<ModelManager>,
    etcd_client: etcd::Client,
    network_prefix: &str,
    router_mode: RouterMode,
    kv_router_config: Option<KvRouterConfig>,
    busy_threshold: Option<f64>,
) -> anyhow::Result<()> {
    let model_watcher = ModelWatcher::new(
        runtime,
        model_manager,
        router_mode,
        kv_router_config,
        busy_threshold,
    );
    tracing::info!("Watching for remote model at {network_prefix}");

    let (_prefix, _watcher, events) = etcd_client
        .kv_get_and_watch_prefix(network_prefix)
        .await?
        .dissolve();

    // [gluo NOTE] This is different from http::run_watcher where it alters the HTTP service
    // endpoint being exposed, gRPC doesn't have the same concept as the KServe service
    // only has one kind of inference endpoint.

    // Hand the event receiver to the watcher and let it run in the background.
    let _background_watch = tokio::spawn(async move {
        model_watcher.watch(events).await;
    });

    Ok(())
}
lib/llm/src/grpc.rs
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
mod
service
;
lib/llm/src/grpc/protos/kserve.proto
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
syntax
=
"proto3"
;
package
inference
;
//@@.. cpp:namespace:: inference
import
"model_config.proto"
;
//@@
//@@.. cpp:var:: service GRPCInferenceService
//@@
//@@ Inference Server GRPC endpoints.
//@@
service
GRPCInferenceService
{
//@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns
//@@ (ModelMetadataResponse)
//@@
//@@ Get model metadata.
//@@
rpc
ModelMetadata
(
ModelMetadataRequest
)
returns
(
ModelMetadataResponse
)
{}
//@@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns
//@@ (ModelInferResponse)
//@@
//@@ Perform inference using a specific model.
//@@
rpc
ModelInfer
(
ModelInferRequest
)
returns
(
ModelInferResponse
)
{}
//@@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns
//@@ (stream ModelStreamInferResponse)
//@@
//@@ Perform streaming inference.
//@@
rpc
ModelStreamInfer
(
stream
ModelInferRequest
)
returns
(
stream
ModelStreamInferResponse
)
{}
//@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns
//@@ (ModelConfigResponse)
//@@
//@@ Get model configuration.
//@@
rpc
ModelConfig
(
ModelConfigRequest
)
returns
(
ModelConfigResponse
)
{}
}
//@@
//@@.. cpp:var:: message ModelMetadataRequest
//@@
//@@ Request message for ModelMetadata.
//@@
message
ModelMetadataRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string
name
=
1
;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model whose metadata is requested. If not
//@@ given the server will choose a version based on the
//@@ model and internal policy.
//@@
string
version
=
2
;
}
//@@
//@@.. cpp:var:: message ModelMetadataResponse
//@@
//@@ Response message for ModelMetadata.
//@@
message
ModelMetadataResponse
{
//@@
//@@ .. cpp:var:: message TensorMetadata
//@@
//@@ Metadata for a tensor.
//@@
message
TensorMetadata
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string
name
=
1
;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string
datatype
=
2
;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape. A variable-size dimension is represented
//@@ by a -1 value.
//@@
repeated
int64
shape
=
3
;
}
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The model name.
//@@
string
name
=
1
;
//@@
//@@ .. cpp:var:: string versions (repeated)
//@@
//@@ The versions of the model.
//@@
repeated
string
versions
=
2
;
//@@
//@@ .. cpp:var:: string platform
//@@
//@@ The model's platform.
//@@
string
platform
=
3
;
//@@
//@@ .. cpp:var:: TensorMetadata inputs (repeated)
//@@
//@@ The model's inputs.
//@@
repeated
TensorMetadata
inputs
=
4
;
//@@
//@@ .. cpp:var:: TensorMetadata outputs (repeated)
//@@
//@@ The model's outputs.
//@@
repeated
TensorMetadata
outputs
=
5
;
}
//@@
//@@.. cpp:var:: message InferParameter
//@@
//@@ An inference parameter value.
//@@
message
InferParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a string, an int64,
//@@ a uint64, a double, or a boolean
//@@
//@@ Note: double and uint64 are currently
//@@ placeholders for future use and
//@@ are not supported for custom parameters
//@@
oneof
parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool
bool_param
=
1
;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64
int64_param
=
2
;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string
string_param
=
3
;
//@@ .. cpp:var:: double double_param
//@@
//@@ A double parameter value.
//@@
double
double_param
=
4
;
//@@ .. cpp:var:: uint64 uint64_param
//@@
//@@ A uint64 parameter value.
//@@
//@@ Not supported for custom parameters
//@@
uint64
uint64_param
=
5
;
}
}
//@@
//@@.. cpp:var:: message InferTensorContents
//@@
//@@ The data contained in a tensor represented by the repeated type
//@@ that matches the tensor's data type. Protobuf oneof is not used
//@@ because oneofs cannot contain repeated fields.
//@@
message
InferTensorContents
{
//@@
//@@ .. cpp:var:: bool bool_contents (repeated)
//@@
//@@ Representation for BOOL data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated
bool
bool_contents
=
1
;
//@@
//@@ .. cpp:var:: int32 int_contents (repeated)
//@@
//@@ Representation for INT8, INT16, and INT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated
int32
int_contents
=
2
;
//@@
//@@ .. cpp:var:: int64 int64_contents (repeated)
//@@
//@@ Representation for INT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated
int64
int64_contents
=
3
;
//@@
//@@ .. cpp:var:: uint32 uint_contents (repeated)
//@@
//@@ Representation for UINT8, UINT16, and UINT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated
uint32
uint_contents
=
4
;
//@@
//@@ .. cpp:var:: uint64 uint64_contents (repeated)
//@@
//@@ Representation for UINT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated
uint64
uint64_contents
=
5
;
//@@
//@@ .. cpp:var:: float fp32_contents (repeated)
//@@
//@@ Representation for FP32 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated
float
fp32_contents
=
6
;
//@@
//@@ .. cpp:var:: double fp64_contents (repeated)
//@@
//@@ Representation for FP64 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated
double
fp64_contents
=
7
;
//@@
//@@ .. cpp:var:: bytes bytes_contents (repeated)
//@@
//@@ Representation for BYTES data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated
bytes
bytes_contents
=
8
;
}
//@@
//@@.. cpp:var:: message ModelInferRequest
//@@
//@@ Request message for ModelInfer.
//@@
message
ModelInferRequest
{
//@@
//@@ .. cpp:var:: message InferInputTensor
//@@
//@@ An input tensor for an inference request.
//@@
message
InferInputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string
name
=
1
;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string
datatype
=
2
;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated
int64
shape
=
3
;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference input tensor parameters.
//@@
map
<
string
,
InferParameter
>
parameters
=
4
;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferRequest.raw_input_contents.
//@@
InferTensorContents
contents
=
5
;
}
//@@
//@@ .. cpp:var:: message InferRequestedOutputTensor
//@@
//@@ An output tensor requested for an inference request.
//@@
message
InferRequestedOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string
name
=
1
;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional requested output tensor parameters.
//@@
map
<
string
,
InferParameter
>
parameters
=
2
;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to use for inferencing.
//@@
string
model_name
=
1
;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model to use for inference. If not
//@@ given the latest/most-recent version of the model is used.
//@@
string
model_version
=
2
;
//@@ .. cpp:var:: string id
//@@
//@@ Optional identifier for the request. If specified will be
//@@ returned in the response.
//@@
string
id
=
3
;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference parameters.
//@@
map
<
string
,
InferParameter
>
parameters
=
4
;
//@@
//@@ .. cpp:var:: InferInputTensor inputs (repeated)
//@@
//@@ The input tensors for the inference.
//@@
repeated
InferInputTensor
inputs
=
5
;
//@@
//@@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated)
//@@
//@@ The requested output tensors for the inference. Optional, if not
//@@ specified all outputs specified in the model config will be
//@@ returned.
//@@
repeated
InferRequestedOutputTensor
outputs
=
6
;
//@@
//@@ .. cpp:var:: bytes raw_input_contents
//@@
//@@ The data contained in an input tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_input_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'inputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferInputTensor::contents
//@@ must not be specified for any input tensor.
//@@
repeated
bytes
raw_input_contents
=
7
;
}
//@@
//@@.. cpp:var:: message ModelInferResponse
//@@
//@@ Response message for ModelInfer.
//@@
message
ModelInferResponse
{
//@@
//@@ .. cpp:var:: message InferOutputTensor
//@@
//@@ An output tensor returned for an inference request.
//@@
message
InferOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string
name
=
1
;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string
datatype
=
2
;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated
int64
shape
=
3
;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional output tensor parameters.
//@@
map
<
string
,
InferParameter
>
parameters
=
4
;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferResponse.raw_output_contents.
//@@
InferTensorContents
contents
=
5
;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model used for inference.
//@@
string
model_name
=
1
;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model used for inference.
//@@
string
model_version
=
2
;
//@@ .. cpp:var:: string id
//@@
//@@ The id of the inference request if one was specified.
//@@
string
id
=
3
;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference response parameters.
//@@
map
<
string
,
InferParameter
>
parameters
=
4
;
//@@
//@@ .. cpp:var:: InferOutputTensor outputs (repeated)
//@@
//@@ The output tensors holding inference results.
//@@
repeated
InferOutputTensor
outputs
=
5
;
//@@
//@@ .. cpp:var:: bytes raw_output_contents
//@@
//@@ The data contained in an output tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_output_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'outputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferOutputTensor::contents
//@@ must not be specified for any output tensor.
//@@
repeated
bytes
raw_output_contents
=
6
;
}
//@@
//@@.. cpp:var:: message ModelStreamInferResponse
//@@
//@@ Response message for ModelStreamInfer.
//@@
message
ModelStreamInferResponse
{
//@@
//@@ .. cpp:var:: string error_message
//@@
//@@ The message describing the error. The empty message
//@@ indicates the inference was successful without errors.
//@@
string
error_message
=
1
;
//@@
//@@ .. cpp:var:: ModelInferResponse infer_response
//@@
//@@ Holds the results of the request.
//@@
ModelInferResponse
infer_response
=
2
;
}
//@@
//@@.. cpp:var:: message ModelConfigRequest
//@@
//@@ Request message for ModelConfig.
//@@
message
ModelConfigRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string
name
=
1
;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model. If not given the model version
//@@ is selected automatically based on the version policy.
//@@
string
version
=
2
;
}
//@@
//@@.. cpp:var:: message ModelConfigResponse
//@@
//@@ Response message for ModelConfig.
//@@
message
ModelConfigResponse
{
//@@
//@@ .. cpp:var:: ModelConfig config
//@@
//@@ The model configuration.
//@@
ModelConfig
config
=
1
;
}
//@@
//@@.. cpp:var:: message ModelRepositoryParameter
//@@
//@@ A model repository parameter value.
//@@
message
ModelRepositoryParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a boolean, an int64, a string,
//@@ or bytes
//@@
oneof
parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool
bool_param
=
1
;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64
int64_param
=
2
;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string
string_param
=
3
;
//@@ .. cpp:var:: bytes bytes_param
//@@
//@@ A bytes parameter value.
//@@
bytes
bytes_param
=
4
;
}
}
lib/llm/src/grpc/protos/model_config.proto
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
syntax
=
"proto3"
;
package
inference
;
//@@.. cpp:namespace:: inference
//@@
//@@.. cpp:enum:: DataType
//@@
//@@ Data types supported for input and output tensors.
//@@
enum
DataType
{
//@@ .. cpp:enumerator:: DataType::INVALID = 0
TYPE_INVALID
=
0
;
//@@ .. cpp:enumerator:: DataType::BOOL = 1
TYPE_BOOL
=
1
;
//@@ .. cpp:enumerator:: DataType::UINT8 = 2
TYPE_UINT8
=
2
;
//@@ .. cpp:enumerator:: DataType::UINT16 = 3
TYPE_UINT16
=
3
;
//@@ .. cpp:enumerator:: DataType::UINT32 = 4
TYPE_UINT32
=
4
;
//@@ .. cpp:enumerator:: DataType::UINT64 = 5
TYPE_UINT64
=
5
;
//@@ .. cpp:enumerator:: DataType::INT8 = 6
TYPE_INT8
=
6
;
//@@ .. cpp:enumerator:: DataType::INT16 = 7
TYPE_INT16
=
7
;
//@@ .. cpp:enumerator:: DataType::INT32 = 8
TYPE_INT32
=
8
;
//@@ .. cpp:enumerator:: DataType::INT64 = 9
TYPE_INT64
=
9
;
//@@ .. cpp:enumerator:: DataType::FP16 = 10
TYPE_FP16
=
10
;
//@@ .. cpp:enumerator:: DataType::FP32 = 11
TYPE_FP32
=
11
;
//@@ .. cpp:enumerator:: DataType::FP64 = 12
TYPE_FP64
=
12
;
//@@ .. cpp:enumerator:: DataType::STRING = 13
TYPE_STRING
=
13
;
//@@ .. cpp:enumerator:: DataType::BF16 = 14
TYPE_BF16
=
14
;
}
//@@
//@@ .. cpp:var:: message ModelRateLimiter
//@@
//@@ The specifications required by the rate limiter to properly
//@@ schedule the inference requests across the different models
//@@ and their instances.
//@@
message
ModelRateLimiter
{
//@@ .. cpp:var:: message Resource
//@@
//@@ The resource property.
//@@
message
Resource
{
//@@ .. cpp:var:: string name
//@@
//@@ The name associated with the resource.
//@@
string
name
=
1
;
//@@ .. cpp:var:: bool global
//@@
//@@ Whether or not the resource is global. If true then the resource
//@@ is assumed to be shared among the devices otherwise specified
//@@ count of the resource is assumed for each device associated
//@@ with the instance.
//@@
bool
global
=
2
;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of resources required for the execution of the model
//@@ instance.
//@@
uint32
count
=
3
;
}
//@@ .. cpp:var:: Resource resources (repeated)
//@@
//@@ The resources required to execute the request on a model instance.
//@@ Resources are just names with a corresponding count. The execution
//@@ of the instance will be blocked until the specified resources are
//@@ available. By default an instance uses no rate-limiter resources.
//@@
repeated
Resource
resources
=
1
;
//@@ .. cpp:var:: uint32 priority
//@@
//@@ The optional weighting value to be used for prioritizing across
//@@ instances. An instance with priority 2 will be given 1/2 the
//@@ number of scheduling chances as an instance_group with priority
//@@ 1. The default priority is 1. The priority of value 0 will be
//@@ treated as priority 1.
//@@
uint32
priority
=
2
;
}
//@@
//@@.. cpp:var:: message ModelInstanceGroup
//@@
//@@   A group of one or more instances of a model and resources made
//@@   available for those instances.
//@@
message ModelInstanceGroup {
  //@@
  //@@  .. cpp:enum:: Kind
  //@@
  //@@     Kind of this instance group.
  //@@
  enum Kind {
    //@@    .. cpp:enumerator:: Kind::KIND_AUTO = 0
    //@@
    //@@       This instance group represents instances that can run on either
    //@@       CPU or GPU. If all GPUs listed in 'gpus' are available then
    //@@       instances will be created on GPU(s), otherwise instances will
    //@@       be created on CPU.
    //@@
    KIND_AUTO = 0;

    //@@    .. cpp:enumerator:: Kind::KIND_GPU = 1
    //@@
    //@@       This instance group represents instances that must run on the
    //@@       GPU.
    //@@
    KIND_GPU = 1;

    //@@    .. cpp:enumerator:: Kind::KIND_CPU = 2
    //@@
    //@@       This instance group represents instances that must run on the
    //@@       CPU.
    //@@
    KIND_CPU = 2;

    //@@    .. cpp:enumerator:: Kind::KIND_MODEL = 3
    //@@
    //@@       This instance group represents instances that should run on the
    //@@       CPU and/or GPU(s) as specified by the model or backend itself.
    //@@       The inference server will not override the model/backend
    //@@       settings.
    //@@
    KIND_MODEL = 3;
  }

  //@@
  //@@  .. cpp:var:: message SecondaryDevice
  //@@
  //@@     A secondary device required for a model instance.
  //@@
  message SecondaryDevice {
    //@@
    //@@  .. cpp:enum:: SecondaryDeviceKind
    //@@
    //@@     The kind of the secondary device.
    //@@
    enum SecondaryDeviceKind {
      //@@    .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
      //@@
      //@@       An NVDLA core. http://nvdla.org
      //@@       Currently KIND_NVDLA is only supported by the TensorRT backend.
      //@@
      KIND_NVDLA = 0;
    }

    //@@  .. cpp:var:: SecondaryDeviceKind kind
    //@@
    //@@     The secondary device kind.
    //@@
    SecondaryDeviceKind kind = 1;

    //@@  .. cpp:var:: int64 device_id
    //@@
    //@@     Identifier for the secondary device.
    //@@
    int64 device_id = 2;
  }

  //@@  .. cpp:var:: string name
  //@@
  //@@     Optional name of this group of instances. If not specified the
  //@@     name will be formed as <model name>_<group number>. The name of
  //@@     individual instances will be further formed by a unique instance
  //@@     number and GPU index.
  //@@
  string name = 1;

  //@@  .. cpp:var:: Kind kind
  //@@
  //@@     The kind of this instance group. Default is KIND_AUTO. If
  //@@     KIND_AUTO or KIND_GPU then both 'count' and 'gpus' are valid and
  //@@     may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
  //@@     and 'gpus' cannot be specified.
  //@@
  Kind kind = 4;

  //@@  .. cpp:var:: int32 count
  //@@
  //@@     For a group assigned to GPU, the number of instances created for
  //@@     each GPU listed in 'gpus'. For a group assigned to CPU the number
  //@@     of instances created. Default is 1.
  //@@
  int32 count = 2;

  //@@  .. cpp:var:: ModelRateLimiter rate_limiter
  //@@
  //@@     The rate limiter specific settings to be associated with this
  //@@     instance group. Optional, if not specified no rate limiting
  //@@     will be applied to this instance group.
  //@@
  ModelRateLimiter rate_limiter = 6;

  //@@  .. cpp:var:: int32 gpus (repeated)
  //@@
  //@@     GPU(s) where instances should be available. For each GPU listed,
  //@@     'count' instances of the model will be available. Setting 'gpus'
  //@@     to empty (or not specifying at all) is equivalent to listing all
  //@@     available GPUs.
  //@@
  repeated int32 gpus = 3;

  //@@  .. cpp:var:: SecondaryDevice secondary_devices (repeated)
  //@@
  //@@     Secondary devices that are required by instances specified by this
  //@@     instance group. Optional.
  //@@
  repeated SecondaryDevice secondary_devices = 8;

  //@@  .. cpp:var:: string profile (repeated)
  //@@
  //@@     For TensorRT models containing multiple optimization profiles, this
  //@@     parameter specifies a set of optimization profiles available to this
  //@@     instance group. The inference server will choose the optimal profile
  //@@     based on the shapes of the input tensors. This field should lie
  //@@     between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
  //@@     and be specified only for TensorRT backend, otherwise an error will
  //@@     be generated. If not specified, the server will select the first
  //@@     optimization profile by default.
  //@@
  repeated string profile = 5;

  //@@  .. cpp:var:: bool passive
  //@@
  //@@     Whether the instances within this instance group will be accepting
  //@@     inference requests from the scheduler. If true, the instances will
  //@@     not be added to the scheduler. Default value is false.
  //@@
  bool passive = 7;

  //@@  .. cpp:var:: string host_policy
  //@@
  //@@     The name of the host policy that the instance is to be associated
  //@@     with. The default value is set to reflect the device kind of the
  //@@     instance, for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
  //@@     KIND_GPU is "gpu_<gpu_id>".
  //@@
  string host_policy = 9;
}
//@@
//@@.. cpp:var:: message ModelTensorReshape
//@@
//@@   Reshape specification for input and output tensors.
//@@
message ModelTensorReshape {
  //@@  .. cpp:var:: int64 shape (repeated)
  //@@
  //@@     The shape to use for reshaping.
  //@@
  repeated int64 shape = 1;
}
//@@
//@@.. cpp:var:: message ModelInput
//@@
//@@   An input required by the model.
//@@
message ModelInput {
  //@@
  //@@  .. cpp:enum:: Format
  //@@
  //@@     The format for the input.
  //@@
  enum Format {
    //@@    .. cpp:enumerator:: Format::FORMAT_NONE = 0
    //@@
    //@@       The input has no specific format. This is the default.
    //@@
    FORMAT_NONE = 0;

    //@@    .. cpp:enumerator:: Format::FORMAT_NHWC = 1
    //@@
    //@@       HWC image format. Tensors with this format require 3 dimensions
    //@@       if the model does not support batching (max_batch_size = 0) or 4
    //@@       dimensions if the model does support batching (max_batch_size
    //@@       >= 1). In either case the 'dims' below should only specify the
    //@@       3 non-batch dimensions (i.e. HWC or CHW).
    //@@
    FORMAT_NHWC = 1;

    //@@    .. cpp:enumerator:: Format::FORMAT_NCHW = 2
    //@@
    //@@       CHW image format. Tensors with this format require 3 dimensions
    //@@       if the model does not support batching (max_batch_size = 0) or 4
    //@@       dimensions if the model does support batching (max_batch_size
    //@@       >= 1). In either case the 'dims' below should only specify the
    //@@       3 non-batch dimensions (i.e. HWC or CHW).
    //@@
    FORMAT_NCHW = 2;
  }

  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the input.
  //@@
  string name = 1;

  //@@  .. cpp:var:: DataType data_type
  //@@
  //@@     The data-type of the input.
  //@@
  DataType data_type = 2;

  //@@  .. cpp:var:: Format format
  //@@
  //@@     The format of the input. Optional.
  //@@
  Format format = 3;

  //@@  .. cpp:var:: int64 dims (repeated)
  //@@
  //@@     The dimensions/shape of the input tensor that must be provided
  //@@     when invoking the inference API for this model.
  //@@
  repeated int64 dims = 4;

  //@@  .. cpp:var:: ModelTensorReshape reshape
  //@@
  //@@     The shape expected for this input by the backend. The input will
  //@@     be reshaped to this before being presented to the backend. The
  //@@     reshape must have the same number of elements as the input shape
  //@@     specified by 'dims'. Optional.
  //@@
  ModelTensorReshape reshape = 5;

  //@@  .. cpp:var:: bool is_shape_tensor
  //@@
  //@@     Whether or not the input is a shape tensor to the model. This field
  //@@     is currently supported only for the TensorRT model. An error will be
  //@@     generated if this specification does not comply with underlying
  //@@     model.
  //@@
  bool is_shape_tensor = 6;

  //@@  .. cpp:var:: bool allow_ragged_batch
  //@@
  //@@     Whether or not the input is allowed to be "ragged" in a dynamically
  //@@     created batch. Default is false indicating that two requests will
  //@@     only be batched if this tensor has the same shape in both requests.
  //@@     True indicates that two requests can be batched even if this tensor
  //@@     has a different shape in each request.
  //@@
  bool allow_ragged_batch = 7;

  //@@  .. cpp:var:: bool optional
  //@@
  //@@     Whether or not the input is optional for the model execution.
  //@@     If true, the input is not required in the inference request.
  //@@     Default value is false.
  //@@
  bool optional = 8;

  //@@  .. cpp:var:: bool is_non_linear_format_io
  //@@
  //@@     Indicates whether the input tensor uses a non-linear IO format. This
  //@@     field is currently supported only for TensorRT models. An error will
  //@@     be generated if this specification does not comply with the
  //@@     underlying model.
  //@@
  bool is_non_linear_format_io = 9;
}
//@@
//@@.. cpp:var:: message ModelOutput
//@@
//@@   An output produced by the model.
//@@
message ModelOutput {
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the output.
  //@@
  string name = 1;

  //@@  .. cpp:var:: DataType data_type
  //@@
  //@@     The data-type of the output.
  //@@
  DataType data_type = 2;

  //@@  .. cpp:var:: int64 dims (repeated)
  //@@
  //@@     The dimensions/shape of the output tensor.
  //@@
  repeated int64 dims = 3;

  //@@  .. cpp:var:: ModelTensorReshape reshape
  //@@
  //@@     The shape produced for this output by the backend. The output will
  //@@     be reshaped from this to the shape specified in 'dims' before being
  //@@     returned in the inference response. The reshape must have the same
  //@@     number of elements as the output shape specified by 'dims'. Optional.
  //@@
  ModelTensorReshape reshape = 5;

  //@@  .. cpp:var:: string label_filename
  //@@
  //@@     The label file associated with this output. Should be specified only
  //@@     for outputs that represent classifications. Optional.
  //@@
  string label_filename = 4;

  //@@  .. cpp:var:: bool is_shape_tensor
  //@@
  //@@     Whether or not the output is a shape tensor to the model. This field
  //@@     is currently supported only for the TensorRT model. An error will be
  //@@     generated if this specification does not comply with underlying
  //@@     model.
  //@@
  bool is_shape_tensor = 6;

  //@@  .. cpp:var:: bool is_non_linear_format_io
  //@@
  //@@     Indicates whether the output tensor uses a non-linear IO format. This
  //@@     field is currently supported only for TensorRT models. An error will
  //@@     be generated if this specification does not comply with the
  //@@     underlying model.
  //@@
  bool is_non_linear_format_io = 7;
}
//@@
//@@.. cpp:var:: message BatchInput
//@@
//@@   A batch input is an additional input that must be added by
//@@   the backend based on all the requests in a batch.
//@@
message BatchInput {
  //@@
  //@@  .. cpp:enum:: Kind
  //@@
  //@@     The kind of the batch input.
  //@@
  enum Kind {
    //@@    .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
    //@@
    //@@       The element count of the 'source_input' will be added as
    //@@       input with shape [1].
    //@@
    BATCH_ELEMENT_COUNT = 0;

    //@@    .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
    //@@
    //@@       The accumulated element count of the 'source_input' will be
    //@@       added as input with shape [1]. For example, if there is a
    //@@       batch of two requests, each with 2 elements, an input of value
    //@@       2 will be added to the first request, and an input of value
    //@@       4 will be added to the second request.
    //@@
    BATCH_ACCUMULATED_ELEMENT_COUNT = 1;

    //@@    .. cpp:enumerator::
    //@@       Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
    //@@
    //@@       The accumulated element count of the 'source_input' will be
    //@@       added as input with shape [1], except for the first request
    //@@       in the batch. For the first request in the batch, the input
    //@@       will have shape [2] where the first element is value 0.
    //@@
    BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;

    //@@    .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
    //@@
    //@@       Among the requests in the batch, the max element count of the
    //@@       'source_input' will be added as input with shape
    //@@       [max_element_count] for the first request in the batch.
    //@@       For other requests, such input will be with shape [0].
    //@@       The data of the tensor will be uninitialized.
    //@@
    BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;

    //@@    .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
    //@@
    //@@       Among the requests in the batch, the shape of the
    //@@       'source_input' will be added as input with shape
    //@@       [batch_size, len(input_dim)]. For example, if one
    //@@       batch-2 input with shape [3, 1] and batch-1 input
    //@@       with shape [2, 2] are batched, the batch input will
    //@@       have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]].
    //@@
    BATCH_ITEM_SHAPE = 4;

    //@@    .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
    //@@
    //@@       Among the requests in the batch, the shape of the
    //@@       'source_input' will be added as input with single dimensional
    //@@       shape [batch_size * len(input_dim)]. For example, if one
    //@@       batch-2 input with shape [3, 1] and batch-1 input
    //@@       with shape [2, 2] are batched, the batch input will
    //@@       have shape [6] and value [3, 1, 3, 1, 2, 2].
    //@@
    BATCH_ITEM_SHAPE_FLATTEN = 5;
  }

  //@@  .. cpp:var:: Kind kind
  //@@
  //@@     The kind of this batch input.
  //@@
  Kind kind = 1;

  //@@  .. cpp:var:: string target_name (repeated)
  //@@
  //@@     The name of the model inputs that the backend will create
  //@@     for this batch input.
  //@@
  repeated string target_name = 2;

  //@@  .. cpp:var:: DataType data_type
  //@@
  //@@     The input's datatype. The data type can be TYPE_INT32 or
  //@@     TYPE_FP32.
  //@@
  DataType data_type = 3;

  //@@  .. cpp:var:: string source_input (repeated)
  //@@
  //@@     The backend derives the value for each batch input from one or
  //@@     more other inputs. 'source_input' gives the names of those
  //@@     inputs.
  //@@
  repeated string source_input = 4;
}
//@@
//@@.. cpp:var:: message BatchOutput
//@@
//@@   A batch output is an output produced by the model that must be handled
//@@   differently by the backend based on all the requests in a batch.
//@@
message BatchOutput {
  //@@
  //@@  .. cpp:enum:: Kind
  //@@
  //@@     The kind of the batch output.
  //@@
  enum Kind {
    //@@    .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
    //@@
    //@@       The output should be scattered according to the shape of
    //@@       'source_input'. The dynamic dimension of the output will
    //@@       be set to the value of the same dimension in the input.
    //@@
    BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
  }

  //@@  .. cpp:var:: string target_name (repeated)
  //@@
  //@@     The name of the outputs to be produced by this batch output
  //@@     specification.
  //@@
  repeated string target_name = 1;

  //@@  .. cpp:var:: Kind kind
  //@@
  //@@     The kind of this batch output.
  //@@
  Kind kind = 2;

  //@@  .. cpp:var:: string source_input (repeated)
  //@@
  //@@     The backend derives each batch output from one or more inputs.
  //@@     'source_input' gives the names of those inputs.
  //@@
  repeated string source_input = 3;
}
//@@
//@@.. cpp:var:: message ModelVersionPolicy
//@@
//@@   Policy indicating which versions of a model should be made
//@@   available by the inference server.
//@@
message ModelVersionPolicy {
  //@@  .. cpp:var:: message Latest
  //@@
  //@@     Serve only the latest version(s) of a model. This is
  //@@     the default policy.
  //@@
  message Latest {
    //@@    .. cpp:var:: uint32 num_versions
    //@@
    //@@       Serve only the 'num_versions' highest-numbered versions.
    //@@       The default value of 'num_versions' is 1, indicating that by
    //@@       default only the single highest-number version of a
    //@@       model will be served.
    //@@
    uint32 num_versions = 1;
  }

  //@@  .. cpp:var:: message All
  //@@
  //@@     Serve all versions of the model.
  //@@
  message All {}

  //@@  .. cpp:var:: message Specific
  //@@
  //@@     Serve only specific versions of the model.
  //@@
  message Specific {
    //@@    .. cpp:var:: int64 versions (repeated)
    //@@
    //@@       The specific versions of the model that will be served.
    //@@
    repeated int64 versions = 1;
  }

  //@@  .. cpp:var:: oneof policy_choice
  //@@
  //@@     Each model must implement only a single version policy. The
  //@@     default policy is 'Latest'.
  //@@
  oneof policy_choice {
    //@@    .. cpp:var:: Latest latest
    //@@
    //@@       Serve only latest version(s) of the model.
    //@@
    Latest latest = 1;

    //@@    .. cpp:var:: All all
    //@@
    //@@       Serve all versions of the model.
    //@@
    All all = 2;

    //@@    .. cpp:var:: Specific specific
    //@@
    //@@       Serve only specific version(s) of the model.
    //@@
    Specific specific = 3;
  }
}
//@@
//@@.. cpp:var:: message ModelOptimizationPolicy
//@@
//@@   Optimization settings for a model. These settings control if/how a
//@@   model is optimized and prioritized by the backend framework when
//@@   it is loaded.
//@@
message ModelOptimizationPolicy {
  //@@
  //@@  .. cpp:var:: message Graph
  //@@
  //@@     Enable generic graph optimization of the model. If not specified
  //@@     the framework's default level of optimization is used. Supports
  //@@     TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow
  //@@     causes XLA to be enabled/disabled for the model. For Onnx defaults
  //@@     to enabling all optimizations, -1 enables only basic optimizations,
  //@@     +1 enables only basic and extended optimizations.
  //@@
  message Graph {
    //@@    .. cpp:var:: int32 level
    //@@
    //@@       The optimization level. Defaults to 0 (zero) if not specified.
    //@@
    //@@         - -1: Disabled
    //@@         -  0: Framework default
    //@@         -  1+: Enable optimization level (greater values indicate
    //@@            higher optimization levels)
    //@@
    int32 level = 1;
  }

  //@@
  //@@  .. cpp:enum:: ModelPriority
  //@@
  //@@     Model priorities. A model will be given scheduling and execution
  //@@     preference over models at lower priorities. Current model
  //@@     priorities only work for TensorRT models.
  //@@
  enum ModelPriority {
    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
    //@@
    //@@       The default model priority.
    //@@
    PRIORITY_DEFAULT = 0;

    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
    //@@
    //@@       The maximum model priority.
    //@@
    PRIORITY_MAX = 1;

    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
    //@@
    //@@       The minimum model priority.
    //@@
    PRIORITY_MIN = 2;
  }

  //@@
  //@@  .. cpp:var:: message Cuda
  //@@
  //@@     CUDA-specific optimization settings.
  //@@
  message Cuda {
    //@@    .. cpp:var:: message GraphSpec
    //@@
    //@@       Specification of the CUDA graph to be captured.
    //@@
    message GraphSpec {
      //@@      .. cpp:var:: message Shape
      //@@
      //@@         Specification of tensor dimension.
      //@@
      message Shape {
        //@@        .. cpp:var:: int64 dim (repeated)
        //@@
        //@@           The dimension.
        //@@
        repeated int64 dim = 1;
      }

      //@@      .. cpp:var:: message LowerBound
      //@@
      //@@         Specification of the lower bound of the CUDA graph.
      //@@
      message LowerBound {
        //@@      .. cpp:var:: int32 batch_size
        //@@
        //@@         The batch size of the CUDA graph. If 'max_batch_size' is 0,
        //@@         'batch_size' must be set to 0. Otherwise, 'batch_size' must
        //@@         be set to value between 1 and 'max_batch_size'.
        //@@
        int32 batch_size = 1;

        //@@      .. cpp:var:: map<string, Shape> input
        //@@
        //@@         The specification of the inputs. 'Shape' is the shape of
        //@@         the input without batching dimension.
        //@@
        map<string, Shape> input = 2;
      }

      //@@      .. cpp:var:: int32 batch_size
      //@@
      //@@         The batch size of the CUDA graph. If 'max_batch_size' is 0,
      //@@         'batch_size' must be set to 0. Otherwise, 'batch_size' must
      //@@         be set to value between 1 and 'max_batch_size'.
      //@@
      int32 batch_size = 1;

      //@@      .. cpp:var:: map<string, Shape> input
      //@@
      //@@         The specification of the inputs. 'Shape' is the shape of the
      //@@         input without batching dimension.
      //@@
      map<string, Shape> input = 2;

      //@@      .. cpp:var:: LowerBound graph_lower_bound
      //@@
      //@@         Specify the lower bound of the CUDA graph. Optional.
      //@@         If specified, the graph can be used for input shapes and
      //@@         batch sizes that are in closed interval between the lower
      //@@         bound specification and graph specification. For dynamic
      //@@         shape model, this allows CUDA graphs to be launched
      //@@         frequently without capturing all possible shape combinations.
      //@@         However, using graph for shape combinations different from
      //@@         the one used for capturing introduces uninitialized data for
      //@@         execution and it may distort the inference result if
      //@@         the model is sensitive to uninitialized data.
      //@@
      LowerBound graph_lower_bound = 3;
    }

    //@@    .. cpp:var:: bool graphs
    //@@
    //@@       Use CUDA graphs API to capture model operations and execute
    //@@       them more efficiently. Default value is false.
    //@@       Currently only recognized by TensorRT backend.
    //@@
    bool graphs = 1;

    //@@    .. cpp:var:: bool busy_wait_events
    //@@
    //@@       Use busy-waiting to synchronize CUDA events to achieve minimum
    //@@       latency from event complete to host thread to be notified, with
    //@@       the cost of high CPU load. Default value is false.
    //@@       Currently only recognized by TensorRT backend.
    //@@
    bool busy_wait_events = 2;

    //@@    .. cpp:var:: GraphSpec graph_spec (repeated)
    //@@
    //@@       Specification of the CUDA graph to be captured. If not specified
    //@@       and 'graphs' is true, the default CUDA graphs will be captured
    //@@       based on model settings.
    //@@       Currently only recognized by TensorRT backend.
    //@@
    repeated GraphSpec graph_spec = 3;

    //@@    .. cpp:var:: bool output_copy_stream
    //@@
    //@@       Uses a CUDA stream separate from the inference stream to copy the
    //@@       output to host. However, be aware that setting this option to
    //@@       true will lead to an increase in the memory consumption of the
    //@@       model as Triton will allocate twice as much GPU memory for its
    //@@       I/O tensor buffers. Default value is false.
    //@@       Currently only recognized by TensorRT backend.
    //@@
    bool output_copy_stream = 4;
  }

  //@@
  //@@  .. cpp:var:: message ExecutionAccelerators
  //@@
  //@@     Specify the preferred execution accelerators to be used to execute
  //@@     the model. Currently only recognized by ONNX Runtime backend and
  //@@     TensorFlow backend.
  //@@
  //@@     For ONNX Runtime backend, it will deploy the model with the execution
  //@@     accelerators by priority, the priority is determined based on the
  //@@     order that they are set, i.e. the provider at the front has highest
  //@@     priority. Overall, the priority will be in the following order:
  //@@         <gpu_execution_accelerator> (if instance is on GPU)
  //@@         CUDA Execution Provider     (if instance is on GPU)
  //@@         <cpu_execution_accelerator>
  //@@         Default CPU Execution Provider
  //@@
  message ExecutionAccelerators {
    //@@
    //@@  .. cpp:var:: message Accelerator
    //@@
    //@@     Specify the accelerator to be used to execute the model.
    //@@     Accelerator with the same name may accept different parameters
    //@@     depending on the backends.
    //@@
    message Accelerator {
      //@@    .. cpp:var:: string name
      //@@
      //@@       The name of the execution accelerator.
      //@@
      string name = 1;

      //@@    .. cpp:var:: map<string, string> parameters
      //@@
      //@@       Additional parameters used to configure the accelerator.
      //@@
      map<string, string> parameters = 2;
    }

    //@@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
    //@@
    //@@       The preferred execution provider to be used if the model instance
    //@@       is deployed on GPU.
    //@@
    //@@       For ONNX Runtime backend, possible value is "tensorrt" as name,
    //@@       and no parameters are required.
    //@@
    //@@       For TensorFlow backend, possible values are "tensorrt",
    //@@       "auto_mixed_precision", "gpu_io".
    //@@
    //@@       For "tensorrt", the following parameters can be specified:
    //@@         "precision_mode": The precision used for optimization.
    //@@         Allowed values are "FP32" and "FP16". Default value is "FP32".
    //@@
    //@@         "max_cached_engines": The maximum number of cached TensorRT
    //@@         engines in dynamic TensorRT ops. Default value is 100.
    //@@
    //@@         "minimum_segment_size": The smallest model subgraph that will
    //@@         be considered for optimization by TensorRT. Default value is 3.
    //@@
    //@@         "max_workspace_size_bytes": The maximum GPU memory the model
    //@@         can use temporarily during execution. Default value is 1GB.
    //@@
    //@@       For "auto_mixed_precision", no parameters are required. If set,
    //@@       the model will try to use FP16 for better performance.
    //@@       This optimization can not be set with "tensorrt".
    //@@
    //@@       For "gpu_io", no parameters are required. If set, the model will
    //@@       be executed using TensorFlow Callable API to set input and output
    //@@       tensors in GPU memory if possible, which can reduce data transfer
    //@@       overhead if the model is used in ensemble. However, the Callable
    //@@       object will be created on model creation and it will request all
    //@@       outputs for every model execution, which may impact the
    //@@       performance if a request does not require all outputs. This
    //@@       optimization will only take effect if the model instance is
    //@@       created with KIND_GPU.
    //@@
    repeated Accelerator gpu_execution_accelerator = 1;

    //@@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
    //@@
    //@@       The preferred execution provider to be used if the model instance
    //@@       is deployed on CPU.
    //@@
    //@@       For ONNX Runtime backend, possible value is "openvino" as name,
    //@@       and no parameters are required.
    //@@
    repeated Accelerator cpu_execution_accelerator = 2;
  }

  //@@
  //@@  .. cpp:var:: message PinnedMemoryBuffer
  //@@
  //@@     Specify whether to use a pinned memory buffer when transferring data
  //@@     between non-pinned system memory and GPU memory. Using a pinned
  //@@     memory buffer for system from/to GPU transfers will typically provide
  //@@     increased performance. For example, in the common use case where the
  //@@     request provides inputs and delivers outputs via non-pinned system
  //@@     memory, if the model instance accepts GPU IOs, the inputs will be
  //@@     processed by two copies: from non-pinned system memory to pinned
  //@@     memory, and from pinned memory to GPU memory. Similarly, pinned
  //@@     memory will be used for delivering the outputs.
  //@@
  message PinnedMemoryBuffer {
    //@@    .. cpp:var:: bool enable
    //@@
    //@@       Use pinned memory buffer. Default is true.
    //@@
    bool enable = 1;
  }

  //@@  .. cpp:var:: Graph graph
  //@@
  //@@     The graph optimization setting for the model. Optional.
  //@@
  Graph graph = 1;

  //@@  .. cpp:var:: ModelPriority priority
  //@@
  //@@     The priority setting for the model. Optional.
  //@@
  ModelPriority priority = 2;

  //@@  .. cpp:var:: Cuda cuda
  //@@
  //@@     CUDA-specific optimization settings. Optional.
  //@@
  Cuda cuda = 3;

  //@@  .. cpp:var:: ExecutionAccelerators execution_accelerators
  //@@
  //@@     The accelerators used for the model. Optional.
  //@@
  ExecutionAccelerators execution_accelerators = 4;

  //@@  .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
  //@@
  //@@     Use pinned memory buffer when the data transfer for inputs
  //@@     is between GPU memory and non-pinned system memory.
  //@@     Default is true.
  //@@
  PinnedMemoryBuffer input_pinned_memory = 5;

  //@@  .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
  //@@
  //@@     Use pinned memory buffer when the data transfer for outputs
  //@@     is between GPU memory and non-pinned system memory.
  //@@     Default is true.
  //@@
  PinnedMemoryBuffer output_pinned_memory = 6;

  //@@  .. cpp:var:: uint32 gather_kernel_buffer_threshold
  //@@
  //@@     The backend may use a gather kernel to gather input data if the
  //@@     device has direct access to the source buffer and the destination
  //@@     buffer. In such case, the gather kernel will be used only if the
  //@@     number of buffers to be gathered is greater than or equal to
  //@@     the specified value. If 0, the gather kernel will be disabled.
  //@@     Default value is 0.
  //@@     Currently only recognized by TensorRT backend.
  //@@
  uint32 gather_kernel_buffer_threshold = 7;

  //@@  .. cpp:var:: bool eager_batching
  //@@
  //@@     Start preparing the next batch before the model instance is ready
  //@@     for the next inference. This option can be used to overlap the
  //@@     batch preparation with model execution, with the trade-off that
  //@@     the next batch might be smaller than what it could have been.
  //@@     Default value is false.
  //@@     Currently only recognized by TensorRT backend.
  //@@
  bool eager_batching = 8;
}
//@@
//@@.. cpp:var:: message ModelQueuePolicy
//@@
//@@   Queue policy for inference requests.
//@@
message ModelQueuePolicy {
  //@@
  //@@  .. cpp:enum:: TimeoutAction
  //@@
  //@@     The action applied to timed-out requests.
  //@@
  enum TimeoutAction {
    //@@    .. cpp:enumerator:: TimeoutAction::REJECT = 0
    //@@
    //@@       Reject the request and return error message accordingly.
    //@@
    REJECT = 0;

    //@@    .. cpp:enumerator:: TimeoutAction::DELAY = 1
    //@@
    //@@       Delay the request until all other requests at the same
    //@@       (or higher) priority levels that have not reached their timeouts
    //@@       are processed. A delayed request will eventually be processed,
    //@@       but may be delayed indefinitely due to newly arriving requests.
    //@@
    DELAY = 1;
  }

  //@@
  //@@  .. cpp:var:: TimeoutAction timeout_action
  //@@
  //@@     The action applied to timed-out request.
  //@@     The default action is REJECT.
  //@@
  TimeoutAction timeout_action = 1;

  //@@
  //@@  .. cpp:var:: uint64 default_timeout_microseconds
  //@@
  //@@     The default timeout for every request, in microseconds.
  //@@     The default value is 0 which indicates that no timeout is set.
  //@@
  uint64 default_timeout_microseconds = 2;

  //@@
  //@@  .. cpp:var:: bool allow_timeout_override
  //@@
  //@@     Whether individual request can override the default timeout value.
  //@@     When true, individual requests can set a timeout that is less than
  //@@     the default timeout value but may not increase the timeout.
  //@@     The default value is false.
  //@@
  bool allow_timeout_override = 3;

  //@@
  //@@  .. cpp:var:: uint32 max_queue_size
  //@@
  //@@     The maximum queue size for holding requests. A request will be
  //@@     rejected immediately if it can't be enqueued because the queue is
  //@@     full. The default value is 0 which indicates that no maximum
  //@@     queue size is enforced.
  //@@
  uint32 max_queue_size = 4;
}
//@@
//@@.. cpp:var:: message ModelDynamicBatching
//@@
//@@   Dynamic batching configuration. These settings control how dynamic
//@@   batching operates for the model.
//@@
message ModelDynamicBatching {
  //@@  .. cpp:var:: int32 preferred_batch_size (repeated)
  //@@
  //@@     Preferred batch sizes for dynamic batching. If a batch of one of
  //@@     these sizes can be formed it will be executed immediately. If
  //@@     not specified a preferred batch size will be chosen automatically
  //@@     based on model and GPU characteristics.
  //@@
  repeated int32 preferred_batch_size = 1;

  //@@  .. cpp:var:: uint64 max_queue_delay_microseconds
  //@@
  //@@     The maximum time, in microseconds, a request will be delayed in
  //@@     the scheduling queue to wait for additional requests for
  //@@     batching. Default is 0.
  //@@
  uint64 max_queue_delay_microseconds = 2;

  //@@  .. cpp:var:: bool preserve_ordering
  //@@
  //@@     Should the dynamic batcher preserve the ordering of responses to
  //@@     match the order of requests received by the scheduler. Default is
  //@@     false. If true, the responses will be returned in the same order as
  //@@     the order of requests sent to the scheduler. If false, the responses
  //@@     may be returned in arbitrary order. This option is specifically
  //@@     needed when a sequence of related inference requests (i.e. inference
  //@@     requests with the same correlation ID) are sent to the dynamic
  //@@     batcher to ensure that the sequence responses are in the correct
  //@@     order.
  //@@
  bool preserve_ordering = 3;

  //@@  .. cpp:var:: uint64 priority_levels
  //@@
  //@@     The number of priority levels to be enabled for the model,
  //@@     the priority level starts from 1 and 1 is the highest priority.
  //@@     Requests are handled in priority order with all priority 1 requests
  //@@     processed before priority 2, all priority 2 requests processed before
  //@@     priority 3, etc. Requests with the same priority level will be
  //@@     handled in the order that they are received.
  //@@
  uint64 priority_levels = 4;

  //@@  .. cpp:var:: uint64 default_priority_level
  //@@
  //@@     The priority level used for requests that don't specify their
  //@@     priority. The value must be in the range [ 1, 'priority_levels' ].
  //@@
  uint64 default_priority_level = 5;

  //@@  .. cpp:var:: ModelQueuePolicy default_queue_policy
  //@@
  //@@     The default queue policy used for requests that don't require
  //@@     priority handling and requests that specify priority levels where
  //@@     there is no specific policy given. If not specified, a policy with
  //@@     default field values will be used.
  //@@
  ModelQueuePolicy default_queue_policy = 6;

  //@@  .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
  //@@
  //@@     Specify the queue policy for the priority level. The default queue
  //@@     policy will be used if a priority level doesn't specify a queue
  //@@     policy.
  //@@
  map<uint64, ModelQueuePolicy> priority_queue_policy = 7;
}
//@@
//@@.. cpp:var:: message ModelSequenceBatching
//@@
//@@ Sequence batching configuration. These settings control how sequence
//@@ batching operates for the model.
//@@
message
ModelSequenceBatching
{
//@@ .. cpp:var:: message Control
//@@
//@@ A control is a signal that the sequence batcher uses to
//@@ communicate with a backend.
//@@
message
Control
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the control.
//@@
enum
Kind
{
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
//@@
//@@ A new sequence is/is-not starting. If true a sequence is
//@@ starting, if false a sequence is continuing. Must
//@@ specify either int32_false_true, fp32_false_true or
//@@ bool_false_true for this control. This control is optional.
//@@
CONTROL_SEQUENCE_START
=
0
;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
//@@
//@@ A sequence is/is-not ready for inference. If true the
//@@ input tensor data is valid and should be used. If false
//@@ the input tensor data is invalid and inferencing should
//@@ be "skipped". Must specify either int32_false_true,
//@@ fp32_false_true or bool_false_true for this control. This
//@@ control is optional.
//@@
CONTROL_SEQUENCE_READY
=
1
;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
//@@
//@@ A sequence is/is-not ending. If true a sequence is
//@@ ending, if false a sequence is continuing. Must specify
//@@ either int32_false_true, fp32_false_true or bool_false_true
//@@ for this control. This control is optional.
//@@
CONTROL_SEQUENCE_END
=
2
;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
//@@
//@@ The correlation ID of the sequence. The correlation ID
//@@ is an uint64_t value that is communicated in whole or
//@@ in part by the tensor. The tensor's datatype must be
//@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
//@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
//@@ the correlation ID will be truncated to the low-order 32
//@@ bits. This control is optional.
//@@
CONTROL_SEQUENCE_CORRID
=
3
;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this control.
//@@
Kind
kind
=
1
;
//@@ .. cpp:var:: int32 int32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in an int32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'int32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated
int32
int32_false_true
=
2
;
//@@ .. cpp:var:: float fp32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a fp32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'fp32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated
float
fp32_false_true
=
3
;
//@@ .. cpp:var:: bool bool_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a bool tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'bool_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated
bool
bool_false_true
=
5
;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The control's datatype.
//@@
DataType
data_type
=
4
;
}
//@@ .. cpp:var:: message ControlInput
//@@
//@@ The sequence control values to communicate by a model input.
//@@
message
ControlInput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model input.
//@@
string
name
=
1
;
//@@ .. cpp:var:: Control control (repeated)
//@@
//@@ The control value(s) that should be communicated to the
//@@ model using this model input.
//@@
repeated
Control
control
=
2
;
}
//@@
//@@ .. cpp:var:: message InitialState
//@@
//@@ Settings used to initialize data for implicit state.
//@@
message
InitialState
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType
data_type
=
1
;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the state tensor, not including the batch
//@@ dimension.
//@@
repeated
int64
dims
=
2
;
//@@ .. cpp:var:: oneof state_data
//@@
//@@ Specify how the initial state data is generated.
//@@
oneof
state_data
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as initial state data.
//@@ Note that the value of 'zero_data' will not be checked,
//@@ instead, zero data will be used as long as the field is set.
//@@
bool
zero_data
=
3
;
//@@ .. cpp:var:: string data_file
//@@
//@@ The file whose content will be used as the initial data for
//@@ the state in row-major order. The file must be provided in
//@@ sub-directory 'initial_state' under the model directory.
//@@
string
data_file
=
4
;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the state initialization.
//@@
string
name
=
5
;
}
//@@ .. cpp:var:: message State
//@@
//@@ An input / output pair of tensors that carry state for the sequence.
//@@
message
State
{
//@@ .. cpp:var:: string input_name
//@@
//@@ The name of the model state input.
//@@
string
input_name
=
1
;
//@@ .. cpp:var:: string output_name
//@@
//@@ The name of the model state output.
//@@
string
output_name
=
2
;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType
data_type
=
3
;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions of the state tensor.
//@@
repeated
int64
dims
=
4
;
//@@ .. cpp:var:: InitialState initial_state (repeated)
//@@
//@@ The optional field to specify the initial state for the model.
//@@
repeated
InitialState
initial_state
=
5
;
//@@ .. cpp:var:: bool use_same_buffer_for_input_output
//@@
//@@ The optional field to use a single buffer for both input and output
//@@ state. Without this option, Triton allocates separate buffers
//@@ for input and output state
//@@ which can be problematic if the state size is
//@@ large. This option reduces the memory usage by allocating a single
//@@ buffer. Enabling this option is recommended whenever
//@@ the input state is processed before the output state is written.
//@@ When enabled the state
//@@ will always be updated independent of whether
//@@ TRITONBACKEND_StateUpdate is called
//@@ (however TRITONBACKEND_StateUpdate should still be called for
//@@ completeness).
//@@
//@@ The default value is false.
//@@
bool
use_same_buffer_for_input_output
=
6
;
//@@ .. cpp:var:: bool use_growable_memory
//@@
//@@ The optional field to enable an implicit state buffer to grow
//@@ without reallocating or copying existing memory.
//@@ Additional memory will be appended to the end of the buffer and
//@@ existing data will be preserved.
//@@ This option is only available for CUDA memory and requires enabling
//@@ use_same_buffer_for_input_output. When using this option,
//@@ StateBuffer call will always return CUDA memory even if CPU memory
//@@ is requested.
//@@
//@@ The default value is false.
//@@
bool
use_growable_memory
=
7
;
}
//@@ .. cpp:var:: message StrategyDirect
//@@
//@@ The sequence batcher uses a specific, unique batch
//@@ slot for each sequence. All inference requests in a
//@@ sequence are directed to the same batch slot in the same
//@@ model instance over the lifetime of the sequence. This
//@@ is the default strategy.
//@@
message
StrategyDirect
{
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the sequence batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64
max_queue_delay_microseconds
=
1
;
//@@ .. cpp:var:: float minimum_slot_utilization
//@@
//@@ The minimum slot utilization that must be satisfied to
//@@ execute the batch before 'max_queue_delay_microseconds' expires.
//@@ For example, a value of 0.5 indicates that the batch should be
//@@ executed as soon as 50% or more of the slots are ready even if
//@@ the 'max_queue_delay_microseconds' timeout has not expired.
//@@ The default is 0.0, indicating that a batch will be executed
//@@ before 'max_queue_delay_microseconds' timeout expires if at least
//@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
//@@ ignored unless minimum_slot_utilization is set to a non-zero
//@@ value.
//@@
float
minimum_slot_utilization
=
2
;
}
//@@ .. cpp:var:: message StrategyOldest
//@@
//@@ The sequence batcher maintains up to 'max_candidate_sequences'
//@@ candidate sequences. 'max_candidate_sequences' can be greater
//@@ than the model's 'max_batch_size'. For inferencing the batcher
//@@ chooses from the candidate sequences up to 'max_batch_size'
//@@ inference requests. Requests are chosen in an oldest-first
//@@ manner across all candidate sequences. A given sequence is
//@@ not guaranteed to be assigned to the same batch slot for
//@@ all inference requests of that sequence.
//@@
message
StrategyOldest
{
//@@ .. cpp:var:: int32 max_candidate_sequences
//@@
//@@ Maximum number of candidate sequences that the batcher
//@@ maintains. Excess sequences are kept in an ordered backlog
//@@ and become candidates when existing candidate sequences
//@@ complete.
//@@
int32
max_candidate_sequences
=
1
;
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching of candidate
//@@ sequences. If a batch of one of these sizes can be formed
//@@ it will be executed immediately. If not specified a
//@@ preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated
int32
preferred_batch_size
=
2
;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the dynamic batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64
max_queue_delay_microseconds
=
3
;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order
//@@ as the order of requests sent to the scheduler. If false, the
//@@ responses may be returned in arbitrary order. This option is
//@@ specifically needed when a sequence of related inference requests
//@@ (i.e. inference requests with the same correlation ID) are sent
//@@ to the dynamic batcher to ensure that the sequence responses are
//@@ in the correct order.
//@@
//@@ When using decoupled models, setting this to true may block the
//@@ responses from independent sequences from being returned to the
//@@ client until the previous request completes, hurting overall
//@@ performance. If using GRPC streaming protocol, the stream
//@@ ordering guarantee may be sufficient alone to ensure the
//@@ responses for each sequence are returned in sequence-order
//@@ without blocking based on independent requests, depending on the
//@@ use case.
//@@
bool
preserve_ordering
=
4
;
}
//@@ .. cpp:var:: oneof strategy_choice
//@@
//@@ The strategy used by the sequence batcher. Default strategy
//@@ is 'direct'.
//@@
oneof
strategy_choice
{
//@@ .. cpp:var:: StrategyDirect direct
//@@
//@@ StrategyDirect scheduling strategy.
//@@
StrategyDirect
direct
=
3
;
//@@ .. cpp:var:: StrategyOldest oldest
//@@
//@@ StrategyOldest scheduling strategy.
//@@
StrategyOldest
oldest
=
4
;
}
//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
//@@
//@@ The maximum time, in microseconds, that a sequence is allowed to
//@@ be idle before it is aborted. The inference server considers a
//@@ sequence idle when it does not have any inference request queued
//@@ for the sequence. If this limit is exceeded, the inference server
//@@ will free the sequence slot allocated by the sequence and make it
//@@ available for another sequence. If not specified (or specified as
//@@ zero) a default value of 1000000 (1 second) is used.
//@@
uint64
max_sequence_idle_microseconds
=
1
;
//@@ .. cpp:var:: ControlInput control_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ sequence start, stop, ready and similar control values to the
//@@ model.
//@@
repeated
ControlInput
control_input
=
2
;
//@@ .. cpp:var:: State state (repeated)
//@@
//@@ The optional state that can be stored in Triton for performing
//@@ inference requests on a sequence. Each sequence holds an implicit
//@@ state local to itself. The output state tensor provided by the
//@@ model in 'output_name' field of the current inference request will
//@@ be transferred as an input tensor named 'input_name' in the next
//@@ request of the same sequence. The input state of the first request
//@@ in the sequence contains garbage data.
//@@
repeated
State
state
=
5
;
//@@ .. cpp:var:: bool iterative_sequence
//@@
//@@ Requests for iterative sequences are processed over a number
//@@ of iterations. An iterative sequence is initiated by a single
//@@ request and is "rescheduled" by the model until completion.
//@@ Requests for inflight requests will be batched together
//@@ and can complete independently. Note this feature
//@@ requires backend support. Default value is false.
//@@
bool
iterative_sequence
=
6
;
}
//@@
//@@.. cpp:var:: message ModelEnsembling
//@@
//@@ Model ensembling configuration. These settings specify the models that
//@@ compose the ensemble and how data flows between the models.
//@@
message
ModelEnsembling
{
//@@ .. cpp:var:: message Step
//@@
//@@ Each step specifies a model included in the ensemble,
//@@ maps ensemble tensor names to the model input tensors,
//@@ and maps model output tensors to ensemble tensor names.
//@@
message
Step
{
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to execute for this step of the ensemble.
//@@
string
model_name
=
1
;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model to use for inference. If -1
//@@ the latest/most-recent version of the model is used.
//@@
int64
model_version
=
2
;
//@@ .. cpp:var:: map<string,string> input_map
//@@
//@@ Map from name of an input tensor on this step's model to ensemble
//@@ tensor name. The ensemble tensor must have the same data type and
//@@ shape as the model input. Each model input must be assigned to
//@@ one ensemble tensor, but the same ensemble tensor can be assigned
//@@ to multiple model inputs.
//@@
map
<
string
,
string
>
input_map
=
3
;
//@@ .. cpp:var:: map<string,string> output_map
//@@
//@@ Map from name of an output tensor on this step's model to ensemble
//@@ tensor name. The data type and shape of the ensemble tensor will
//@@ be inferred from the model output. It is optional to assign all
//@@ model outputs to ensemble tensors. One ensemble tensor name
//@@ can appear in an output map only once.
//@@
map
<
string
,
string
>
output_map
=
4
;
//@@ .. cpp:var:: string model_namespace
//@@
//@@ [RESERVED] currently this field is reserved for internal use, users
//@@ must not set any value to this field to avoid unexpected behavior.
//@@
string
model_namespace
=
5
;
}
//@@ .. cpp:var:: Step step (repeated)
//@@
//@@ The models and the input / output mappings used within the ensemble.
//@@
repeated
Step
step
=
1
;
}
//@@
//@@.. cpp:var:: message ModelParameter
//@@
//@@ A model parameter. Used as the value type of the 'parameters' map
//@@ in ModelConfig.
//@@
message
ModelParameter
{
//@@ .. cpp:var:: string string_value
//@@
//@@ The string value of the parameter.
//@@
string
string_value
=
1
;
}
//@@
//@@.. cpp:var:: message ModelWarmup
//@@
//@@ Settings used to construct the request sample for model warmup.
//@@
message
ModelWarmup
{
//@@
//@@ .. cpp:var:: message Input
//@@
//@@ Meta data associated with an input.
//@@
message
Input
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType
data_type
=
1
;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@
repeated
int64
dims
=
2
;
//@@ .. cpp:var:: oneof input_data_type
//@@
//@@ Specify how the input data is generated. If the input has STRING
//@@ data type and 'random_data' is set, the data generation will fall
//@@ back to 'zero_data'.
//@@
oneof
input_data_type
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as input data. Note that the
//@@ value of 'zero_data' will not be checked, instead, zero data
//@@ will be used as long as the field is set.
//@@
bool
zero_data
=
3
;
//@@
//@@ .. cpp:var:: bool random_data
//@@
//@@ The identifier for using random data as input data. Note that
//@@ the value of 'random_data' will not be checked, instead,
//@@ random data will be used as long as the field is set.
//@@
bool
random_data
=
4
;
//@@ .. cpp:var:: string input_data_file
//@@
//@@ The file whose content will be used as raw input data in
//@@ row-major order. The file must be provided in a sub-directory
//@@ 'warmup' under the model directory. The file contents should be
//@@ in binary format. For TYPE_STRING data-type, an element is
//@@ represented by a 4-byte unsigned integer giving the length
//@@ followed by the actual bytes.
//@@
string
input_data_file
=
5
;
}
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the request sample.
//@@
string
name
=
1
;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1. If
//@@ batch_size > 1, the 'inputs' specified below will be duplicated to
//@@ match the batch size requested.
//@@
uint32
batch_size
=
2
;
//@@ .. cpp:var:: map<string, Input> inputs
//@@
//@@ The warmup meta data associated with every model input, including
//@@ control tensors.
//@@
map
<
string
,
Input
>
inputs
=
3
;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of iterations that this warmup sample will be executed.
//@@ For example, if this field is set to 2, 2 model executions using this
//@@ sample will be scheduled for warmup. Default value is 0 which
//@@ indicates that this sample will be used only once.
//@@ Note that for sequence models, 'count' may not work well
//@@ because the model often expects a valid sequence of requests which
//@@ should be represented by a series of warmup samples. 'count > 1'
//@@ essentially "resends" one of the samples, which may invalidate the
//@@ sequence and result in unexpected warmup failure.
//@@
uint32
count
=
4
;
}
//@@
//@@ .. cpp:var:: message ModelOperations
//@@
//@@ The metadata of libraries providing custom operations for this model.
//@@ Referenced by the 'model_operations' field of ModelConfig.
//@@
message
ModelOperations
{
//@@ .. cpp:var:: string op_library_filename (repeated)
//@@
//@@ Optional paths of the libraries providing custom operations for
//@@ this model. Valid only for ONNX models.
//@@
repeated
string
op_library_filename
=
1
;
}
//@@
//@@ .. cpp:var:: message ModelTransactionPolicy
//@@
//@@ The specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
message
ModelTransactionPolicy
{
//@@ .. cpp:var:: bool decoupled
//@@
//@@ Indicates whether responses generated by the model are decoupled from
//@@ the requests issued to it, which means the number of responses
//@@ generated by the model may differ from the number of requests issued,
//@@ and that the responses may be out of order relative to the order of
//@@ requests. The default is false, which means the model will generate
//@@ exactly one response for each request.
//@@
bool
decoupled
=
1
;
}
//@@
//@@.. cpp:var:: message ModelRepositoryAgents
//@@
//@@ The repository agents for the model. Referenced by the
//@@ 'model_repository_agents' field of ModelConfig.
//@@
message
ModelRepositoryAgents
{
//@@
//@@ .. cpp:var:: message Agent
//@@
//@@ A repository agent that should be invoked for the specified
//@@ repository actions for this model.
//@@
message
Agent
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the agent.
//@@
string
name
=
1
;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ The parameters for the agent.
//@@
map
<
string
,
string
>
parameters
=
2
;
}
//@@
//@@ .. cpp:var:: Agent agents (repeated)
//@@
//@@ The ordered list of agents for the model. These agents will be
//@@ invoked in order to respond to repository actions occurring for the
//@@ model.
//@@
repeated
Agent
agents
=
1
;
}
//@@
//@@.. cpp:var:: message ModelResponseCache
//@@
//@@ The response cache setting for the model.
//@@
message
ModelResponseCache
{
//@@
//@@ .. cpp:var:: bool enable
//@@
//@@ Whether or not to use response cache for the model. If True, the
//@@ responses from the model are cached and when an identical request
//@@ is encountered, instead of going through the model execution,
//@@ the response from the cache is utilized. By default, response
//@@ cache is disabled for the models.
//@@
bool
enable
=
1
;
}
//@@
//@@ .. cpp:var:: message ModelMetrics
//@@
//@@ The metrics setting of this model.
//@@ NOTE: Consider reusing this message body for backend metric custom
//@@ configuration.
//@@
message
ModelMetrics
{
//@@
//@@ .. cpp:var:: message MetricControl
//@@
//@@ Override metrics settings of this model.
//@@
message
MetricControl
{
//@@
//@@ .. cpp:var:: message MetricIdentifier
//@@
//@@ Specify metrics to be overridden with metric_options.
//@@
message
MetricIdentifier
{
//@@ .. cpp:var:: string family
//@@
//@@ The name of the metric family to override with the custom value.
//@@ All core histogram metrics reported by Triton are customizable.
//@@
//@@ https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md#histograms
//@@
string
family
=
1
;
}
//@@ .. cpp:var:: message HistogramOptions
//@@
//@@ Histogram metrics options.
//@@
message
HistogramOptions
{
//@@ .. cpp:var:: double buckets (repeated)
//@@
//@@ Repeated double type in ascending order for histogram bucket
//@@ boundaries. Each bucket value represents a range less than or
//@@ equal to itself. The range greater than the largest bucket value
//@@ is allocated implicitly.
//@@ For example, [ -5.0, -2, 0, 3.5, 5 ].
//@@
repeated
double
buckets
=
1
;
}
//@@ .. cpp:var:: MetricIdentifier metric_identifier
//@@
//@@ The identifier defining metrics to be overridden with the
//@@ metric_options.
//@@
MetricIdentifier
metric_identifier
=
1
;
//@@ .. cpp:var:: oneof metric_options
//@@
//@@ The value to override the metrics defined in metric_identifier.
//@@
oneof
metric_options
{
//@@ .. cpp:var:: HistogramOptions histogram_options
//@@
//@@ Histogram options.
//@@
HistogramOptions
histogram_options
=
2
;
}
}
//@@
//@@ .. cpp:var:: MetricControl metric_control (repeated)
//@@
//@@ Optional custom configuration for selected metrics.
//@@
repeated
MetricControl
metric_control
=
1
;
}
//@@
//@@.. cpp:var:: message ModelConfig
//@@
//@@ A model configuration.
//@@
message
ModelConfig
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string
name
=
1
;
//@@ .. cpp:var:: string platform
//@@
//@@ Additional backend-specific configuration for the model.
//@@ Please refer to the backend documentation on whether this field
//@@ should be specified.
//@@
string
platform
=
2
;
//@@ .. cpp:var:: string backend
//@@
//@@ The backend used by the model.
//@@
string
backend
=
17
;
//@@ .. cpp:var:: string runtime
//@@
//@@ The name of the backend library file used by the model.
//@@
string
runtime
=
25
;
//@@ .. cpp:var:: ModelVersionPolicy version_policy
//@@
//@@ Policy indicating which version(s) of the model will be served.
//@@
ModelVersionPolicy
version_policy
=
3
;
//@@ .. cpp:var:: int32 max_batch_size
//@@
//@@ Maximum batch size allowed for inference. This can only decrease
//@@ what is allowed by the model itself. A max_batch_size value of 0
//@@ indicates that batching is not allowed for the model and the
//@@ dimension/shape of the input and output tensors must exactly
//@@ match what is specified in the input and output configuration. A
//@@ max_batch_size value > 0 indicates that batching is allowed and
//@@ so the model expects the input tensors to have an additional
//@@ initial dimension for the batching that is not specified in the
//@@ input (for example, if the model supports batched inputs of
//@@ 2-dimensional tensors then the model configuration will specify
//@@ the input shape as [ X, Y ] but the model will expect the actual
//@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
//@@ returned outputs will also have an additional initial dimension
//@@ for the batch.
//@@
int32
max_batch_size
=
4
;
//@@ .. cpp:var:: ModelInput input (repeated)
//@@
//@@ The inputs requested by the model.
//@@
repeated
ModelInput
input
=
5
;
//@@ .. cpp:var:: ModelOutput output (repeated)
//@@
//@@ The outputs produced by the model.
//@@
repeated
ModelOutput
output
=
6
;
//@@ .. cpp:var:: BatchInput batch_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ batch related values to the model.
//@@
repeated
BatchInput
batch_input
=
20
;
//@@ .. cpp:var:: BatchOutput batch_output (repeated)
//@@
//@@ The outputs produced by the model that require special handling
//@@ by the model backend.
//@@
repeated
BatchOutput
batch_output
=
21
;
//@@ .. cpp:var:: ModelOptimizationPolicy optimization
//@@
//@@ Optimization configuration for the model. If not specified
//@@ then default optimization policy is used.
//@@
ModelOptimizationPolicy
optimization
=
12
;
//@@ .. cpp:var:: oneof scheduling_choice
//@@
//@@ The scheduling policy for the model. If not specified the
//@@ default scheduling policy is used for the model. The default
//@@ policy is to execute each inference request independently.
//@@
oneof
scheduling_choice
{
//@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
//@@
//@@ If specified, enables the dynamic-batching scheduling
//@@ policy. With dynamic-batching the scheduler may group
//@@ together independent requests into a single batch to
//@@ improve inference throughput.
//@@
ModelDynamicBatching
dynamic_batching
=
11
;
//@@ .. cpp:var:: ModelSequenceBatching sequence_batching
//@@
//@@ If specified, enables the sequence-batching scheduling
//@@ policy. With sequence-batching, inference requests
//@@ with the same correlation ID are routed to the same
//@@ model instance. Multiple sequences of inference requests
//@@ may be batched together into a single batch to
//@@ improve inference throughput.
//@@
ModelSequenceBatching
sequence_batching
=
13
;
//@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
//@@
//@@ If specified, enables the model-ensembling scheduling
//@@ policy. With model-ensembling, inference requests
//@@ will be processed according to the specification, such as an
//@@ execution sequence of models. The input specified in this model
//@@ config will be the input for the ensemble, and the output
//@@ specified will be the output of the ensemble.
//@@
ModelEnsembling
ensemble_scheduling
=
15
;
}
//@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
//@@
//@@ Instances of this model. If not specified, one instance
//@@ of the model will be instantiated on each available GPU.
//@@
repeated
ModelInstanceGroup
instance_group
=
7
;
//@@ .. cpp:var:: string default_model_filename
//@@
//@@ Optional filename of the model file to use if a
//@@ compute-capability specific model is not specified in
//@@ :cpp:var:`cc_model_filenames`. If not specified the default name
//@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
//@@ 'model.pt' depending on the model type.
//@@
string
default_model_filename
=
8
;
//@@ .. cpp:var:: map<string,string> cc_model_filenames
//@@
//@@ Optional map from CUDA compute capability to the filename of
//@@ the model that supports that compute capability. The filename
//@@ refers to a file within the model version directory.
//@@
map
<
string
,
string
>
cc_model_filenames
=
9
;
//@@ .. cpp:var:: map<string,string> metric_tags
//@@
//@@ Optional metric tags. User-specific key-value pairs for metrics
//@@ reported for this model. These tags are applied to the metrics
//@@ reported on the HTTP metrics port.
//@@
map
<
string
,
string
>
metric_tags
=
10
;
//@@ .. cpp:var:: map<string,ModelParameter> parameters
//@@
//@@ Optional model parameters. User-specified parameter values.
//@@
map
<
string
,
ModelParameter
>
parameters
=
14
;
//@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
//@@
//@@ Warmup setting of this model. If specified, all instances
//@@ will be run with the request samples in sequence before
//@@ serving the model.
//@@ This field can only be specified if the model is not an ensemble
//@@ model.
//@@
repeated
ModelWarmup
model_warmup
=
16
;
//@@ .. cpp:var:: ModelOperations model_operations
//@@
//@@ Optional metadata of the libraries providing custom operations for
//@@ this model.
//@@
ModelOperations
model_operations
=
18
;
//@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
//@@
//@@ Optional specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
ModelTransactionPolicy
model_transaction_policy
=
19
;
//@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
//@@
//@@ Optional specification of the agent(s) that should be invoked
//@@ when repository actions are performed for this model.
//@@
ModelRepositoryAgents
model_repository_agents
=
23
;
//@@ .. cpp:var:: ModelResponseCache response_cache
//@@
//@@ Optional setting for utilizing the response cache for this
//@@ model.
//@@
ModelResponseCache
response_cache
=
24
;
//@@ .. cpp:var:: ModelMetrics model_metrics
//@@
//@@ Optional setting for custom metrics configuration for this model.
//@@ Application default is applied to metrics that are not specified.
//@@
ModelMetrics
model_metrics
=
26
;
}
lib/llm/src/grpc/service.rs
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
mod
kserve
;
pub
mod
openai
;
lib/llm/src/grpc/service/kserve.rs
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
pin
::
Pin
;
use
std
::
sync
::
Arc
;
use
crate
::
grpc
::
service
::
kserve
::
inference
::
DataType
;
use
crate
::
grpc
::
service
::
kserve
::
inference
::
ModelInput
;
use
crate
::
grpc
::
service
::
kserve
::
inference
::
ModelOutput
;
use
crate
::
http
::
service
::
Metrics
;
use
crate
::
http
::
service
::
metrics
;
use
crate
::
discovery
::
ModelManager
;
use
crate
::
request_template
::
RequestTemplate
;
use
anyhow
::
Result
;
use
derive_builder
::
Builder
;
use
dynamo_async_openai
::
types
::{
CompletionFinishReason
,
CreateCompletionRequest
,
Prompt
};
use
dynamo_runtime
::
transports
::
etcd
;
use
futures
::
pin_mut
;
use
tokio
::
task
::
JoinHandle
;
use
tokio_stream
::{
Stream
,
StreamExt
};
use
tokio_util
::
sync
::
CancellationToken
;
use
crate
::
grpc
::
service
::
openai
::{
completion_response_stream
,
get_parsing_options
};
use
tonic
::{
Request
,
Response
,
Status
,
transport
::
Server
};
use
crate
::
protocols
::
openai
::
completions
::{
NvCreateCompletionRequest
,
NvCreateCompletionResponse
,
};
/// Code generated by tonic for the `inference` protobuf package, pulled in
/// via [`tonic::include_proto!`]. The message and service types used by this
/// file (e.g. `ModelInferRequest`, `GrpcInferenceService`) are imported from here.
pub
mod
inference
{
tonic
::
include_proto!
(
"inference"
);
}
use
inference
::
grpc_inference_service_server
::{
GrpcInferenceService
,
GrpcInferenceServiceServer
};
use
inference
::{
InferParameter
,
ModelConfig
,
ModelConfigRequest
,
ModelConfigResponse
,
ModelInferRequest
,
ModelInferResponse
,
ModelMetadataRequest
,
ModelMetadataResponse
,
ModelStreamInferResponse
,
};
/// State shared by the KServe gRPC service: the metrics object, the model
/// manager and an optional etcd client.
///
/// [gluo TODO] 'metrics' are for HTTP service and there is HTTP endpoint
/// for it as part of HTTP service. Should we always start HTTP service up
/// for non-inference?
pub
struct
State
{
// Metrics object; handed out through `metrics_clone`.
metrics
:
Arc
<
Metrics
>
,
// Model manager; exposed through `manager` and `manager_clone`.
manager
:
Arc
<
ModelManager
>
,
// Optional etcd client; `None` when the state is built with `State::new`.
etcd_client
:
Option
<
etcd
::
Client
>
,
}
impl State {
    /// Creates a state around `manager` with default metrics and no etcd client.
    pub fn new(manager: Arc<ModelManager>) -> Self {
        Self::new_with_etcd(manager, None)
    }

    /// Creates a state around `manager` with default metrics and the given
    /// (optional) etcd client.
    pub fn new_with_etcd(manager: Arc<ModelManager>, etcd_client: Option<etcd::Client>) -> Self {
        let metrics = Arc::new(Metrics::default());
        Self {
            metrics,
            manager,
            etcd_client,
        }
    }

    /// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests
    pub fn metrics_clone(&self) -> Arc<Metrics> {
        Arc::clone(&self.metrics)
    }

    /// Borrows the model manager held by this state.
    pub fn manager(&self) -> &ModelManager {
        &self.manager
    }

    /// Returns another shared handle to the model manager.
    pub fn manager_clone(&self) -> Arc<ModelManager> {
        Arc::clone(&self.manager)
    }

    /// Borrows the etcd client, if one was supplied at construction.
    pub fn etcd_client(&self) -> Option<&etcd::Client> {
        self.etcd_client.as_ref()
    }
}
/// KServe gRPC service: holds the shared [`State`], the host/port that `run`
/// formats into the listen address, and an optional request template.
#[derive(Clone)]
pub
struct
KserveService
{
// The state we share with every request handler
state
:
Arc
<
State
>
,
// Port part of the "{host}:{port}" listen address built in `run`.
port
:
u16
,
// Host part of the "{host}:{port}" listen address built in `run`.
host
:
String
,
// Optional template; `model_infer` reads it to fill unset request fields.
request_template
:
Option
<
RequestTemplate
>
,
}
// Configuration for `KserveService`. `derive_builder` generates the owned-pattern
// `KserveServiceConfigBuilder`; its generated build function is private and named
// `build_internal`, and the public `build` defined on the builder in this file
// wraps it.
#[derive(Clone,
Builder)]
#[builder(pattern
=
"owned"
,
build_fn(private,
name
=
"build_internal"
))]
pub
struct
KserveServiceConfig
{
// Listen port; defaults to 8787.
#[builder(default
=
"8787"
)]
port
:
u16
,
// Listen host; defaults to "0.0.0.0".
#[builder(setter(into),
default
=
"String::from(
\"
0.0.0.0
\"
)"
)]
host
:
String
,
// Optional request template; defaults to `None`.
#[builder(default
=
"None"
)]
request_template
:
Option
<
RequestTemplate
>
,
// Optional etcd client; defaults to `None`.
#[builder(default
=
"None"
)]
etcd_client
:
Option
<
etcd
::
Client
>
,
}
impl KserveService {
    /// Returns a [`KserveServiceConfigBuilder`] with every option at its default.
    pub fn builder() -> KserveServiceConfigBuilder {
        Default::default()
    }

    /// Returns another shared handle to the service state.
    pub fn state_clone(&self) -> Arc<State> {
        Arc::clone(&self.state)
    }

    /// Borrows the service state.
    pub fn state(&self) -> &State {
        &self.state
    }

    /// Borrows the model manager owned by the service state.
    pub fn model_manager(&self) -> &ModelManager {
        self.state.manager()
    }

    /// Clones the service and runs it on a new Tokio task, returning the
    /// task's join handle.
    pub async fn spawn(&self, cancel_token: CancellationToken) -> JoinHandle<Result<()>> {
        let service = self.clone();
        tokio::spawn(async move { service.run(cancel_token).await })
    }

    /// Serves the gRPC inference service on `host:port` until a child of
    /// `cancel_token` is cancelled.
    ///
    /// # Errors
    ///
    /// Returns an error if the address fails to parse or if the server
    /// returns an error; in the latter case `cancel_token` is cancelled
    /// before the error is returned.
    pub async fn run(&self, cancel_token: CancellationToken) -> Result<()> {
        let address = format!("{}:{}", self.host, self.port);
        tracing::info!(address, "Starting KServe gRPC service on: {address}");

        // The server shuts down when this child token is cancelled.
        let shutdown = cancel_token.child_token().cancelled_owned();
        let served = Server::builder()
            .add_service(GrpcInferenceServiceServer::new(self.clone()))
            .serve_with_shutdown(address.parse()?, shutdown)
            .await;

        if let Err(err) = served {
            cancel_token.cancel();
            return Err(err.into());
        }
        Ok(())
    }
}
impl
KserveServiceConfigBuilder
{
pub
fn
build
(
self
)
->
Result
<
KserveService
,
anyhow
::
Error
>
{
let
config
:
KserveServiceConfig
=
self
.build_internal
()
?
;
let
model_manager
=
Arc
::
new
(
ModelManager
::
new
());
let
state
=
Arc
::
new
(
State
::
new_with_etcd
(
model_manager
,
config
.etcd_client
));
// enable prometheus metrics
let
registry
=
metrics
::
Registry
::
new
();
state
.metrics_clone
()
.register
(
&
registry
)
?
;
Ok
(
KserveService
{
state
,
port
:
config
.port
,
host
:
config
.host
,
request_template
:
config
.request_template
,
})
}
pub
fn
with_request_template
(
mut
self
,
request_template
:
Option
<
RequestTemplate
>
)
->
Self
{
self
.request_template
=
Some
(
request_template
);
self
}
pub
fn
with_etcd_client
(
mut
self
,
etcd_client
:
Option
<
etcd
::
Client
>
)
->
Self
{
self
.etcd_client
=
Some
(
etcd_client
);
self
}
}
#[tonic::async_trait]
impl
GrpcInferenceService
for
KserveService
{
async
fn
model_infer
(
&
self
,
request
:
Request
<
ModelInferRequest
>
,
)
->
Result
<
Response
<
ModelInferResponse
>
,
Status
>
{
let
request
=
request
.into_inner
();
let
request_id
=
request
.id
.clone
();
let
mut
completion_request
:
NvCreateCompletionRequest
=
request
.try_into
()
.map_err
(|
e
|
Status
::
invalid_argument
(
format!
(
"Failed to parse request: {}"
,
e
)))
?
;
if
completion_request
.inner.stream
.unwrap_or
(
false
)
{
// return error that streaming is not supported
return
Err
(
Status
::
invalid_argument
(
"Streaming is not supported for this endpoint"
,
));
}
// Apply template values if present
if
let
Some
(
template
)
=
self
.request_template
.as_ref
()
{
if
completion_request
.inner.model
.is_empty
()
{
completion_request
.inner.model
=
template
.model
.clone
();
}
if
completion_request
.inner.temperature
.unwrap_or
(
0.0
)
==
0.0
{
completion_request
.inner.temperature
=
Some
(
template
.temperature
);
}
if
completion_request
.inner.max_tokens
.unwrap_or
(
0
)
==
0
{
completion_request
.inner.max_tokens
=
Some
(
template
.max_completion_tokens
);
}
}
let
model
=
completion_request
.inner.model
.clone
();
let
parsing_options
=
get_parsing_options
(
self
.state
.manager
(),
&
model
);
let
stream
=
completion_response_stream
(
self
.state_clone
(),
completion_request
)
.await
?
;
let
completion_response
=
NvCreateCompletionResponse
::
from_annotated_stream
(
stream
,
parsing_options
)
.await
.map_err
(|
e
|
{
tracing
::
error!
(
"Failed to fold completions stream: {:?}"
,
e
);
Status
::
internal
(
"Failed to fold completions stream"
)
})
?
;
let
mut
reply
:
ModelInferResponse
=
completion_response
.try_into
()
.map_err
(|
e
|
Status
::
invalid_argument
(
format!
(
"Failed to parse response: {}"
,
e
)))
?
;
reply
.id
=
request_id
;
Ok
(
Response
::
new
(
reply
))
}
/// Boxed response stream returned by [`Self::model_stream_infer`].
type ModelStreamInferStream =
    Pin<Box<dyn Stream<Item = Result<ModelStreamInferResponse, Status>> + Send + 'static>>;

/// Bidirectional streaming inference.
///
/// Requests are read from the inbound stream and handled one at a time.
/// For each request the completion engine is invoked; when the request
/// asks for streaming every data-carrying chunk is forwarded as its own
/// response, otherwise the chunks are folded into a single response.
/// Each response carries the id of the request that produced it.
///
/// A failure to read a request is reported in-band through
/// `error_message` and processing continues; conversion or engine
/// failures terminate the response stream with a `Status`.
async fn model_stream_infer(
    &self,
    request: Request<tonic::Streaming<ModelInferRequest>>,
) -> Result<Response<Self::ModelStreamInferStream>, Status> {
    let mut request_stream = request.into_inner();
    let state = self.state_clone();
    let template = self.request_template.clone();

    let output = async_stream::try_stream! {
        // [gluo FIXME] should be able to demux request / response streaming
        // await requests in a separate task until cancellation / completion,
        // and passing AsyncEngineStream for each request to the response stream
        // which will be collectively polling.
        while let Some(request) = request_stream.next().await {
            // Must keep track of 'request_id' which will be returned in corresponding response
            let request_id: String;
            let mut completion_request: NvCreateCompletionRequest = match request {
                Err(e) => {
                    tracing::error!("Unexpected gRPC failed to read request: {}", e);
                    yield ModelStreamInferResponse {
                        error_message: e.to_string(),
                        infer_response: None
                    };
                    continue;
                }
                Ok(request) => {
                    request_id = request.id.clone();
                    request.try_into().map_err(|e| {
                        Status::invalid_argument(format!("Failed to parse request: {}", e))
                    })?
                }
            };

            // Apply template values if present
            if let Some(template) = &template {
                if completion_request.inner.model.is_empty() {
                    completion_request.inner.model = template.model.clone();
                }
                if completion_request.inner.temperature.unwrap_or(0.0) == 0.0 {
                    completion_request.inner.temperature = Some(template.temperature);
                }
                if completion_request.inner.max_tokens.unwrap_or(0) == 0 {
                    completion_request.inner.max_tokens = Some(template.max_completion_tokens);
                }
            }

            let model = completion_request.inner.model.clone();
            let parsing_options = get_parsing_options(state.manager(), &model);

            // Read the flag before the request is moved into the engine call.
            let streaming = completion_request.inner.stream.unwrap_or(false);

            let stream = completion_response_stream(state.clone(), completion_request).await?;

            if streaming {
                pin_mut!(stream);
                while let Some(response) = stream.next().await {
                    // Responses without data only carry annotations and are skipped.
                    // NOTE(review): confirm that data-less responses never carry an
                    // engine error that should be surfaced to the client here.
                    if let Some(data) = response.data {
                        let mut reply = ModelStreamInferResponse::try_from(data).map_err(|e| {
                            Status::invalid_argument(format!("Failed to parse response: {}", e))
                        })?;
                        if let Some(infer_response) = reply.infer_response.as_mut() {
                            infer_response.id = request_id.clone();
                        }
                        yield reply;
                    }
                }
            } else {
                let completion_response = NvCreateCompletionResponse::from_annotated_stream(stream, parsing_options)
                    .await
                    .map_err(|e| {
                        tracing::error!("Failed to fold completions stream: {:?}", e);
                        Status::internal("Failed to fold completions stream")
                    })?;

                let mut response: ModelStreamInferResponse = completion_response.try_into().map_err(|e| {
                    Status::invalid_argument(format!("Failed to parse response: {}", e))
                })?;
                if let Some(infer_response) = response.infer_response.as_mut() {
                    // Last use of the id for this request, so it can be moved.
                    infer_response.id = request_id;
                }
                yield response;
            }
        }
    };

    Ok(Response::new(
        Box::pin(output) as Self::ModelStreamInferStream
    ))
}
/// Returns the tensor-level metadata of a registered completions model.
///
/// Every known model reports the same fixed interface: inputs
/// `text_input` (BYTES, `[1]`) and `streaming` (BOOL, `[1]`), outputs
/// `text_output` and `finish_reason` (both BYTES, `[-1]`).
///
/// # Errors
/// `not_found` when the requested name is not among the completions models.
async fn model_metadata(
    &self,
    request: Request<ModelMetadataRequest>,
) -> Result<Response<ModelMetadataResponse>, Status> {
    let registered = self.state.manager().list_completions_models();
    let requested = request.into_inner().name;

    let Some(model_name) = registered
        .into_iter()
        .find(|candidate| candidate == &requested)
    else {
        return Err(Status::not_found(format!(
            "Model '{}' not found",
            requested
        )));
    };

    // All tensors are one-dimensional; only name, datatype and extent vary.
    let tensor = |name: &str, datatype: &str, extent: i64| {
        inference::model_metadata_response::TensorMetadata {
            name: name.to_string(),
            datatype: datatype.to_string(),
            shape: vec![extent],
        }
    };

    Ok(Response::new(ModelMetadataResponse {
        name: model_name,
        versions: vec!["1".to_string()],
        platform: "dynamo".to_string(),
        inputs: vec![
            tensor("text_input", "BYTES", 1),
            tensor("streaming", "BOOL", 1),
        ],
        outputs: vec![
            tensor("text_output", "BYTES", -1),
            tensor("finish_reason", "BYTES", -1),
        ],
    }))
}
/// Returns the model configuration of a registered completions model.
///
/// The configuration mirrors the fixed interface reported by the
/// metadata endpoint: inputs `text_input` (string) and the optional
/// `streaming` (bool), outputs `text_output` and `finish_reason` (strings).
///
/// # Errors
/// `not_found` when the requested name is not among the completions models.
async fn model_config(
    &self,
    request: Request<ModelConfigRequest>,
) -> Result<Response<ModelConfigResponse>, Status> {
    let registered = self.state.manager().list_completions_models();
    let requested = request.into_inner().name;

    let Some(model_name) = registered
        .into_iter()
        .find(|candidate| candidate == &requested)
    else {
        return Err(Status::not_found(format!(
            "Model '{}' not found",
            requested
        )));
    };

    let input = vec![
        ModelInput {
            name: String::from("text_input"),
            data_type: DataType::TypeString as i32,
            dims: vec![1],
            ..Default::default()
        },
        ModelInput {
            name: String::from("streaming"),
            data_type: DataType::TypeBool as i32,
            dims: vec![1],
            optional: true,
            ..Default::default()
        },
    ];

    let output = vec![
        ModelOutput {
            name: String::from("text_output"),
            data_type: DataType::TypeString as i32,
            dims: vec![-1],
            ..Default::default()
        },
        ModelOutput {
            name: String::from("finish_reason"),
            data_type: DataType::TypeString as i32,
            dims: vec![-1],
            ..Default::default()
        },
    ];

    let config = ModelConfig {
        name: model_name,
        platform: String::from("dynamo"),
        backend: String::from("dynamo"),
        input,
        output,
        ..Default::default()
    };

    Ok(Response::new(ModelConfigResponse {
        config: Some(config),
    }))
}
}
impl
TryFrom
<
ModelInferRequest
>
for
NvCreateCompletionRequest
{
type
Error
=
Status
;
fn
try_from
(
request
:
ModelInferRequest
)
->
Result
<
Self
,
Self
::
Error
>
{
// Protocol requires if `raw_input_contents` is used to hold input data,
// it must be used for all inputs.
if
!
request
.raw_input_contents
.is_empty
()
&&
request
.inputs
.len
()
!=
request
.raw_input_contents
.len
()
{
return
Err
(
Status
::
invalid_argument
(
"`raw_input_contents` must be used for all inputs"
,
));
}
// iterate through inputs
let
mut
text_input
=
None
;
let
mut
stream
=
false
;
for
(
idx
,
input
)
in
request
.inputs
.iter
()
.enumerate
()
{
match
input
.name
.as_str
()
{
"text_input"
=>
{
if
input
.datatype
!=
"BYTES"
{
return
Err
(
Status
::
invalid_argument
(
format!
(
"Expected 'text_input' to be of type BYTES for string input, got {:?}"
,
input
.datatype
)));
}
if
input
.shape
!=
vec!
[
1
]
&&
input
.shape
!=
vec!
[
1
,
1
]
{
return
Err
(
Status
::
invalid_argument
(
format!
(
"Expected 'text_input' to have shape [1], got {:?}"
,
input
.shape
)));
}
match
&
input
.contents
{
Some
(
content
)
=>
{
let
bytes
=
&
content
.bytes_contents
[
0
];
text_input
=
Some
(
String
::
from_utf8_lossy
(
bytes
)
.to_string
());
}
None
=>
{
let
raw_input
=
request
.raw_input_contents
.get
(
idx
)
.ok_or_else
(||
{
Status
::
invalid_argument
(
"Missing raw input for 'text_input'"
)
})
?
;
if
raw_input
.len
()
<
4
{
return
Err
(
Status
::
invalid_argument
(
"'text_input' raw input must be length-prefixed (>= 4 bytes)"
,
));
}
// We restrict the 'text_input' only contain one element, only need to
// parse the first element. Skip first four bytes that is used to store
// the length of the input.
text_input
=
Some
(
String
::
from_utf8_lossy
(
&
raw_input
[
4
..
])
.to_string
());
}
}
}
"streaming"
|
"stream"
=>
{
if
input
.datatype
!=
"BOOL"
{
return
Err
(
Status
::
invalid_argument
(
format!
(
"Expected '{}' to be of type BOOL, got {:?}"
,
input
.name
,
input
.datatype
)));
}
if
input
.shape
!=
vec!
[
1
]
{
return
Err
(
Status
::
invalid_argument
(
format!
(
"Expected 'stream' to have shape [1], got {:?}"
,
input
.shape
)));
}
match
&
input
.contents
{
Some
(
content
)
=>
{
stream
=
content
.bool_contents
[
0
];
}
None
=>
{
let
raw_input
=
request
.raw_input_contents
.get
(
idx
)
.ok_or_else
(||
{
Status
::
invalid_argument
(
"Missing raw input for 'stream'"
)
})
?
;
if
raw_input
.is_empty
()
{
return
Err
(
Status
::
invalid_argument
(
"'stream' raw input must contain at least one byte"
,
));
}
stream
=
raw_input
[
0
]
!=
0
;
}
}
}
_
=>
{
return
Err
(
Status
::
invalid_argument
(
format!
(
"Invalid input name: {}, supported inputs are 'text_input', 'stream'"
,
input
.name
)));
}
}
}
// return error if text_input is None
let
text_input
=
match
text_input
{
Some
(
input
)
=>
input
,
None
=>
{
return
Err
(
Status
::
invalid_argument
(
"Missing required input: 'text_input'"
,
));
}
};
Ok
(
NvCreateCompletionRequest
{
inner
:
CreateCompletionRequest
{
model
:
request
.model_name
,
prompt
:
Prompt
::
String
(
text_input
),
stream
:
Some
(
stream
),
user
:
if
request
.id
.is_empty
()
{
None
}
else
{
Some
(
request
.id
.clone
())
},
..
Default
::
default
()
},
common
:
Default
::
default
(),
nvext
:
None
,
})
}
}
impl
TryFrom
<
NvCreateCompletionResponse
>
for
ModelInferResponse
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
response
:
NvCreateCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
let
mut
outputs
=
vec!
[];
let
mut
text_output
=
vec!
[];
let
mut
finish_reason
=
vec!
[];
for
choice
in
&
response
.inner.choices
{
text_output
.push
(
choice
.text
.clone
());
if
let
Some
(
reason
)
=
choice
.finish_reason
.as_ref
()
{
match
reason
{
CompletionFinishReason
::
Stop
=>
{
finish_reason
.push
(
"stop"
.to_string
());
}
CompletionFinishReason
::
Length
=>
{
finish_reason
.push
(
"length"
.to_string
());
}
CompletionFinishReason
::
ContentFilter
=>
{
finish_reason
.push
(
"content_filter"
.to_string
());
}
}
}
}
outputs
.push
(
inference
::
model_infer_response
::
InferOutputTensor
{
name
:
"text_output"
.to_string
(),
datatype
:
"BYTES"
.to_string
(),
shape
:
vec!
[
text_output
.len
()
as
i64
],
contents
:
Some
(
inference
::
InferTensorContents
{
bytes_contents
:
text_output
.into_iter
()
.map
(|
text
|
text
.as_bytes
()
.to_vec
())
.collect
(),
..
Default
::
default
()
}),
..
Default
::
default
()
});
outputs
.push
(
inference
::
model_infer_response
::
InferOutputTensor
{
name
:
"finish_reason"
.to_string
(),
datatype
:
"BYTES"
.to_string
(),
shape
:
vec!
[
finish_reason
.len
()
as
i64
],
contents
:
Some
(
inference
::
InferTensorContents
{
bytes_contents
:
finish_reason
.into_iter
()
.map
(|
text
|
text
.as_bytes
()
.to_vec
())
.collect
(),
..
Default
::
default
()
}),
..
Default
::
default
()
});
Ok
(
ModelInferResponse
{
model_name
:
response
.inner.model
,
model_version
:
"1"
.to_string
(),
id
:
response
.inner.id
,
outputs
,
parameters
:
::
std
::
collections
::
HashMap
::
<
String
,
InferParameter
>
::
new
(),
raw_output_contents
:
vec!
[],
})
}
}
impl
TryFrom
<
NvCreateCompletionResponse
>
for
ModelStreamInferResponse
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
response
:
NvCreateCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
match
ModelInferResponse
::
try_from
(
response
)
{
Ok
(
response
)
=>
Ok
(
ModelStreamInferResponse
{
infer_response
:
Some
(
response
),
..
Default
::
default
()
}),
Err
(
e
)
=>
Ok
(
ModelStreamInferResponse
{
infer_response
:
None
,
error_message
:
format!
(
"Failed to convert response: {}"
,
e
),
}),
}
}
}
lib/llm/src/grpc/service/openai.rs
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
dynamo_runtime
::{
engine
::
AsyncEngineContext
,
pipeline
::{
AsyncEngineContextProvider
,
Context
},
protocols
::
annotated
::
AnnotationsProvider
,
};
use
futures
::{
Stream
,
StreamExt
,
stream
};
use
std
::
sync
::
Arc
;
use
crate
::
discovery
::
ModelManager
;
use
crate
::
protocols
::
openai
::{
ParsingOptions
,
completions
::{
NvCreateCompletionRequest
,
NvCreateCompletionResponse
},
};
use
crate
::
types
::
Annotated
;
use
super
::
kserve
;
// [gluo NOTE] These are common utilities that should be shared between frontends
use
crate
::
http
::
service
::{
disconnect
::{
ConnectionHandle
,
create_connection_monitor
},
metrics
::{
Endpoint
,
ResponseMetricCollector
},
};
use
crate
::{
http
::
service
::
metrics
::
InflightGuard
,
preprocessor
::
LLMMetricAnnotation
};
use
tonic
::
Status
;
/// Name of the Dynamo annotation that carries the request ID.
pub const ANNOTATION_REQUEST_ID: &str = "request_id";
// [gluo NOTE] stripped-down version of lib/llm/src/http/service/openai.rs,
// duplicated here because the original file is coupled to HTTP objects.
/// Completions request handler for the gRPC frontend.
///
/// Looks up the completions engine registered for the request's model,
/// invokes it and returns the resulting stream of annotated responses,
/// wrapped so that metrics are collected and disconnects are monitored.
///
/// Note: the engine is always called with streaming enabled, whatever the
/// request asked for. Callers that need a single response fold the returned
/// stream themselves.
///
/// # Errors
/// * `not_found` when no completions engine is registered for the model.
/// * `internal` when the engine fails to start generation.
pub async fn completion_response_stream(
    state: Arc<kserve::State>,
    request: NvCreateCompletionRequest,
) -> Result<impl Stream<Item = Annotated<NvCreateCompletionResponse>>, Status> {
    // create the context for the request
    // [WIP] from request id.
    let request_id = get_or_create_request_id(request.inner.user.as_deref());
    let wrapped = Context::with_id(request, request_id.clone());
    let request_context = wrapped.context();

    // create the connection handles
    let (mut connection_handle, stream_handle) =
        create_connection_monitor(request_context.clone()).await;

    // Remember what the caller asked for before forcing streaming on.
    let streaming = wrapped.inner.stream.unwrap_or(false);

    // update the request to always stream
    let wrapped = wrapped.map(|mut inner_request| {
        inner_request.inner.stream = Some(true);
        inner_request
    });

    // todo - make the protocols be optional for model name
    // todo - when optional, if none, apply a default
    let model = &wrapped.inner.model;

    // todo - error handling should be more robust
    let engine = state
        .manager()
        .get_completions_engine(model)
        .map_err(|_| Status::not_found("model not found"))?;

    let inflight_guard =
        state
            .metrics_clone()
            .create_inflight_guard(model, Endpoint::Completions, streaming);

    let mut response_collector = state.metrics_clone().create_response_collector(model);

    // prepare to process any annotations
    let requested_annotations = wrapped.annotations();

    // issue the generate call on the engine
    let engine_stream = engine
        .generate(wrapped)
        .await
        .map_err(|e| Status::internal(format!("Failed to generate completions: {}", e)))?;

    // capture the context to cancel the stream if the client disconnects
    let engine_context = engine_stream.context();

    // prepare any requested annotations; only the request-id annotation is supported
    let leading = requested_annotations
        .map(|names| {
            names
                .iter()
                .filter(|name| *name == ANNOTATION_REQUEST_ID)
                .filter_map(|_| {
                    Annotated::<NvCreateCompletionResponse>::from_annotation(
                        ANNOTATION_REQUEST_ID,
                        &request_id,
                    )
                    .ok()
                })
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    // apply any annotations to the front of the stream
    let with_annotations = stream::iter(leading).chain(engine_stream);

    // Tap on the stream to collect response metrics
    let observed = with_annotations.inspect(move |response| {
        process_metrics_only(response, &mut response_collector);
    });

    let monitored =
        grpc_monitor_for_disconnects(observed, engine_context, inflight_guard, stream_handle);

    // if we got here, then we will return a response and the potentially long running task has completed successfully
    // without need to be cancelled.
    connection_handle.disarm();

    Ok(monitored)
}
/// Consumes a stream of annotated responses while watching for context cancellation.
/// This is the gRPC variant of `monitor_for_disconnects`, whose implementation has SSE specific handling.
/// Should decouple and reuse `monitor_for_disconnects`
///
/// Uses `tokio::select!` to choose between receiving the next response from the source stream and
/// detecting that the context is stopped. Responses are forwarded unchanged. If the context is
/// stopped, the stream ends immediately. If the source stream ends naturally, the inflight guard is
/// marked as successful and the stream handle is disarmed before the stream ends.
pub fn grpc_monitor_for_disconnects<T>(
    stream: impl Stream<Item = Annotated<T>>,
    context: Arc<dyn AsyncEngineContext>,
    mut inflight_guard: InflightGuard,
    mut stream_handle: ConnectionHandle,
) -> impl Stream<Item = Annotated<T>> {
    stream_handle.arm();
    async_stream::stream! {
        tokio::pin!(stream);
        loop {
            tokio::select! {
                event = stream.next() => {
                    if let Some(response) = event {
                        yield response;
                    } else {
                        // Stream ended normally
                        inflight_guard.mark_ok();
                        stream_handle.disarm();
                        break;
                    }
                }
                _ = context.stopped() => {
                    tracing::trace!("Context stopped; breaking stream");
                    break;
                }
            }
        }
    }
}
/// Feeds the metrics carried by `annotated`, if any, into `response_collector`.
///
/// Responses that carry no metrics annotation, or whose annotation cannot be
/// decoded, are ignored.
fn process_metrics_only<T>(
    annotated: &Annotated<T>,
    response_collector: &mut ResponseMetricCollector,
) {
    match LLMMetricAnnotation::from_annotation(annotated) {
        Ok(Some(metrics)) => {
            // update metrics
            response_collector.observe_current_osl(metrics.output_tokens);
            response_collector.observe_response(metrics.input_tokens, metrics.chunk_tokens);
        }
        _ => {}
    }
}
/// Get the request ID from a primary source, or lastly create a new one if not present
fn
get_or_create_request_id
(
primary
:
Option
<&
str
>
)
->
String
{
// Try to get the request ID from the primary source
if
let
Some
(
primary
)
=
primary
&&
let
Ok
(
uuid
)
=
uuid
::
Uuid
::
parse_str
(
primary
)
{
return
uuid
.to_string
();
}
// Try to parse the request ID as a UUID, or generate a new one if missing/invalid
let
uuid
=
uuid
::
Uuid
::
new_v4
();
uuid
.to_string
()
}
/// Builds the parsing options for `model`.
///
/// The tool-call parser comes from the model manager; no reasoning parser is
/// configured yet.
pub fn get_parsing_options(manager: &ModelManager, model: &str) -> ParsingOptions {
    // TODO: Implement reasoning parser
    ParsingOptions::new(manager.get_model_tool_call_parser(model), None)
}
lib/llm/src/http/service/openai.rs
View file @
91a459c0
...
@@ -31,7 +31,6 @@ use super::{
...
@@ -31,7 +31,6 @@ use super::{
metrics
::{
Endpoint
,
ResponseMetricCollector
},
metrics
::{
Endpoint
,
ResponseMetricCollector
},
service_v2
,
service_v2
,
};
};
use
crate
::
preprocessor
::
LLMMetricAnnotation
;
use
crate
::
protocols
::
openai
::
chat_completions
::
aggregator
::
ChatCompletionAggregator
;
use
crate
::
protocols
::
openai
::
chat_completions
::
aggregator
::
ChatCompletionAggregator
;
use
crate
::
protocols
::
openai
::{
use
crate
::
protocols
::
openai
::{
ParsingOptions
,
ParsingOptions
,
...
@@ -42,6 +41,7 @@ use crate::protocols::openai::{
...
@@ -42,6 +41,7 @@ use crate::protocols::openai::{
};
};
use
crate
::
request_template
::
RequestTemplate
;
use
crate
::
request_template
::
RequestTemplate
;
use
crate
::
types
::
Annotated
;
use
crate
::
types
::
Annotated
;
use
crate
::{
discovery
::
ModelManager
,
preprocessor
::
LLMMetricAnnotation
};
use
dynamo_runtime
::
logging
::
get_distributed_tracing_context
;
use
dynamo_runtime
::
logging
::
get_distributed_tracing_context
;
use
tracing
::
Instrument
;
use
tracing
::
Instrument
;
...
@@ -195,8 +195,8 @@ fn get_or_create_request_id(primary: Option<&str>, headers: &HeaderMap) -> Strin
...
@@ -195,8 +195,8 @@ fn get_or_create_request_id(primary: Option<&str>, headers: &HeaderMap) -> Strin
uuid
.to_string
()
uuid
.to_string
()
}
}
fn
get_parsing_options
(
state
:
&
Arc
<
service_v2
::
State
>
,
model
:
&
str
)
->
ParsingOptions
{
fn
get_parsing_options
(
manager
:
&
ModelManager
,
model
:
&
str
)
->
ParsingOptions
{
let
tool_call_parser
=
state
.
manager
()
.get_model_tool_call_parser
(
model
);
let
tool_call_parser
=
manager
.get_model_tool_call_parser
(
model
);
let
reasoning_parser
=
None
;
// TODO: Implement reasoning parser
let
reasoning_parser
=
None
;
// TODO: Implement reasoning parser
ParsingOptions
::
new
(
tool_call_parser
,
reasoning_parser
)
ParsingOptions
::
new
(
tool_call_parser
,
reasoning_parser
)
...
@@ -274,7 +274,7 @@ async fn completions(
...
@@ -274,7 +274,7 @@ async fn completions(
.get_completions_engine
(
model
)
.get_completions_engine
(
model
)
.map_err
(|
_
|
ErrorMessage
::
model_not_found
())
?
;
.map_err
(|
_
|
ErrorMessage
::
model_not_found
())
?
;
let
parsing_options
=
get_parsing_options
(
&
state
,
model
);
let
parsing_options
=
get_parsing_options
(
state
.manager
()
,
model
);
let
mut
inflight_guard
=
let
mut
inflight_guard
=
state
state
...
@@ -501,7 +501,7 @@ async fn chat_completions(
...
@@ -501,7 +501,7 @@ async fn chat_completions(
.get_chat_completions_engine
(
model
)
.get_chat_completions_engine
(
model
)
.map_err
(|
_
|
ErrorMessage
::
model_not_found
())
?
;
.map_err
(|
_
|
ErrorMessage
::
model_not_found
())
?
;
let
parsing_options
=
get_parsing_options
(
&
state
,
model
);
let
parsing_options
=
get_parsing_options
(
state
.manager
()
,
model
);
let
mut
inflight_guard
=
let
mut
inflight_guard
=
state
state
...
@@ -736,7 +736,7 @@ async fn responses(
...
@@ -736,7 +736,7 @@ async fn responses(
.get_chat_completions_engine
(
model
)
.get_chat_completions_engine
(
model
)
.map_err
(|
_
|
ErrorMessage
::
model_not_found
())
?
;
.map_err
(|
_
|
ErrorMessage
::
model_not_found
())
?
;
let
parsing_options
=
get_parsing_options
(
&
state
,
model
);
let
parsing_options
=
get_parsing_options
(
state
.manager
()
,
model
);
let
mut
inflight_guard
=
let
mut
inflight_guard
=
state
state
...
...
lib/llm/src/lib.rs
View file @
91a459c0
...
@@ -18,6 +18,7 @@ pub mod endpoint_type;
...
@@ -18,6 +18,7 @@ pub mod endpoint_type;
pub
mod
engines
;
pub
mod
engines
;
pub
mod
entrypoint
;
pub
mod
entrypoint
;
pub
mod
gguf
;
pub
mod
gguf
;
pub
mod
grpc
;
pub
mod
http
;
pub
mod
http
;
pub
mod
hub
;
pub
mod
hub
;
// pub mod key_value_store;
// pub mod key_value_store;
...
...
lib/llm/tests/kserve_service.rs
0 → 100644
View file @
91a459c0
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
mod
kserve_test
{
/// Generated KServe protocol types; the tests use the gRPC client from here.
pub mod inference {
    tonic::include_proto!("inference");
}
use
inference
::
grpc_inference_service_client
::
GrpcInferenceServiceClient
;
use
inference
::{
DataType
,
ModelConfigRequest
,
ModelInferRequest
,
ModelInferResponse
,
ModelMetadataRequest
,
};
use
anyhow
::
Error
;
use
async_stream
::
stream
;
use
dynamo_llm
::
grpc
::
service
::
kserve
::
KserveService
;
use
dynamo_llm
::
protocols
::{
Annotated
,
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
},
completions
::{
NvCreateCompletionRequest
,
NvCreateCompletionResponse
},
},
};
use
dynamo_runtime
::{
CancellationToken
,
pipeline
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ManyOut
,
ResponseStream
,
SingleIn
,
async_trait
,
},
};
use
rstest
::
*
;
use
std
::
sync
::
Arc
;
use
std
::
time
::
Duration
;
use
tokio
::
time
::
timeout
;
use
tonic
::{
Request
,
Response
,
transport
::
Channel
};
use
dynamo_async_openai
::
types
::
Prompt
;
/// Test engine that splits a string prompt on spaces and streams back one
/// response per word.
struct SplitEngine {}
/// Long-running test engine used to exercise cancellation.
struct LongRunningEngine {
    /// How long a generation runs, in milliseconds, unless it is stopped.
    delay_ms: u64,
    /// Set while a generation is in flight or after it was stopped early;
    /// cleared when a generation runs to completion.
    cancelled: Arc<std::sync::atomic::AtomicBool>,
}
impl LongRunningEngine {
    /// Creates an engine whose generations take `delay_ms` milliseconds.
    fn new(delay_ms: u64) -> Self {
        let cancelled = Arc::new(std::sync::atomic::AtomicBool::new(false));
        Self {
            delay_ms,
            cancelled,
        }
    }

    /// Reports the current value of the cancellation flag.
    fn was_cancelled(&self) -> bool {
        let flag = &self.cancelled;
        flag.load(std::sync::atomic::Ordering::Acquire)
    }

    // Wait for the duration of generation delay to ensure the generate stream
    // has been terminated early (`was_cancelled` remains true).
    async fn wait_for_delay(&self) {
        let delay = std::time::Duration::from_millis(self.delay_ms);
        tokio::time::sleep(delay).await;
    }
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
NvCreateCompletionRequest
>
,
ManyOut
<
Annotated
<
NvCreateCompletionResponse
>>
,
Error
,
>
for
SplitEngine
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
NvCreateCompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
NvCreateCompletionResponse
>>
,
Error
>
{
let
(
request
,
context
)
=
request
.transfer
(());
let
ctx
=
context
.context
();
// let generator = NvCreateChatCompletionStreamResponse::generator(request.model.clone());
let
generator
=
request
.response_generator
(
ctx
.id
()
.to_string
());
let
word_list
:
Vec
<
String
>
=
match
request
.inner.prompt
{
Prompt
::
String
(
str
)
=>
str
.split
(
' '
)
.map
(|
s
|
s
.to_string
())
.collect
(),
_
=>
{
return
Err
(
Error
::
msg
(
"SplitEngine only support prompt type String"
))
?
;
}
};
let
stream
=
stream!
{
tokio
::
time
::
sleep
(
std
::
time
::
Duration
::
from_millis
(
10
))
.await
;
for
word
in
word_list
{
yield
Annotated
::
from_data
(
generator
.create_choice
(
0
,
Some
(
word
.to_string
()),
None
,
None
));
}
};
Ok
(
ResponseStream
::
new
(
Box
::
pin
(
stream
),
ctx
))
}
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
NvCreateCompletionRequest
>
,
ManyOut
<
Annotated
<
NvCreateCompletionResponse
>>
,
Error
,
>
for
LongRunningEngine
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
NvCreateCompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
NvCreateCompletionResponse
>>
,
Error
>
{
let
(
_
request
,
context
)
=
request
.transfer
(());
let
ctx
=
context
.context
();
tracing
::
info!
(
"LongRunningEngine: Starting generation with {}ms delay"
,
self
.delay_ms
);
let
cancelled_flag
=
self
.cancelled
.clone
();
let
delay_ms
=
self
.delay_ms
;
let
ctx_clone
=
ctx
.clone
();
let
stream
=
async_stream
::
stream!
{
// the stream can be dropped or it can be cancelled
// either way we consider this a cancellation
cancelled_flag
.store
(
true
,
std
::
sync
::
atomic
::
Ordering
::
SeqCst
);
tokio
::
select!
{
_
=
tokio
::
time
::
sleep
(
std
::
time
::
Duration
::
from_millis
(
delay_ms
))
=>
{
// the stream went to completion
cancelled_flag
.store
(
false
,
std
::
sync
::
atomic
::
Ordering
::
SeqCst
);
}
_
=
ctx_clone
.stopped
()
=>
{
cancelled_flag
.store
(
true
,
std
::
sync
::
atomic
::
Ordering
::
SeqCst
);
}
}
yield
Annotated
::
<
NvCreateCompletionResponse
>
::
from_annotation
(
"event.dynamo.test.sentinel"
,
&
"DONE"
.to_string
())
.expect
(
"Failed to create annotated response"
);
};
Ok
(
ResponseStream
::
new
(
Box
::
pin
(
stream
),
ctx
))
}
}
/// Test engine whose `generate` always returns an error.
struct AlwaysFailEngine {}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
NvCreateChatCompletionRequest
>
,
ManyOut
<
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
Error
,
>
for
AlwaysFailEngine
{
async
fn
generate
(
&
self
,
_
request
:
SingleIn
<
NvCreateChatCompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
Error
>
{
Err
(
Error
::
msg
(
"Always fail"
))
?
}
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
NvCreateCompletionRequest
>
,
ManyOut
<
Annotated
<
NvCreateCompletionResponse
>>
,
Error
,
>
for
AlwaysFailEngine
{
async
fn
generate
(
&
self
,
_
request
:
SingleIn
<
NvCreateCompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
NvCreateCompletionResponse
>>
,
Error
>
{
Err
(
Error
::
msg
(
"Always fail"
))
?
}
}
/// Waits for the gRPC service on `port` to accept connections and returns a
/// connected client.
///
/// Connection attempts are retried every 10 ms. Panics with the last
/// connection error once `timeout_secs` seconds have elapsed.
async fn get_ready_client(port: u16, timeout_secs: u64) -> GrpcInferenceServiceClient<Channel> {
    let started = tokio::time::Instant::now();
    let budget = tokio::time::Duration::from_secs(timeout_secs);
    loop {
        let address = format!("http://0.0.0.0:{}", port);
        let failure = match GrpcInferenceServiceClient::connect(address).await {
            Ok(client) => return client,
            Err(e) => e,
        };
        if started.elapsed() >= budget {
            panic!("Service failed to start within timeout: {}", failure);
        }
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
    }
}
/// Fixture: a `text_input` tensor (BYTES, shape `[1]`) holding `text` inline.
#[fixture]
fn text_input(
    #[default("dummy input")] text: &str,
) -> inference::model_infer_request::InferInputTensor {
    let contents = inference::InferTensorContents {
        bytes_contents: vec![text.into()],
        ..Default::default()
    };
    inference::model_infer_request::InferInputTensor {
        name: String::from("text_input"),
        datatype: String::from("BYTES"),
        shape: vec![1],
        contents: Some(contents),
        ..Default::default()
    }
}
/// Fixture: a service bound to `port` with the test engines registered.
///
/// Registers `split`, `failure` and `long_running` as completions models and
/// `failure` additionally as a chat-completions model. The engines are
/// returned alongside the service so tests can inspect them.
#[fixture]
fn service_with_engines(
    #[default(8990)] port: u16,
) -> (
    KserveService,
    Arc<SplitEngine>,
    Arc<AlwaysFailEngine>,
    Arc<LongRunningEngine>,
) {
    let service = KserveService::builder().port(port).build().unwrap();

    let split = Arc::new(SplitEngine {});
    let failure = Arc::new(AlwaysFailEngine {});
    let long_running = Arc::new(LongRunningEngine::new(1_000));

    {
        let manager = service.model_manager();
        manager
            .add_completions_model("split", split.clone())
            .unwrap();
        manager
            .add_chat_completions_model("failure", failure.clone())
            .unwrap();
        manager
            .add_completions_model("failure", failure.clone())
            .unwrap();
        manager
            .add_completions_model("long_running", long_running.clone())
            .unwrap();
    }

    (service, split, failure, long_running)
}
/// Handle to a service running on a background task; dropping it cancels the
/// token the service was started with.
struct RunningService {
    /// Token passed to the running service.
    token: CancellationToken,
}
impl RunningService {
    /// Starts `service` on a spawned task and returns the handle that owns
    /// its cancellation token.
    fn spawn(service: KserveService) -> Self {
        let token = CancellationToken::new();
        let server_token = token.clone();
        tokio::spawn(async move { service.run(server_token).await });
        Self { token }
    }
}
impl Drop for RunningService {
    /// Cancels the service's token when the handle goes out of scope.
    fn drop(&mut self) {
        let Self { token } = self;
        token.cancel();
    }
}
/// Port assigned to each test case.
///
/// Tests may run in parallel, so every test case gets its own port.
enum TestPort {
    InferFailure = 8988,
    InferSuccess = 8989,
    StreamInferFailure = 8990,
    StreamInferSuccess = 8991,
    InferCancellation = 8992,
    StreamInferCancellation = 8993,
    ModelInfo = 8994,
}
// Unary ModelInfer must reject bad requests with the matching gRPC status code.
//
// Cases, in order:
//   1. unregistered model                      -> NotFound
//   2. registered model, no inputs             -> InvalidArgument
//   3. streaming requested on the unary method -> InvalidArgument
//   4. engine that always fails                -> Internal
#[rstest]
#[tokio::test]
async fn test_infer_failure(
    #[with(TestPort::InferFailure as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server; the guard cancels it when the test returns
    let _running = RunningService::spawn(service_with_engines.0);

    // create client once the server accepts connections
    let mut client = get_ready_client(TestPort::InferFailure as u16, 5).await;

    // Every case sends the same envelope and differs only in model name and inputs.
    let infer_request =
        |model: &str, inputs: Vec<inference::model_infer_request::InferInputTensor>| {
            tonic::Request::new(ModelInferRequest {
                model_name: model.into(),
                model_version: "1".into(),
                id: "1234".into(),
                inputs,
                ..Default::default()
            })
        };

    // unknown_model
    let err = client
        .model_infer(infer_request("Tonic", vec![text_input.clone()]))
        .await
        .expect_err("Expected request to an unregistered model to fail");
    assert_eq!(
        err.code(),
        tonic::Code::NotFound,
        "Expected NotFound error for unregistered model, get {}",
        err
    );

    // missing input
    let err = client
        .model_infer(infer_request("split", vec![]))
        .await
        .expect_err("Expected request without inputs to fail");
    assert_eq!(
        err.code(),
        tonic::Code::InvalidArgument,
        "Expected InvalidArgument error for missing input, get {}",
        err
    );

    // request streaming on the unary endpoint
    let streaming_flag = inference::model_infer_request::InferInputTensor {
        name: "streaming".into(),
        datatype: "BOOL".into(),
        shape: vec![1],
        contents: Some(inference::InferTensorContents {
            bool_contents: vec![true],
            ..Default::default()
        }),
        ..Default::default()
    };
    let err = client
        .model_infer(infer_request(
            "split",
            vec![text_input.clone(), streaming_flag],
        ))
        .await
        .expect_err("Expected streaming request on unary endpoint to fail");
    assert_eq!(
        err.code(),
        tonic::Code::InvalidArgument,
        "Expected InvalidArgument error for streaming, get {}",
        err
    );
    // assert "stream" in error message
    assert!(
        err.message().contains("Streaming is not supported"),
        "Expected error message to contain 'Streaming is not supported', got: {}",
        err.message()
    );

    // AlwaysFailEngine
    let err = client
        .model_infer(infer_request("failure", vec![text_input.clone()]))
        .await
        .expect_err("Expected request to the always-failing engine to fail");
    assert_eq!(
        err.code(),
        tonic::Code::Internal,
        "Expected Internal error for failing engine, get {}",
        err
    );
    assert!(
        err.message().contains("Failed to generate completions:"),
        "Expected error message to contain 'Failed to generate completions:', got: {}",
        err.message()
    );
}
// Unary ModelInfer against the "split" model succeeds both when the text travels in
// the tensor's `contents` and when it travels in `raw_input_contents`.
#[rstest]
#[tokio::test]
async fn test_infer_success(
    #[with(TestPort::InferSuccess as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);

    let mut client = get_ready_client(TestPort::InferSuccess as u16, 5).await;

    let model_name = "split";

    // Input data in the tensor contents
    let typed_request = ModelInferRequest {
        model_name: model_name.into(),
        model_version: "1".into(),
        id: "1234".into(),
        inputs: vec![text_input.clone()],
        ..Default::default()
    };
    let typed_response = client
        .model_infer(tonic::Request::new(typed_request))
        .await
        .unwrap();
    validate_infer_response(typed_response, model_name);

    // Input data in raw_input_content
    // Clear contents to use raw_input_contents
    let raw_tensor = inference::model_infer_request::InferInputTensor {
        contents: None,
        ..text_input
    };
    // The raw element is the text preceded by its length as a little-endian u32.
    let text_input_str = "dummy input";
    let length_prefix = (text_input_str.len() as u32).to_le_bytes();
    let serialized_input = [&length_prefix[..], text_input_str.as_bytes()].concat();

    let raw_request = ModelInferRequest {
        model_name: model_name.into(),
        model_version: "1".into(),
        id: "1234".into(),
        inputs: vec![raw_tensor],
        raw_input_contents: vec![serialized_input],
        ..Default::default()
    };
    let raw_response = client
        .model_infer(tonic::Request::new(raw_request))
        .await
        .unwrap();
    validate_infer_response(raw_response, model_name);
}
// Checks a unary inference response produced for the default "dummy input" text.
//
// Asserts that the response echoes `model_name` and that every output tensor is one of:
//   - "text_output":   BYTES, shape [1], containing exactly "dummyinput"
//   - "finish_reason": BYTES, shape [0]
// Any other output name fails the test. Outputs are optional: a response with no
// outputs passes the loop trivially.
fn validate_infer_response(response: Response<ModelInferResponse>, model_name: &str) {
    let infer = response.get_ref();
    assert_eq!(
        infer.model_name, model_name,
        "Expected response of the same model name",
    );

    for output in &infer.outputs {
        match output.name.as_str() {
            "text_output" => {
                assert_eq!(
                    output.datatype, "BYTES",
                    "Expected 'text_output' to have datatype 'BYTES'"
                );
                assert_eq!(
                    output.shape,
                    vec![1],
                    "Expected 'text_output' to have shape [1]"
                );
                // NOTE(review): the expected text is the input with its space removed,
                // presumably because the split engine's chunks are joined back together.
                let expected_output: Vec<Vec<u8>> = vec!["dummyinput".into()];
                assert_eq!(
                    output.contents.as_ref().unwrap().bytes_contents,
                    expected_output,
                    "Expected 'text_output' to contain 'dummyinput'"
                );
            }
            "finish_reason" => {
                assert_eq!(
                    output.datatype, "BYTES",
                    "Expected 'finish_reason' to have datatype 'BYTES'"
                );
                assert_eq!(
                    output.shape,
                    vec![0],
                    "Expected 'finish_reason' to have shape [0]"
                );
            }
            _ => panic!("Unexpected output name: {}", output.name),
        }
    }
}
// A unary ModelInfer call that the client abandons must be observed as a
// cancellation by the engine serving it.
#[rstest]
#[tokio::test]
async fn test_infer_cancellation(
    #[with(TestPort::InferCancellation as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);
    let long_running = service_with_engines.3;

    // create client and send request to the long running model
    let mut client = get_ready_client(TestPort::InferCancellation as u16, 5).await;

    let model_name = "long_running";
    let request = tonic::Request::new(ModelInferRequest {
        model_name: model_name.into(),
        model_version: "1".into(),
        id: "1234".into(),
        inputs: vec![text_input],
        ..Default::default()
    });

    assert!(
        !long_running.was_cancelled(),
        "Expected long running engine is not cancelled"
    );

    // Cancel the request by letting the 500ms timeout drop the in-flight request future.
    let timed_out = timeout(Duration::from_millis(500), client.model_infer(request))
        .await
        .is_err();
    if timed_out {
        println!("Cancelled request after 500ms");
    }
    assert!(timed_out, "Expected client timed out",);

    // Let the engine finish its delay before checking whether it saw the cancellation.
    long_running.wait_for_delay().await;
    assert!(
        long_running.was_cancelled(),
        "Expected long running engine to be cancelled"
    );
}
#[rstest]
#[tokio::test]
async
fn
test_stream_infer_success
(
#[with(TestPort::StreamInferSuccess
as
u16)]
service_with_engines
:
(
KserveService
,
Arc
<
SplitEngine
>
,
Arc
<
AlwaysFailEngine
>
,
Arc
<
LongRunningEngine
>
,
),
text_input
:
inference
::
model_infer_request
::
InferInputTensor
,
)
{
// start server
let
_
running
=
RunningService
::
spawn
(
service_with_engines
.0
);
// create client and send request to unregistered model
let
mut
client
=
get_ready_client
(
TestPort
::
StreamInferSuccess
as
u16
,
5
)
.await
;
let
model_name
=
"split"
;
let
request_id
=
"1234"
;
// Response streaming true
{
let
text_input
=
text_input
.clone
();
let
outbound
=
async_stream
::
stream!
{
let
request_count
=
1
;
for
_
in
0
..
request_count
{
let
request
=
ModelInferRequest
{
model_name
:
model_name
.into
(),
model_version
:
"1"
.into
(),
id
:
request_id
.into
(),
inputs
:
vec!
[
text_input
.clone
(),
inference
::
model_infer_request
::
InferInputTensor
{
name
:
"streaming"
.into
(),
datatype
:
"BOOL"
.into
(),
shape
:
vec!
[
1
],
contents
:
Some
(
inference
::
InferTensorContents
{
bool_contents
:
vec!
[
true
],
..
Default
::
default
()
}),
..
Default
::
default
()
}],
..
Default
::
default
()
};
yield
request
;
}
};
let
response
=
client
.model_stream_infer
(
Request
::
new
(
outbound
))
.await
.unwrap
();
let
mut
inbound
=
response
.into_inner
();
let
mut
response_idx
=
0
;
while
let
Some
(
response
)
=
inbound
.message
()
.await
.unwrap
()
{
assert
!
(
response
.error_message
.is_empty
(),
"Expected successful inference"
);
assert
!
(
response
.infer_response
.is_some
(),
"Expected successful inference"
);
if
let
Some
(
response
)
=
&
response
.infer_response
{
assert_eq!
(
response
.model_name
,
model_name
,
"Expected response of the same model name"
,
);
assert_eq!
(
response
.id
,
request_id
,
"Expected response ID to match request ID"
);
let
expected_output
:
Vec
<
Vec
<
u8
>>
=
vec!
[
"dummy"
.into
(),
"input"
.into
()];
for
output
in
&
response
.outputs
{
match
output
.name
.as_str
()
{
"text_output"
=>
{
assert_eq!
(
output
.datatype
,
"BYTES"
,
"Expected 'text_output' to have datatype 'BYTES'"
);
assert_eq!
(
output
.shape
,
vec!
[
1
],
"Expected 'text_output' to have shape [1]"
);
assert_eq!
(
output
.contents
.as_ref
()
.unwrap
()
.bytes_contents
,
vec!
[
expected_output
[
response_idx
]
.clone
()],
"Expected 'text_output' to contain 'dummy input'"
);
}
"finish_reason"
=>
{
assert_eq!
(
output
.datatype
,
"BYTES"
,
"Expected 'finish_reason' to have datatype 'BYTES'"
);
assert_eq!
(
output
.shape
,
vec!
[
0
],
"Expected 'finish_reason' to have shape [0]"
);
}
_
=>
panic!
(
"Unexpected output name: {}"
,
output
.name
),
}
}
}
response_idx
+=
1
;
}
assert_eq!
(
response_idx
,
2
,
"Expected 2 responses"
)
}
// Response streaming false
{
let
text_input
=
text_input
.clone
();
let
outbound
=
async_stream
::
stream!
{
let
request_count
=
2
;
for
idx
in
0
..
request_count
{
let
request
=
ModelInferRequest
{
model_name
:
model_name
.into
(),
model_version
:
"1"
.into
(),
id
:
format!
(
"{idx}"
),
inputs
:
vec!
[
text_input
.clone
()],
..
Default
::
default
()
};
yield
request
;
}
};
let
response
=
client
.model_stream_infer
(
Request
::
new
(
outbound
))
.await
.unwrap
();
let
mut
inbound
=
response
.into_inner
();
let
mut
response_idx
=
0
;
while
let
Some
(
response
)
=
inbound
.message
()
.await
.unwrap
()
{
assert
!
(
response
.error_message
.is_empty
(),
"Expected successful inference"
);
assert
!
(
response
.infer_response
.is_some
(),
"Expected successful inference"
);
// Each response is the complete inference
if
let
Some
(
response
)
=
&
response
.infer_response
{
assert_eq!
(
response
.model_name
,
model_name
,
"Expected response of the same model name"
,
);
// [gluo NOTE] Here we assume the responses across requests are
// processed in the order of receiving requests, which is not true
// if we improve stream handling in gRPC frontend. Consider:
// time 0: request 0 -> long running -> response 0 (time 5)
// time 1: request 1 -> short running -> response 1 (time 2)
// We expect response 1 to be received before response 0 as their
// requests are independent from each other.
assert_eq!
(
response
.id
,
format!
(
"{response_idx}"
),
"Expected response ID to match request ID"
);
for
output
in
&
response
.outputs
{
match
output
.name
.as_str
()
{
"text_output"
=>
{
assert_eq!
(
output
.datatype
,
"BYTES"
,
"Expected 'text_output' to have datatype 'BYTES'"
);
assert_eq!
(
output
.shape
,
vec!
[
1
],
"Expected 'text_output' to have shape [1]"
);
let
expected_output
:
Vec
<
Vec
<
u8
>>
=
vec!
[
"dummyinput"
.into
()];
assert_eq!
(
output
.contents
.as_ref
()
.unwrap
()
.bytes_contents
,
expected_output
,
"Expected 'text_output' to contain 'dummyinput'"
);
}
"finish_reason"
=>
{
assert_eq!
(
output
.datatype
,
"BYTES"
,
"Expected 'finish_reason' to have datatype 'BYTES'"
);
assert_eq!
(
output
.shape
,
vec!
[
0
],
"Expected 'finish_reason' to have shape [0]"
);
}
_
=>
panic!
(
"Unexpected output name: {}"
,
output
.name
),
}
}
}
response_idx
+=
1
;
}
assert_eq!
(
response_idx
,
2
,
"Expected 2 responses, each for one of the two requests"
)
}
}
// ModelStreamInfer against the always-failing engine must surface an Internal error
// on the response stream and never deliver a successful message.
#[rstest]
#[tokio::test]
async fn test_stream_infer_failure(
    #[with(TestPort::StreamInferFailure as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);

    // create client and send request to the always-failing model
    let mut client = get_ready_client(TestPort::StreamInferFailure as u16, 5).await;

    let model_name = "failure";
    let outbound = async_stream::stream! {
        yield ModelInferRequest {
            model_name: model_name.into(),
            model_version: "1".into(),
            id: "1234".into(),
            inputs: vec![text_input.clone()],
            ..Default::default()
        };
    };

    let mut inbound = client
        .model_stream_infer(Request::new(outbound))
        .await
        .unwrap()
        .into_inner();

    // Track whether the failure was actually reported; without this the test would
    // pass if the stream simply closed without any message.
    let mut saw_error = false;
    loop {
        match inbound.message().await {
            Ok(Some(_)) => {
                panic!("Expecting failure in the stream");
            }
            Err(err) => {
                assert_eq!(
                    err.code(),
                    tonic::Code::Internal,
                    "Expected Internal error for streaming, get {}",
                    err
                );
                assert!(
                    err.message().contains("Failed to generate completions:"),
                    "Expected error message to contain 'Failed to generate completions:', got: {}",
                    err.message()
                );
                saw_error = true;
                // Keep polling: the loop exits on the end-of-stream marker below.
            }
            Ok(None) => {
                // End of stream
                break;
            }
        }
    }
    assert!(
        saw_error,
        "Expected the stream to report an error before closing"
    );
}
// Dropping the response stream of a ModelStreamInfer call must be observed as a
// cancellation by the engine serving it.
#[rstest]
#[tokio::test]
async fn test_stream_infer_cancellation(
    #[with(TestPort::StreamInferCancellation as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
    text_input: inference::model_infer_request::InferInputTensor,
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);
    let long_running = service_with_engines.3;

    // create client and send request to the long running model
    let mut client = get_ready_client(TestPort::StreamInferCancellation as u16, 5).await;

    let model_name = "long_running";
    let outbound = async_stream::stream! {
        yield ModelInferRequest {
            model_name: model_name.into(),
            model_version: "1".into(),
            id: "1234".into(),
            inputs: vec![text_input.clone()],
            ..Default::default()
        };
    };

    assert!(
        !long_running.was_cancelled(),
        "Expected long running engine is still running"
    );

    // Opening the stream should return right away; the 500ms timeout only guards
    // against the call itself blocking on the long-running engine.
    let stream_response = timeout(
        Duration::from_millis(500),
        client.model_stream_infer(Request::new(outbound)),
    )
    .await
    .unwrap_or_else(|_| panic!("Expected response stream is returned immediately"))
    .unwrap();

    // Drop the response to cancel the stream
    drop(stream_response);

    // Let the engine finish its delay before checking whether it saw the cancellation.
    long_running.wait_for_delay().await;
    assert!(
        long_running.was_cancelled(),
        "Expected long running engine to be cancelled"
    );
}
// ModelMetadata and ModelConfig must return NotFound for an unregistered model and
// describe the expected inputs/outputs for the registered "split" model:
//   inputs:  "text_input" (BYTES / TYPE_STRING, [1]), "streaming" (BOOL / TYPE_BOOL, [1])
//   outputs: "text_output" and "finish_reason" (BYTES / TYPE_STRING, [-1])
#[rstest]
#[tokio::test]
async fn test_model_info(
    #[with(TestPort::ModelInfo as u16)] service_with_engines: (
        KserveService,
        Arc<SplitEngine>,
        Arc<AlwaysFailEngine>,
        Arc<LongRunningEngine>,
    ),
) {
    // start server
    let _running = RunningService::spawn(service_with_engines.0);

    // create client once the server accepts connections
    let mut client = get_ready_client(TestPort::ModelInfo as u16, 5).await;

    // Failure unknown_model
    let request = tonic::Request::new(ModelMetadataRequest {
        name: "Tonic".into(),
        version: "".into(),
    });
    let err = client
        .model_metadata(request)
        .await
        .expect_err("Expected metadata request for an unregistered model to fail");
    assert_eq!(
        err.code(),
        tonic::Code::NotFound,
        "Expected NotFound error for unregistered model, get {}",
        err
    );

    let request = tonic::Request::new(ModelConfigRequest {
        name: "Tonic".into(),
        version: "".into(),
    });
    let err = client
        .model_config(request)
        .await
        .expect_err("Expected config request for an unregistered model to fail");
    assert_eq!(
        err.code(),
        tonic::Code::NotFound,
        "Expected NotFound error for unregistered model, get {}",
        err
    );

    // Success metadata
    let model_name = "split";
    let request = tonic::Request::new(ModelMetadataRequest {
        name: model_name.into(),
        version: "1".into(),
    });
    let metadata = client.model_metadata(request).await.unwrap();
    assert_eq!(
        metadata.get_ref().name,
        model_name,
        "Expected response of the same model name",
    );

    // input
    for io in &metadata.get_ref().inputs {
        let (datatype, shape) = match io.name.as_str() {
            "text_input" => ("BYTES", vec![1]),
            "streaming" => ("BOOL", vec![1]),
            _ => panic!("Unexpected input name: {}", io.name),
        };
        assert_eq!(
            io.datatype, datatype,
            "Expected '{}' to have datatype '{}'",
            io.name, datatype
        );
        assert_eq!(
            io.shape, shape,
            "Expected '{}' to have shape {:?}",
            io.name, shape
        );
    }

    // output
    for io in &metadata.get_ref().outputs {
        let (datatype, shape) = match io.name.as_str() {
            "text_output" => ("BYTES", vec![-1]),
            "finish_reason" => ("BYTES", vec![-1]),
            _ => panic!("Unexpected output name: {}", io.name),
        };
        assert_eq!(
            io.datatype, datatype,
            "Expected '{}' to have datatype '{}'",
            io.name, datatype
        );
        assert_eq!(
            io.shape, shape,
            "Expected '{}' to have shape {:?}",
            io.name, shape
        );
    }

    // success config
    let request = tonic::Request::new(ModelConfigRequest {
        name: model_name.into(),
        version: "1".into(),
    });
    let response = client
        .model_config(request)
        .await
        .unwrap()
        .into_inner()
        .config;
    let Some(config) = response else {
        panic!("Expected Some(config), got None");
    };
    assert_eq!(
        config.name, model_name,
        "Expected response of the same model name",
    );

    // input
    for io in &config.input {
        let (data_type, type_name, dims) = match io.name.as_str() {
            "text_input" => (DataType::TypeString, "TYPE_STRING", vec![1]),
            "streaming" => (DataType::TypeBool, "TYPE_BOOL", vec![1]),
            _ => panic!("Unexpected input name: {}", io.name),
        };
        assert_eq!(
            io.data_type,
            data_type as i32,
            "Expected '{}' to have datatype '{}'",
            io.name,
            type_name
        );
        assert_eq!(
            io.dims, dims,
            "Expected '{}' to have shape {:?}",
            io.name, dims
        );
    }

    // output
    for io in &config.output {
        let (data_type, type_name, dims) = match io.name.as_str() {
            "text_output" => (DataType::TypeString, "TYPE_STRING", vec![-1]),
            "finish_reason" => (DataType::TypeString, "TYPE_STRING", vec![-1]),
            _ => panic!("Unexpected output name: {}", io.name),
        };
        assert_eq!(
            io.data_type,
            data_type as i32,
            "Expected '{}' to have datatype '{}'",
            io.name,
            type_name
        );
        assert_eq!(
            io.dims, dims,
            "Expected '{}' to have shape {:?}",
            io.name, dims
        );
    }
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment