Commit ffc6dde1 authored by Ryan Olson, committed by GitHub

feat: OpenAI compatible http service (#123)


Signed-off-by: Ryan Olson <ryanolson@users.noreply.github.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
Co-authored-by: Neelay Shah <neelays@nvidia.com>
parent 9d6643b7
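
For context on the feature itself: an OpenAI-compatible HTTP service is one that mirrors OpenAI's REST surface (for example, POST /v1/chat/completions with OpenAI's request and response shapes) so existing OpenAI clients work unchanged. The sketch below shows that endpoint shape using axum, which appears in the lockfile added by this commit. It is an illustrative assumption, not this commit's implementation: the types are abbreviated, and the port, route handler, and echo stub are placeholders.

// Minimal sketch (not the commit's code) of an OpenAI-style
// chat-completions endpoint built on axum. Field sets are abbreviated;
// the reply is a hard-coded stub standing in for real inference.
use axum::{routing::post, Json, Router};
use serde::{Deserialize, Serialize};

#[derive(Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<Message>,
}

#[derive(Serialize, Deserialize)]
struct Message {
    role: String,
    content: String,
}

#[derive(Serialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Serialize)]
struct Choice {
    index: u32,
    message: Message,
    finish_reason: String,
}

async fn chat_completions(
    Json(req): Json<ChatCompletionRequest>,
) -> Json<ChatCompletionResponse> {
    // Echo back the requested model with a stub assistant message.
    Json(ChatCompletionResponse {
        id: "chatcmpl-0".into(),
        object: "chat.completion".into(),
        model: req.model,
        choices: vec![Choice {
            index: 0,
            message: Message {
                role: "assistant".into(),
                content: "stub reply".into(),
            },
            finish_reason: "stop".into(),
        }],
    })
}

#[tokio::main]
async fn main() {
    // Route shape matches the OpenAI API path; the port is illustrative.
    let app = Router::new().route("/v1/chat/completions", post(chat_completions));
    let listener = tokio::net::TcpListener::bind("0.0.0.0:8080").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}
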
...@@ -43,7 +43,7 @@ repos: ...@@ -43,7 +43,7 @@ repos:
- id: codespell - id: codespell
additional_dependencies: [tomli] additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"] args: ["--toml", "pyproject.toml"]
exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$|.*tests/data/replays.*)
# More details about these pre-commit hooks here: # More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html # https://pre-commit.com/hooks.html
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
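
The added alternative `.*tests/data/replays.*` keeps codespell from flagging recorded replay fixtures: pre-commit matches the `exclude` regex against each candidate file path and skips matches. A minimal sketch of how the widened pattern behaves, using Rust's regex crate (the pattern is evaluated by pre-commit as a Python regex, but this syntax is compatible); the sample paths are illustrative, not taken from the repository.

// Minimal sketch (not part of the commit): apply the exclude pattern
// to a few hypothetical paths and report which ones would be skipped.
use regex::Regex;

fn main() {
    let exclude = Regex::new(
        r"(?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$|.*tests/data/replays.*)",
    )
    .unwrap();
    for path in ["CHANGELOG.md", "tests/data/replays/chat.json", "src/main.rs"] {
        let skipped = exclude.is_match(path);
        println!("{path}: {}", if skipped { "skipped" } else { "spell-checked" });
    }
    // CHANGELOG.md and the replay fixture match and are skipped;
    // src/main.rs matches no alternative and is still spell-checked.
}
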
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anyhow"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "async-nats"
version = "0.38.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76433c4de73442daedb3a59e991d94e85c14ebfc33db53dfcd347a21cd6ef4f8"
dependencies = [
"base64",
"bytes",
"futures",
"memchr",
"nkeys",
"nuid",
"once_cell",
"pin-project",
"portable-atomic",
"rand",
"regex",
"ring",
"rustls-native-certs 0.7.3",
"rustls-pemfile",
"rustls-webpki",
"serde",
"serde_json",
"serde_nanos",
"serde_repr",
"thiserror 1.0.69",
"time",
"tokio",
"tokio-rustls",
"tokio-util",
"tokio-websockets",
"tracing",
"tryhard",
"url",
]
[[package]]
name = "async-once-cell"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-stream"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
dependencies = [
"async-stream-impl",
"futures-core",
"pin-project-lite",
]
[[package]]
name = "async-stream-impl"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "async-trait"
version = "0.1.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "atomic"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
dependencies = [
"bytemuck",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"itoa",
"matchit 0.7.3",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper",
"tower 0.5.2",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
dependencies = [
"axum-core 0.5.0",
"bytes",
"form_urlencoded",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"itoa",
"matchit 0.8.4",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "base64ct"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bitflags"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
name = "blake3"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bstr"
version = "1.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytemuck"
version = "1.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
dependencies = [
"serde",
]
[[package]]
name = "cc"
version = "1.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2"
dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
version = "0.4.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets",
]
[[package]]
name = "console"
version = "0.15.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"windows-sys 0.59.0",
]
[[package]]
name = "const-oid"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "core-foundation"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "curve25519-dalek"
version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
"cfg-if",
"cpufeatures",
"curve25519-dalek-derive",
"digest",
"fiat-crypto",
"rustc_version",
"subtle",
]
[[package]]
name = "curve25519-dalek-derive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "darling"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 2.0.98",
]
[[package]]
name = "darling_macro"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
dependencies = [
"darling_core",
"quote",
"syn 2.0.98",
]
[[package]]
name = "data-encoding"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e60eed09d8c01d3cee5b7d30acb059b76614c918fa0f992e0dd6eeb10daad6f"
[[package]]
name = "der"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0"
dependencies = [
"const-oid",
"pem-rfc7468",
"zeroize",
]
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
"serde",
]
[[package]]
name = "derive-getters"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74ef43543e701c01ad77d3a5922755c6a1d71b22d942cb8042be4994b380caff"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn 2.0.98",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "ed25519"
version = "2.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
dependencies = [
"signature",
]
[[package]]
name = "ed25519-dalek"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871"
dependencies = [
"curve25519-dalek",
"ed25519",
"sha2",
"signature",
"subtle",
]
[[package]]
name = "educe"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417"
dependencies = [
"enum-ordinalize",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
dependencies = [
"serde",
]
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "enum-ordinalize"
version = "4.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5"
dependencies = [
"enum-ordinalize-derive",
]
[[package]]
name = "enum-ordinalize-derive"
version = "4.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
dependencies = [
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "etcd-client"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0452bcc559431b16f472b7ab86e2f9ccd5f3c2da3795afbd6b773665e047fe"
dependencies = [
"http",
"prost",
"tokio",
"tokio-stream",
"tonic",
"tonic-build",
"tower 0.4.13",
"tower-service",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fiat-crypto"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
[[package]]
name = "figment"
version = "0.10.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
dependencies = [
"atomic",
"parking_lot",
"pear",
"serde",
"serde_json",
"tempfile",
"toml",
"uncased",
"version_check",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-executor"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]]
name = "futures-macro"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "futures-sink"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
[[package]]
name = "futures-task"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-util"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
]
[[package]]
name = "getrandom"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
dependencies = [
"cfg-if",
"libc",
"wasi 0.13.3+wasi-0.2.2",
"windows-targets",
]
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "globset"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "h2"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http",
"indexmap 2.7.1",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "http"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea"
dependencies = [
"bytes",
"fnv",
"itoa",
]
[[package]]
name = "http-body"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http",
]
[[package]]
name = "http-body-util"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a"
[[package]]
name = "httpdate"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "hyper"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2",
"http",
"http-body",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.27.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
dependencies = [
"futures-util",
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
"webpki-roots",
]
[[package]]
name = "hyper-timeout"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
dependencies = [
"hyper",
"hyper-util",
"pin-project-lite",
"tokio",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http",
"http-body",
"hyper",
"pin-project-lite",
"socket2",
"tokio",
"tower-service",
"tracing",
]
[[package]]
name = "iana-time-zone"
version = "0.1.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
]
[[package]]
name = "inlinable_string"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "insta"
version = "1.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71c1b125e30d93896b365e156c33dadfffab45ee8400afcbba4752f59de08a86"
dependencies = [
"console",
"globset",
"linked-hash-map",
"once_cell",
"pest",
"pest_derive",
"pin-project",
"serde",
"similar",
"walkdir",
]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]]
name = "litemap"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "local-ip-address"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3669cf5561f8d27e8fc84cc15e58350e70f557d4d65f70e3154e54cd2f8e1782"
dependencies = [
"libc",
"neli",
"thiserror 1.0.69",
"windows-sys 0.59.0",
]
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "mime"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "miniz_oxide"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924"
dependencies = [
"adler2",
]
[[package]]
name = "mio"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
]
[[package]]
name = "multimap"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
[[package]]
name = "neli"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93062a0dce6da2517ea35f301dfc88184ce18d3601ec786a727a87bf535deca9"
dependencies = [
"byteorder",
"libc",
"log",
"neli-proc-macros",
]
[[package]]
name = "neli-proc-macros"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c8034b7fbb6f9455b2a96c19e6edf8dc9fc34c70449938d8ee3b4df363f61fe"
dependencies = [
"either",
"proc-macro2",
"quote",
"serde",
"syn 1.0.109",
]
[[package]]
name = "nid"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4abdf1789932b85dc39446e27f45a1064a30f9e19a2b872b1d09bd59283f85f3"
dependencies = [
"rand",
"serde",
"thiserror 1.0.69",
]
[[package]]
name = "nix"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [
"bitflags",
"cfg-if",
"cfg_aliases",
"libc",
]
[[package]]
name = "nkeys"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f49e787f4c61cbd0f9320b31cc26e58719f6aa5068e34697dd3aea361412fe3"
dependencies = [
"data-encoding",
"ed25519",
"ed25519-dalek",
"getrandom 0.2.15",
"log",
"rand",
"signatory",
]
[[package]]
name = "nuid"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc895af95856f929163a0aa20c26a78d26bfdc839f51b9d5aa7a5b79e52b7e83"
dependencies = [
"rand",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "openssl-probe"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
[[package]]
name = "parking_lot"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "pear"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
dependencies = [
"inlinable_string",
"pear_codegen",
"yansi",
]
[[package]]
name = "pear_codegen"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
dependencies = [
"proc-macro2",
"proc-macro2-diagnostics",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pem-rfc7468"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
dependencies = [
"base64ct",
]
[[package]]
name = "percent-encoding"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pest"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc"
dependencies = [
"memchr",
"thiserror 2.0.11",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pest_meta"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea"
dependencies = [
"once_cell",
"pest",
"sha2",
]
[[package]]
name = "petgraph"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap 2.7.1",
]
[[package]]
name = "pin-project"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkcs8"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
dependencies = [
"der",
"spki",
]
[[package]]
name = "portable-atomic"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]]
name = "prettyplease"
version = "0.2.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac"
dependencies = [
"proc-macro2",
"syn 2.0.98",
]
[[package]]
name = "proc-macro-error-attr2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
dependencies = [
"proc-macro2",
"quote",
]
[[package]]
name = "proc-macro-error2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
dependencies = [
"proc-macro-error-attr2",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "proc-macro2-diagnostics"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"version_check",
"yansi",
]
[[package]]
name = "prometheus"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1"
dependencies = [
"cfg-if",
"fnv",
"lazy_static",
"memchr",
"parking_lot",
"protobuf",
"thiserror 1.0.69",
]
[[package]]
name = "proptest"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50"
dependencies = [
"bit-set",
"bit-vec",
"bitflags",
"lazy_static",
"num-traits",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
"unarray",
]
[[package]]
name = "prost"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-build"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b"
dependencies = [
"heck",
"itertools",
"log",
"multimap",
"once_cell",
"petgraph",
"prettyplease",
"prost",
"prost-types",
"regex",
"syn 2.0.98",
"tempfile",
]
[[package]]
name = "prost-derive"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "prost-types"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc"
dependencies = [
"prost",
]
[[package]]
name = "protobuf"
version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.52.0",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom 0.2.15",
]
[[package]]
name = "rand_xorshift"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
dependencies = [
"base64",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"once_cell",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-pemfile",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"webpki-roots",
"windows-registry",
]
[[package]]
name = "ring"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [
"cc",
"cfg-if",
"getrandom 0.2.15",
"libc",
"spin",
"untrusted",
"windows-sys 0.52.0",
]
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.59.0",
]
[[package]]
name = "rustls"
version = "0.23.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7"
dependencies = [
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki",
"subtle",
"zeroize",
]
[[package]]
name = "rustls-native-certs"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5"
dependencies = [
"openssl-probe",
"rustls-pemfile",
"rustls-pki-types",
"schannel",
"security-framework 2.11.1",
]
[[package]]
name = "rustls-native-certs"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
dependencies = [
"openssl-probe",
"rustls-pki-types",
"schannel",
"security-framework 3.2.0",
]
[[package]]
name = "rustls-pemfile"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]]
name = "rustls-webpki"
version = "0.102.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4"
[[package]]
name = "rusty-fork"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
dependencies = [
"fnv",
"quick-error",
"tempfile",
"wait-timeout",
]
[[package]]
name = "ryu"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "schannel"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "security-framework"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316"
dependencies = [
"bitflags",
"core-foundation 0.10.0",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "semver"
version = "1.0.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
[[package]]
name = "serde"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "serde_json"
version = "1.0.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "serde_nanos"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a93142f0367a4cc53ae0fead1bcda39e85beccfad3dcd717656cacab94b12985"
dependencies = [
"serde",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
dependencies = [
"itoa",
"serde",
]
[[package]]
name = "serde_repr"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "serde_spanned"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
dependencies = [
"serde",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
dependencies = [
"libc",
]
[[package]]
name = "signatory"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31"
dependencies = [
"pkcs8",
"rand_core",
"signature",
"zeroize",
]
[[package]]
name = "signature"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core",
]
[[package]]
name = "similar"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "socket2"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "spki"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
dependencies = [
"base64ct",
"der",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tempfile"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91"
dependencies = [
"cfg-if",
"fastrand",
"getrandom 0.3.1",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl 1.0.69",
]
[[package]]
name = "thiserror"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
dependencies = [
"thiserror-impl 2.0.11",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "thiserror-impl"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "time"
version = "0.3.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.43.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",
]
[[package]]
name = "tokio-macros"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tokio-rustls"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37"
dependencies = [
"rustls",
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078"
dependencies = [
"bytes",
"futures-core",
"futures-sink",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-websockets"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d"
dependencies = [
"base64",
"bytes",
"futures-core",
"futures-sink",
"http",
"httparse",
"rand",
"ring",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-util",
]
[[package]]
name = "toml"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee"
dependencies = [
"indexmap 2.7.1",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]]
name = "tonic"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum 0.7.9",
"base64",
"bytes",
"h2",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-timeout",
"hyper-util",
"percent-encoding",
"pin-project",
"prost",
"socket2",
"tokio",
"tokio-stream",
"tower 0.4.13",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-build"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
dependencies = [
"prettyplease",
"proc-macro2",
"prost-build",
"prost-types",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
[[package]]
name = "tower-service"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
[[package]]
name = "tracing"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tracing-core"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
dependencies = [
"once_cell",
]
[[package]]
name = "triton-distributed"
version = "0.1.3"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"blake3",
"bytes",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"local-ip-address",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-llm"
version = "0.1.3"
dependencies = [
"anyhow",
"async-stream",
"async-trait",
"axum 0.8.1",
"bytes",
"chrono",
"derive_builder",
"futures",
"insta",
"prometheus",
"proptest",
"regex",
"reqwest",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"triton-distributed",
"unicode-segmentation",
"uuid",
"validator",
]
[[package]]
name = "try-lock"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "tryhard"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c9f0a709784e86923586cff0d872dba54cd2d2e116b3bc57587d15737cfce9d"
dependencies = [
"futures",
"pin-project-lite",
"tokio",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-trie"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "unarray"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "uncased"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
dependencies = [
"version_check",
]
[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "untrusted"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
version = "2.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0"
dependencies = [
"getrandom 0.3.1",
"serde",
]
[[package]]
name = "validator"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa"
dependencies = [
"idna",
"once_cell",
"regex",
"serde",
"serde_derive",
"serde_json",
"url",
"validator_derive",
]
[[package]]
name = "validator_derive"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca"
dependencies = [
"darling",
"once_cell",
"proc-macro-error2",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.13.3+wasi-0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn 2.0.98",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86e376c75f4f43f44db463cf729e0d3acbf954d13e22c51e26e4c264b4ab545f"
dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen-rt"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [
"bitflags",
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "xxhash-rust"
version = "0.8.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yoke"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"synstructure",
]
[[package]]
name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[workspace]
members = [
"triton-llm",
]
resolver = "2"
[workspace.package]
version = "0.1.3"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed"
repository = "https://github.com/triton-inference-server/triton_distributed"
[workspace.dependencies]
# local or crates.io
triton-distributed = { version = "0.1.3", path = "../../runtime/rust" }
# crates.io
anyhow = { version = "1" }
async-stream = { version = "0.3" }
async-trait = { version = "0.1" }
bytes = "1"
derive_builder = "0.20"
futures = "0.3"
serde = { version = "1", features = ["derive"] }
thiserror = { version = "2.0.11" }
tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = { version = "0.1" }
validator = { version = "0.20.0", features = ["derive"] }
uuid = { version = "1", features = ["v4", "serde"] }
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name = "triton-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
[dependencies]
# repo
triton-distributed = { workspace = true }
# workspace
anyhow = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
derive_builder = { workspace = true }
futures = { workspace = true }
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }
uuid = { workspace = true }
# protocols
chrono = { version = "0.4" }
serde_json = { version = "1" }
regex = "1"
unicode-segmentation = "1.12"
# http-service
axum = "0.8"
prometheus = { version = "0.13" }
[dev-dependencies]
insta = { version = "1.41", features = ["glob", "json", "redactions"] }
proptest = "1.5.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "rustls-tls"] }
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod service;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! HTTP Service for Nova LLM
//!
//! The primary purpose of this crate is to service the nova-llm-protocols via OpenAI compatible HTTP endpoints. This component
//! is meant to be a gateway/ingress into the Nova LLM Distributed Runtime.
//!
//! In order to create a common pattern, the HttpService forwards the incoming OAI Chat Request or OAI Completion Request
//! to a model-specific engine. Engines can be attached and detached dynamically using the [`ModelManager`].
//!
//! Note: All requests, whether the client requests `stream=true` or `stream=false`, are propagated downstream as `stream=true`.
//! This enables us to handle a single request-response pattern in the downstream services. Non-streaming user requests are
//! aggregated by the HttpService and returned as a single response.
//!
//! TODO(): Add support for model-specific metadata and status. Status will allow us to return a 503 when the model is supposed
//! to be ready, but there is a problem with the model.
//!
//! The [`service::HttpService`] can be further extended to host any [`axum::Router`] using the [`service::HttpServiceBuilder`].
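//!
//! A minimal usage sketch (`my_engine` is a hypothetical value implementing the
//! chat-completions streaming engine trait; not a runnable doctest):
//!
//! ```ignore
//! let manager = ModelManager::new();
//! manager.add_chat_completions_model("my-model", my_engine)?;
//! assert!(manager.has_model_any("my-model"));
//! manager.remove_chat_completions_model("my-model")?;
//! ```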
mod openai;
pub mod error;
pub mod metrics;
pub mod service_v2;
// #[cfg(feature = "py3")]
// pub mod py3;
pub use async_trait::async_trait;
pub use axum;
pub use error::ServiceHttpError;
pub use metrics::Metrics;
use crate::types::openai::{
chat_completions::OpenAIChatCompletionsStreamingEngine,
completions::OpenAICompletionsStreamingEngine,
};
use std::{
collections::HashMap,
sync::{Arc, Mutex},
};
#[derive(Clone)]
pub struct ModelManager {
state: Arc<DeploymentState>,
}
impl Default for ModelManager {
fn default() -> Self {
Self::new()
}
}
impl ModelManager {
pub fn new() -> Self {
let state = Arc::new(DeploymentState::new());
Self { state }
}
pub fn state(&self) -> Arc<DeploymentState> {
self.state.clone()
}
pub fn has_model_any(&self, model: &str) -> bool {
self.state
.chat_completion_engines
.lock()
.unwrap()
.contains(model)
|| self
.state
.completion_engines
.lock()
.unwrap()
.contains(model)
}
pub fn list_chat_completions_models(&self) -> Vec<String> {
self.state.chat_completion_engines.lock().unwrap().list()
}
pub fn list_completions_models(&self) -> Vec<String> {
self.state.completion_engines.lock().unwrap().list()
}
pub fn add_completions_model(
&self,
model: &str,
engine: OpenAICompletionsStreamingEngine,
) -> Result<(), ServiceHttpError> {
let mut clients = self.state.completion_engines.lock().unwrap();
clients.add(model, engine)
}
pub fn add_chat_completions_model(
&self,
model: &str,
engine: OpenAIChatCompletionsStreamingEngine,
) -> Result<(), ServiceHttpError> {
let mut clients = self.state.chat_completion_engines.lock().unwrap();
clients.add(model, engine)
}
pub fn remove_completions_model(&self, model: &str) -> Result<(), ServiceHttpError> {
let mut clients = self.state.completion_engines.lock().unwrap();
clients.remove(model)
}
pub fn remove_chat_completions_model(&self, model: &str) -> Result<(), ServiceHttpError> {
let mut clients = self.state.chat_completion_engines.lock().unwrap();
clients.remove(model)
}
/// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests
pub fn metrics(&self) -> Arc<Metrics> {
self.state.metrics.clone()
}
}
struct ModelEngines<E> {
/// Optional default model name
default: Option<String>,
engines: HashMap<String, E>,
}
impl<E> Default for ModelEngines<E> {
fn default() -> Self {
Self {
default: None,
engines: HashMap::new(),
}
}
}
impl<E> ModelEngines<E> {
#[allow(dead_code)]
fn set_default(&mut self, model: &str) {
self.default = Some(model.to_string());
}
#[allow(dead_code)]
fn clear_default(&mut self) {
self.default = None;
}
fn add(&mut self, model: &str, engine: E) -> Result<(), ServiceHttpError> {
if self.engines.contains_key(model) {
return Err(ServiceHttpError::ModelAlreadyExists(model.to_string()));
}
self.engines.insert(model.to_string(), engine);
Ok(())
}
fn remove(&mut self, model: &str) -> Result<(), ServiceHttpError> {
if self.engines.remove(model).is_none() {
return Err(ServiceHttpError::ModelNotFound(model.to_string()));
}
Ok(())
}
fn get(&self, model: &str) -> Option<&E> {
self.engines.get(model)
}
fn contains(&self, model: &str) -> bool {
self.engines.contains_key(model)
}
fn list(&self) -> Vec<String> {
self.engines.keys().map(|k| k.to_owned()).collect()
}
}
/// The DeploymentState is a global state that is shared across all the workers;
/// it provides the set of known engine clients.
pub struct DeploymentState {
completion_engines: Arc<Mutex<ModelEngines<OpenAICompletionsStreamingEngine>>>,
chat_completion_engines: Arc<Mutex<ModelEngines<OpenAIChatCompletionsStreamingEngine>>>,
metrics: Arc<Metrics>,
}
impl DeploymentState {
fn new() -> Self {
Self {
completion_engines: Arc::new(Mutex::new(ModelEngines::default())),
chat_completion_engines: Arc::new(Mutex::new(ModelEngines::default())),
metrics: Arc::new(Metrics::default()),
}
}
fn get_completions_engine(
&self,
model: &str,
) -> Result<OpenAICompletionsStreamingEngine, ServiceHttpError> {
self.completion_engines
.lock()
.unwrap()
.get(model)
.cloned()
.ok_or(ServiceHttpError::ModelNotFound(model.to_string()))
}
fn get_chat_completions_engine(
&self,
model: &str,
) -> Result<OpenAIChatCompletionsStreamingEngine, ServiceHttpError> {
self.chat_completion_engines
.lock()
.unwrap()
.get(model)
.cloned()
.ok_or(ServiceHttpError::ModelNotFound(model.to_string()))
}
}
/// Documentation for a route
#[derive(Debug)]
pub struct RouteDoc {
method: axum::http::Method,
path: String,
}
impl std::fmt::Display for RouteDoc {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{} {}", self.method, self.path)
}
}
impl RouteDoc {
pub fn new<T: Into<String>>(method: axum::http::Method, path: T) -> Self {
RouteDoc {
method,
path: path.into(),
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ServiceHttpError {
#[error("Model not found: {0}")]
ModelNotFound(String),
#[error("Model already exists: {0}")]
ModelAlreadyExists(String),
}
/// Implementations of the Completion Engines served by the HTTP service should
/// map their custom errors to this error type if they wish to return error
/// codes besides 500.
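///
/// A sketch of how an engine might surface a client-visible error (hypothetical message;
/// `from_anyhow` in the openai module downcasts this back to an `HttpError` and maps it
/// to the corresponding 4xx status code):
///
/// ```ignore
/// return Err(anyhow::Error::new(HttpError {
///     code: 400,
///     message: "prompt must not be empty".to_string(),
/// }));
/// ```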
#[derive(Debug, Error)]
#[error("HTTP Error {code}: {message}")]
pub struct HttpError {
pub code: u16,
pub message: String,
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
use prometheus::{Encoder, HistogramOpts, HistogramVec, IntCounterVec, IntGaugeVec, Opts};
use std::{sync::Arc, time::Instant};
pub use prometheus::Registry;
use super::{DeploymentState, RouteDoc};
/// Value for the `status` label in the request counter for successful requests
pub const REQUEST_STATUS_SUCCESS: &str = "success";
/// Value for the `status` label in the request counter if the request failed
pub const REQUEST_STATUS_ERROR: &str = "error";
/// Value for the `request_type` label in the request counter for streaming requests
pub const REQUEST_TYPE_STREAM: &str = "stream";
/// Value for the `request_type` label in the request counter for unary requests
pub const REQUEST_TYPE_UNARY: &str = "unary";
pub struct Metrics {
request_counter: IntCounterVec,
inflight_gauge: IntGaugeVec,
request_duration: HistogramVec,
}
/// RAII object for the inflight gauge and request counters.
/// If this object is dropped without `mark_ok` having been called, the request counter is
/// incremented with the `status` label set to [`REQUEST_STATUS_ERROR`]; otherwise, it is
/// incremented with the `status` label set to [`REQUEST_STATUS_SUCCESS`]
pub struct InflightGuard {
metrics: Arc<Metrics>,
model: String,
endpoint: Endpoint,
request_type: RequestType,
status: Status,
timer: Instant,
}
/// Requests will be logged by the type of endpoint hit
/// This will include llamastack in the future
pub enum Endpoint {
/// OAI Completions
Completions,
/// OAI Chat Completions
ChatCompletions,
}
/// The type of request: unary (single response) or streaming (many responses)
pub enum RequestType {
/// SingleIn / SingleOut
Unary,
/// SingleIn / ManyOut
Stream,
}
/// Terminal status of a request: success or error
pub enum Status {
Success,
Error,
}
impl Default for Metrics {
fn default() -> Self {
Self::new("nv_llm")
}
}
impl Metrics {
/// Create Metrics with the given prefix
/// The following metrics will be created:
/// - `{prefix}_http_service_requests_total` - IntCounterVec for the total number of requests processed
/// - `{prefix}_http_service_inflight_requests` - IntGaugeVec for the number of inflight requests
/// - `{prefix}_http_service_request_duration_seconds` - HistogramVec for the duration of requests
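///
/// A sketch of creating and registering the metrics (assumes a fresh registry; `nv_llm`
/// is the prefix used by `Metrics::default()`):
///
/// ```ignore
/// let metrics = Metrics::new("nv_llm");
/// let registry = Registry::new();
/// metrics.register(&registry).unwrap();
/// // scrape output now includes nv_llm_http_service_requests_total, etc.
/// ```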
pub fn new(prefix: &str) -> Self {
let request_counter = IntCounterVec::new(
Opts::new(
format!("{}_http_service_requests_total", prefix),
"Total number of LLM requests processed",
),
&["model", "endpoint", "request_type", "status"],
)
.unwrap();
let inflight_gauge = IntGaugeVec::new(
Opts::new(
format!("{}_http_service_inflight_requests", prefix),
"Number of inflight requests",
),
&["model"],
)
.unwrap();
let buckets = vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0];
let request_duration = HistogramVec::new(
HistogramOpts::new(
format!("{}_http_service_request_duration_seconds", prefix),
"Duration of LLM requests",
)
.buckets(buckets),
&["model"],
)
.unwrap();
Metrics {
request_counter,
inflight_gauge,
request_duration,
}
}
/// Get the number of requests recorded for the given dimensions:
/// - model
/// - endpoint (completions/chat_completions)
/// - request type (unary/stream)
/// - status (success/error)
pub fn get_request_counter(
&self,
model: &str,
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
) -> u64 {
self.request_counter
.with_label_values(&[
model,
endpoint.as_str(),
request_type.as_str(),
status.as_str(),
])
.get()
}
/// Increment the counter for requests for the given dimensions:
/// - model
/// - endpoint (completions/chat_completions)
/// - request type (unary/stream)
/// - status (success/error)
fn inc_request_counter(
&self,
model: &str,
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
) {
self.request_counter
.with_label_values(&[
model,
endpoint.as_str(),
request_type.as_str(),
status.as_str(),
])
.inc()
}
/// Get the number of inflight requests for the given model
pub fn get_inflight_count(&self, model: &str) -> i64 {
self.inflight_gauge.with_label_values(&[model]).get()
}
fn inc_inflight_gauge(&self, model: &str) {
self.inflight_gauge.with_label_values(&[model]).inc()
}
fn dec_inflight_gauge(&self, model: &str) {
self.inflight_gauge.with_label_values(&[model]).dec()
}
pub fn register(&self, registry: &Registry) -> Result<(), prometheus::Error> {
registry.register(Box::new(self.request_counter.clone()))?;
registry.register(Box::new(self.inflight_gauge.clone()))?;
registry.register(Box::new(self.request_duration.clone()))?;
Ok(())
}
}
impl DeploymentState {
/// Create a new [`InflightGuard`] for the given model, annotating whether it is a streaming
/// request and which kind of endpoint was hit
///
/// The [`InflightGuard`] is an RAII object that will handle incrementing the inflight gauge and
/// request counters.
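///
/// A sketch of typical handler usage (hypothetical model name):
///
/// ```ignore
/// let mut inflight = state.create_inflight_guard("my-model", Endpoint::Completions, streaming);
/// // ... issue the generate call and build the response ...
/// inflight.mark_ok(); // without this, dropping the guard records an "error" status
/// ```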
pub fn create_inflight_guard(
&self,
model: &str,
endpoint: Endpoint,
streaming: bool,
) -> InflightGuard {
let request_type = if streaming {
RequestType::Stream
} else {
RequestType::Unary
};
InflightGuard::new(
self.metrics.clone(),
model.to_string(),
endpoint,
request_type,
)
}
}
impl InflightGuard {
fn new(
metrics: Arc<Metrics>,
model: String,
endpoint: Endpoint,
request_type: RequestType,
) -> Self {
// Start the timer
let timer = Instant::now();
// Increment the inflight gauge when the guard is created
metrics.inc_inflight_gauge(&model);
// Return the RAII Guard
InflightGuard {
metrics,
model,
endpoint,
request_type,
status: Status::Error,
timer,
}
}
pub(crate) fn mark_ok(&mut self) {
self.status = Status::Success;
}
}
impl Drop for InflightGuard {
fn drop(&mut self) {
// Decrement the gauge when the guard is dropped
self.metrics.dec_inflight_gauge(&self.model);
// the frequency of incrementing the full request counter is relatively low;
// if we were incrementing the counter on every forward pass, we'd use a static CounterVec or
// a discrete counter object to avoid the more costly label lookup required for the following calls
self.metrics.inc_request_counter(
&self.model,
&self.endpoint,
&self.request_type,
&self.status,
);
// Record the duration of the request
self.metrics
.request_duration
.with_label_values(&[&self.model])
.observe(self.timer.elapsed().as_secs_f64());
}
}
impl std::fmt::Display for Endpoint {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Endpoint::Completions => write!(f, "completions"),
Endpoint::ChatCompletions => write!(f, "chat_completions"),
}
}
}
impl Endpoint {
pub fn as_str(&self) -> &'static str {
match self {
Endpoint::Completions => "completions",
Endpoint::ChatCompletions => "chat_completions",
}
}
}
impl RequestType {
pub fn as_str(&self) -> &'static str {
match self {
RequestType::Unary => REQUEST_TYPE_UNARY,
RequestType::Stream => REQUEST_TYPE_STREAM,
}
}
}
impl Status {
pub fn as_str(&self) -> &'static str {
match self {
Status::Success => REQUEST_STATUS_SUCCESS,
Status::Error => REQUEST_STATUS_ERROR,
}
}
}
/// Create a new router with the given path
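///
/// A sketch of mounting the metrics route on an existing axum app (the path defaults
/// to `/metrics` when `None` is passed):
///
/// ```ignore
/// let (route_docs, metrics_router) = router(Registry::new(), None);
/// let app = axum::Router::new().merge(metrics_router);
/// ```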
pub fn router(registry: Registry, path: Option<String>) -> (Vec<RouteDoc>, Router) {
let registry = Arc::new(registry);
let path = path.unwrap_or_else(|| "/metrics".to_string());
let doc = RouteDoc::new(axum::http::Method::GET, &path);
let route = Router::new()
.route(&path, get(handler_metrics))
.with_state(registry);
(vec![doc], route)
}
/// Metrics Handler
async fn handler_metrics(State(registry): State<Arc<Registry>>) -> impl IntoResponse {
let encoder = prometheus::TextEncoder::new();
let metric_families = registry.gather();
let mut buffer = vec![];
if encoder.encode(&metric_families, &mut buffer).is_err() {
return (
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to encode metrics",
)
.into_response();
}
let metrics = match String::from_utf8(buffer) {
Ok(metrics) => metrics,
Err(_) => {
return (
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to encode metrics",
)
.into_response()
}
};
(StatusCode::OK, metrics).into_response()
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use axum::{
extract::State,
http::StatusCode,
response::{
sse::{Event, KeepAlive, Sse},
IntoResponse, Response,
},
routing::{get, post},
Json, Router,
};
use futures::{Stream, StreamExt};
use serde::{Deserialize, Serialize};
use std::{
collections::{HashMap, HashSet},
pin::Pin,
sync::Arc,
time::{SystemTime, UNIX_EPOCH},
};
use tokio_stream::wrappers::ReceiverStream;
use super::DeploymentState;
use super::{
error::HttpError,
metrics::{Endpoint, InflightGuard},
RouteDoc,
};
use crate::protocols::openai::{
chat_completions::ChatCompletionResponse, completions::CompletionResponse,
};
use crate::types::{
openai::{chat_completions::ChatCompletionRequest, completions::CompletionRequest},
Annotated,
};
use triton_distributed::pipeline::{AsyncEngineContext, Context};
#[derive(Serialize, Deserialize)]
pub(crate) struct ErrorResponse {
error: String,
}
impl ErrorResponse {
/// Not Found Error
pub fn model_not_found() -> (StatusCode, Json<ErrorResponse>) {
(
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: "Model not found".to_string(),
}),
)
}
/// Service Unavailable
/// This is returned when the service is live, but not ready.
pub fn _service_unavailable() -> (StatusCode, Json<ErrorResponse>) {
(
StatusCode::SERVICE_UNAVAILABLE,
Json(ErrorResponse {
error: "Service is not ready".to_string(),
}),
)
}
/// Internal Service Error
/// Return this error when the service encounters an internal error.
/// We should return a generic message to the client instead of the real error.
/// Internal Services errors are the result of misconfiguration or bugs in the service.
pub fn internal_server_error(msg: &str) -> (StatusCode, Json<ErrorResponse>) {
tracing::error!("Internal server error: {msg}");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: msg.to_string(),
}),
)
}
/// The OAI endpoints call a [`triton_distributed::engine::AsyncEngine`], which is specialized to return
/// an [`anyhow::Error`]. This method will try to downcast the [`anyhow::Error`] into an [`HttpError`].
/// If the downcast fails, it will return an [`ErrorResponse::internal_server_error`]
/// with the details of the error.
pub fn from_anyhow(err: anyhow::Error, alt_msg: &str) -> (StatusCode, Json<ErrorResponse>) {
match err.downcast::<HttpError>() {
Ok(http_error) => ErrorResponse::from_http_error(http_error),
Err(err) => ErrorResponse::internal_server_error(&format!("{alt_msg}: {err}")),
}
}
/// Implementers should only be able to throw 400-499 errors.
pub fn from_http_error(err: HttpError) -> (StatusCode, Json<ErrorResponse>) {
if err.code < 400 || err.code >= 500 {
return ErrorResponse::internal_server_error(&err.message);
}
match StatusCode::from_u16(err.code) {
Ok(code) => (code, Json(ErrorResponse { error: err.message })),
Err(_) => ErrorResponse::internal_server_error(&err.message),
}
}
}
impl From<HttpError> for ErrorResponse {
fn from(err: HttpError) -> Self {
ErrorResponse { error: err.message }
}
}
/// OpenAI Completions Request Handler
///
/// This method will handle the incoming request for the `/v1/completions` endpoint. The endpoint is a "source"
/// for a [`super::OpenAICompletionsStreamingEngine`] and will return a stream of
/// responses which will be forwarded to the client.
///
/// Note: For all requests, streaming or non-streaming, we always call the engine with streaming enabled. For
/// non-streaming requests, we will fold the stream into a single response as part of this handler.
#[tracing::instrument(skip_all)]
async fn completions(
State(state): State<Arc<DeploymentState>>,
Json(request): Json<CompletionRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
// return a 503 if the service is not ready
check_ready(&state)?;
// todo - extract distributed tracing id and context id from headers
let request_id = uuid::Uuid::new_v4().to_string();
// todo - decide on default
let streaming = request.stream.unwrap_or(false);
// update the request to always stream
let request = CompletionRequest {
stream: Some(true),
..request
};
// todo - make the protocols be optional for model name
// todo - when optional, if none, apply a default
let model = &request.model;
// todo - error handling should be more robust
let engine = state
.get_completions_engine(model)
.map_err(|_| ErrorResponse::model_not_found())?;
// this will increment the inflight gauge for the model
let mut inflight = state.create_inflight_guard(model, Endpoint::Completions, streaming);
// setup context
// todo - inherit request_id from distributed trace details
let request = Context::with_id(request, request_id.clone());
// issue the generate call on the engine
let stream = engine
.generate(request)
.await
.map_err(|e| ErrorResponse::from_anyhow(e, "Failed to generate completions"))?;
// capture the context to cancel the stream if the client disconnects
let ctx = stream.context();
// todo - tap the stream and propagate request level metrics
// note - we might do this as part of the post processing set to make it more generic
if streaming {
let stream = stream.map(|response| Event::try_from(EventConverter::from(response)));
let stream = monitor_for_disconnects(stream.boxed(), ctx, inflight).await;
Ok(Sse::new(stream)
.keep_alive(KeepAlive::default())
.into_response())
} else {
let response = CompletionResponse::from_annotated_stream(stream.into())
.await
.map_err(|e| {
tracing::error!(
"Failed to fold completions stream for {}: {:?}",
request_id,
e
);
ErrorResponse::internal_server_error("Failed to fold completions stream")
})?;
inflight.mark_ok();
Ok(Json(response).into_response())
}
}
/// OpenAI Chat Completions Request Handler
///
/// This method will handle the incoming request for the `/v1/chat/completions` endpoint. The endpoint is a "source"
/// for a [`super::OpenAIChatCompletionsStreamingEngine`] and will return a stream of responses which will be
/// forwarded to the client.
///
/// Note: For all requests, streaming or non-streaming, we always call the engine with streaming enabled. For
/// non-streaming requests, we will fold the stream into a single response as part of this handler.
#[tracing::instrument(skip_all)]
async fn chat_completions(
State(state): State<Arc<DeploymentState>>,
Json(request): Json<ChatCompletionRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
// return a 503 if the service is not ready
check_ready(&state)?;
// todo - extract distributed tracing id and context id from headers
let request_id = uuid::Uuid::new_v4().to_string();
// todo - decide on default
let streaming = request.stream.unwrap_or(false);
// update the request to always stream
let request = ChatCompletionRequest {
stream: Some(true),
..request
};
// todo - make the protocols be optional for model name
// todo - when optional, if none, apply a default
let model = &request.model;
// todo - determine the proper error code for when a request model is not present
tracing::trace!("Getting chat completions engine for model: {}", model);
let engine = state
.get_chat_completions_engine(model)
.map_err(|_| ErrorResponse::model_not_found())?;
// this will increment the inflight gauge for the model
let mut inflight = state.create_inflight_guard(model, Endpoint::ChatCompletions, streaming);
// setup context
// todo - inherit request_id from distributed trace details
let request = Context::with_id(request, request_id.clone());
tracing::trace!("Issuing generate call for chat completions");
// issue the generate call on the engine
let stream = engine
.generate(request)
.await
.map_err(|e| ErrorResponse::from_anyhow(e, "Failed to generate chat completions"))?;
// capture the context to cancel the stream if the client disconnects
let ctx = stream.context();
// todo - tap the stream and propagate request level metrics
// note - we might do this as part of the post processing set to make it more generic
if streaming {
let stream = stream.map(|response| Event::try_from(EventConverter::from(response)));
let stream = monitor_for_disconnects(stream.boxed(), ctx, inflight).await;
Ok(Sse::new(stream)
.keep_alive(KeepAlive::default())
.into_response())
} else {
let response = ChatCompletionResponse::from_annotated_stream(stream.into())
.await
.map_err(|e| {
tracing::error!(
request_id,
"Failed to fold chat completions stream for: {:?}",
e
);
ErrorResponse::internal_server_error(&format!(
"Failed to fold chat completions stream: {}",
e
))
})?;
inflight.mark_ok();
Ok(Json(response).into_response())
}
}
// todo - abstract this to the top level lib.rs to be reused
// todo - move the service_observer to its own state/arc
fn check_ready(_state: &Arc<DeploymentState>) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
// if state.service_observer.stage() != ServiceStage::Ready {
// return Err(ErrorResponse::service_unavailable());
// }
Ok(())
}
/// list models handler, non-standard format
async fn list_models_custom(
State(state): State<Arc<DeploymentState>>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
check_ready(&state)?;
let mut models = HashMap::new();
let chat_models = state
.chat_completion_engines
.lock()
.unwrap()
.engines
.keys()
.cloned()
.collect::<Vec<String>>();
let completion_models = state
.completion_engines
.lock()
.unwrap()
.engines
.keys()
.cloned()
.collect::<Vec<String>>();
models.insert("chat_completion_models", chat_models);
models.insert("completion_models", completion_models);
Ok(Json(models).into_response())
}
/// OpenAI compatible list models handler.
/// Example:
/// {
/// "object": "list",
/// "data": [
/// {
/// "id": "model-id-0",
/// "object": "model",
/// "created": 1686935002,
/// "owned_by": "organization-owner"
/// },
/// ]
/// }
async fn list_models_openai(
State(state): State<Arc<DeploymentState>>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
check_ready(&state)?;
let created = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
let mut data = Vec::new();
let models: HashSet<String> = state
.chat_completion_engines
.lock()
.unwrap()
.engines
.keys()
.chain(state.completion_engines.lock().unwrap().engines.keys())
.cloned()
.collect();
for model_id in models {
data.push(ModelListing {
id: model_id.clone(),
object: "object",
created, // Where would this come from? The GGUF?
owned_by: "nvidia".to_string(), // Get organization from GGUF
});
}
let out = ListModelOpenAI {
object: "list",
data,
};
Ok(Json(out).into_response())
}
#[derive(Serialize)]
struct ListModelOpenAI {
object: &'static str, // always "list"
data: Vec<ModelListing>,
}
#[derive(Serialize)]
struct ModelListing {
id: String,
object: &'static str, // always "object"
created: u64, // Seconds since epoch
owned_by: String,
}
/// This method will consume a stream of SSE events and forward them to a new stream defined by a tokio channel.
/// In this way, if the downstream is dropped, then the upstream will be unable to send any more events. This is
/// how we can monitor for disconnects and stop the generation of completions.
///
/// If a disconnect is detected, a `stop_generating` call is issued on the context, which
/// propagates the cancellation signal to the backend.
async fn monitor_for_disconnects(
stream: Pin<
Box<dyn Stream<Item = Result<axum::response::sse::Event, axum::Error>> + std::marker::Send>,
>,
context: Arc<dyn AsyncEngineContext>,
inflight: InflightGuard,
) -> ReceiverStream<Result<Event, axum::Error>> {
let (tx, rx) = tokio::sync::mpsc::channel(8);
tokio::spawn(async move {
let mut inflight = inflight;
let mut stream = stream;
while let Some(event) = stream.next().await {
let event = match event {
Ok(event) => Ok(event),
Err(err) => Ok(Event::default().event("error").comment(err.to_string())),
};
if (tx.send(event).await).is_err() {
tracing::trace!("Forwarding SSE stream was dropped; breaking loop");
context.stop_generating();
break;
}
}
        // the stream completed successfully - mark as ok
        // this will increment the request counter with a "success" status
if tx.send(Ok(Event::default().data("[DONE]"))).await.is_ok() {
inflight.mark_ok();
}
});
ReceiverStream::new(rx)
}
struct EventConverter<T>(Annotated<T>);
impl<T> From<Annotated<T>> for EventConverter<T> {
fn from(annotated: Annotated<T>) -> Self {
EventConverter(annotated)
}
}
/// Convert an [`Annotated`] into an [`Event`].
/// If the annotation represents an error, return an [`axum::Error`] instead.
/// The [`monitor_for_disconnects`] method will handle the error, emit it to the SSE stream,
/// and then stop the generation of completions.
impl<T: Serialize> TryFrom<EventConverter<T>> for Event {
type Error = axum::Error;
fn try_from(annotated: EventConverter<T>) -> Result<Self, Self::Error> {
let annotated = annotated.0;
let mut event = Event::default();
if let Some(data) = annotated.data {
event = event.json_data(data)?;
}
if let Some(msg) = annotated.event {
if msg == "error" {
let msgs = annotated
.comment
.unwrap_or_else(|| vec!["unspecified error".to_string()]);
return Err(axum::Error::new(msgs.join(" -- ")));
}
event = event.event(msg);
}
if let Some(comments) = annotated.comment {
for comment in comments {
event = event.comment(comment);
}
}
Ok(event)
}
}
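// A minimal sketch (not part of the handler path) of the conversion above:
// a data-bearing annotation becomes an SSE event, while an `event: error`
// annotation surfaces as an `axum::Error`. `serde_json::Value` stands in for
// a concrete response chunk type.
#[allow(dead_code)]
fn example_event_from_annotated(
    delta: Annotated<serde_json::Value>,
) -> Result<Event, axum::Error> {
    Event::try_from(EventConverter::from(delta))
}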
/// Create an Axum [`Router`] for the OpenAI API Completions endpoint
/// If no path is provided, the default path is `/v1/completions`
pub fn completions_router(
state: Arc<DeploymentState>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
let path = path.unwrap_or("/v1/completions".to_string());
let doc = RouteDoc::new(axum::http::Method::POST, &path);
let router = Router::new()
.route(&path, post(completions))
.with_state(state);
(vec![doc], router)
}
/// Create an Axum [`Router`] for the OpenAI API Chat Completions endpoint
/// If no path is provided, the default path is `/v1/chat/completions`
pub fn chat_completions_router(
state: Arc<DeploymentState>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
let path = path.unwrap_or("/v1/chat/completions".to_string());
let doc = RouteDoc::new(axum::http::Method::POST, &path);
let router = Router::new()
.route(&path, post(chat_completions))
.with_state(state);
(vec![doc], router)
}
/// List Models
pub fn list_models_router(
state: Arc<DeploymentState>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
// TODO: Why do we have this endpoint?
let custom_path = path.unwrap_or("/triton/alpha/list-models".to_string());
let doc_for_custom = RouteDoc::new(axum::http::Method::GET, &custom_path);
// Standard OpenAI compatible list models endpoint
let openai_path = "/v1/models".to_string();
let doc_for_openai = RouteDoc::new(axum::http::Method::GET, &openai_path);
let router = Router::new()
.route(&custom_path, get(list_models_custom))
.route(&openai_path, get(list_models_openai))
.with_state(state);
(vec![doc_for_custom, doc_for_openai], router)
}
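// A minimal sketch of composing the routers above into a single axum
// application; this mirrors what the HTTP service builder does internally.
// The `state` argument is assumed to come from a `ModelManager`.
#[allow(dead_code)]
fn example_compose_routers(state: Arc<DeploymentState>) -> (Vec<RouteDoc>, Router) {
    let mut all_docs = Vec::new();
    let mut app = Router::new();
    for (docs, router) in [
        completions_router(state.clone(), None),
        chat_completions_router(state.clone(), None),
        list_models_router(state, None),
    ] {
        app = app.merge(router);
        all_docs.extend(docs);
    }
    (all_docs, app)
}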
#[cfg(test)]
mod tests {
use super::super::ServiceHttpError;
use super::*;
const BACKUP_ERROR_MESSAGE: &str = "Failed to generate completions";
fn http_error_from_engine(code: u16) -> Result<(), anyhow::Error> {
Err(HttpError {
code,
message: "custom error message".to_string(),
})?
}
fn other_error_from_engine() -> Result<(), anyhow::Error> {
Err(ServiceHttpError::ModelNotFound("foo".to_string()))?
}
#[test]
fn test_http_error_response_from_anyhow() {
let err = http_error_from_engine(400).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::BAD_REQUEST);
assert_eq!(response.error, "custom error message");
}
#[test]
fn test_error_response_from_anyhow_out_of_range() {
let err = http_error_from_engine(399).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(response.error, "custom error message");
let err = http_error_from_engine(500).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(response.error, "custom error message");
let err = http_error_from_engine(501).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(response.error, "custom error message");
}
#[test]
fn test_other_error_response_from_anyhow() {
let err = other_error_from_engine().unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(
response.error,
format!(
"{}: {}",
BACKUP_ERROR_MESSAGE,
other_error_from_engine().unwrap_err()
)
);
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::metrics;
use super::ModelManager;
use derive_builder::Builder;
use tokio_util::sync::CancellationToken;
#[derive(Clone)]
pub struct HttpService {
models: ModelManager,
router: axum::Router,
port: u16,
}
#[derive(Clone, Builder)]
#[builder(build_fn(private, name = "build_internal"))]
pub struct HttpServiceConfig {
#[builder(default = "8787")]
port: u16,
// #[builder(default)]
// custom: Vec<axum::Router>
#[builder(default = "true")]
enable_chat_endpoints: bool,
#[builder(default = "true")]
enable_cmpl_endpoints: bool,
}
impl HttpService {
pub fn builder() -> HttpServiceConfigBuilder {
HttpServiceConfigBuilder::default()
}
pub fn model_manager(&self) -> &ModelManager {
&self.models
}
pub async fn run(&self, cancel_token: CancellationToken) -> anyhow::Result<()> {
let address = format!("0.0.0.0:{}", self.port);
        tracing::info!(address, "Starting HTTP service");
let listener = tokio::net::TcpListener::bind(address.as_str())
.await
.unwrap_or_else(|_| panic!("could not bind to address: {address}"));
let router = self.router.clone();
let observer = cancel_token.child_token();
Ok(axum::serve(listener, router)
.with_graceful_shutdown(observer.cancelled_owned())
.await
.inspect_err(|_| cancel_token.cancel())?)
}
}
impl HttpServiceConfigBuilder {
pub fn build(self) -> Result<HttpService, anyhow::Error> {
let config = self.build_internal()?;
let model_manager = ModelManager::new();
// enable prometheus metrics
let registry = metrics::Registry::new();
model_manager.metrics().register(&registry)?;
let mut router = axum::Router::new();
let mut all_docs = Vec::new();
let mut routes = vec![
metrics::router(registry, None),
super::openai::list_models_router(model_manager.state(), None),
];
        if config.enable_chat_endpoints {
            routes.push(super::openai::chat_completions_router(
                model_manager.state(),
                None,
            ));
        }
        if config.enable_cmpl_endpoints {
            routes.push(super::openai::completions_router(
                model_manager.state(),
                None,
            ));
        }
// for (route_docs, route) in routes.into_iter().chain(self.routes.into_iter()) {
// router = router.merge(route);
// all_docs.extend(route_docs);
// }
for (route_docs, route) in routes.into_iter() {
router = router.merge(route);
all_docs.extend(route_docs);
}
Ok(HttpService {
models: model_manager,
router,
port: config.port,
})
}
}
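// A minimal usage sketch (assuming a tokio runtime): build the service with
// the default routes, then serve until the cancellation token fires. The port
// value is illustrative.
#[allow(dead_code)]
async fn example_serve() -> anyhow::Result<()> {
    let mut builder = HttpService::builder();
    builder.port(8787);
    let service = builder.build()?;
    let token = CancellationToken::new();
    // Cancelling `token` from elsewhere triggers a graceful shutdown.
    service.run(token).await
}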
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Triton LLM
//!
//! The `triton-llm` crate is a Rust library that provides a set of traits and types for building
//! distributed LLM inference solutions.
pub mod http;
pub mod protocols;
pub mod types;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Triton LLM Protocols
//!
//! This module contains the protocols, i.e. message formats, used to exchange requests and responses
//! both publicly via the HTTP API and internally between Triton components.
//!
use futures::{Stream, StreamExt};
use serde::{Deserialize, Serialize};
use std::pin::Pin;
pub mod codec;
pub mod common;
pub mod openai;
/// The token ID type
pub type TokenIdType = u32;
pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
// TODO: This is an awkward dependency that we need to address
// Originally, all the Annotated/SSE codec bits were in the LLM protocol module; however, [Annotated]
// has become the common response envelope for triton-distributed.
// We may want to move the original Annotated back here and add an infallible conversion to the
// ResponseEnvelop in triton-distributed.
pub use triton_distributed::protocols::annotated::Annotated;
/// The LLM responses have multiple different fields and nests of objects to get to the actual
/// text completion returned. This trait can be applied to the `choice` level objects to extract
/// the completion text.
///
/// To avoid an optional, if no completion text is found, the [`ContentProvider::content`] should
/// return an empty string.
pub trait ContentProvider {
fn content(&self) -> String;
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Usage {
pub prompt_tokens: i32,
pub completion_tokens: i32,
pub total_tokens: i32,
}
/// Converts a stream of [codec::Message]s into a stream of [Annotated]s.
pub fn convert_sse_stream<R>(
stream: DataStream<Result<codec::Message, codec::SseCodecError>>,
) -> DataStream<Annotated<R>>
where
R: for<'de> Deserialize<'de> + Serialize,
{
let stream = stream.map(|message| match message {
Ok(message) => {
let delta = Annotated::<R>::try_from(message);
match delta {
Ok(delta) => delta,
Err(e) => Annotated::from_error(e.to_string()),
}
}
Err(e) => Annotated::from_error(e.to_string()),
});
Box::pin(stream)
}
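// A minimal sketch of wiring the SSE codec to the converter above.
// `serde_json::Value` stands in for a concrete response type; real callers
// would use a typed delta such as a chat completion chunk.
#[allow(dead_code)]
async fn example_convert_sse(text: &str) {
    let messages = codec::create_message_stream(text);
    let mut annotated = convert_sse_stream::<serde_json::Value>(messages);
    while let Some(item) = annotated.next().await {
        // Decode failures are folded into `Annotated::from_error` entries
        // rather than terminating the stream.
        let _ = (item.data, item.event, item.comment);
    }
}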
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A module for parsing Server-Sent Events (SSE) streams according to the SSE specification.
//!
//! This module provides `SseLineCodec`, a codec for decoding SSE streams into [`Message`]s.
//! It handles parsing of `id`, `event`, `data`, and comments; the `data` field can then be
//! deserialized into a typed value with [`Message::decode_data`].
//!
// TODO: Determine if we should use an External EventSource crate. There appear to be several
// potential candidates.
use bytes::BytesMut;
use futures::Stream;
use serde::Deserialize;
use std::{io::Cursor, pin::Pin};
use tokio_util::codec::{Decoder, FramedRead, LinesCodec};
use super::Annotated;
/// An error that occurs when decoding an SSE stream.
#[derive(Debug, thiserror::Error)]
pub enum SseCodecError {
#[error("SseLineCodec decode error: {0}")]
DecodeError(String),
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
}
/// A codec for decoding SSE streams into [`Message`] instances.
///
/// This codec parses SSE streams according to the SSE specification. The `data` field is kept
/// as a raw string; use [`Message::decode_data`] to deserialize it into a concrete type.
pub struct SseLineCodec {
lines_codec: LinesCodec,
data_buffer: String,
event_type_buffer: String,
last_event_id_buffer: String,
comments_buffer: Vec<String>,
}
/// Represents a parsed SSE message.
///
/// The `Message` struct contains optional fields for `id`, `event`, `data`, and a vector of
/// `comments`. The `data` field, if present, holds the raw payload string.
#[derive(Debug)]
pub struct Message {
pub id: Option<String>,
pub event: Option<String>,
pub data: Option<String>,
pub comments: Option<Vec<String>>,
}
impl Message {
/// Deserializes the `data` field into the specified type `T`.
///
/// # Errors
///
/// Returns an error if the `data` field is empty or if deserialization fails.
pub fn decode_data<T>(&self) -> Result<T, SseCodecError>
where
T: for<'de> Deserialize<'de>,
{
        serde_json::from_str(self.data.as_ref().ok_or(SseCodecError::DecodeError(
            "no data field on message to decode".to_string(),
        ))?)
        .map_err(|e| SseCodecError::DecodeError(format!("failed to deserialize data: {}", e)))
}
}
impl<T> TryFrom<Message> for Annotated<T>
where
T: for<'de> Deserialize<'de>,
{
type Error = String;
fn try_from(value: Message) -> Result<Annotated<T>, Self::Error> {
// determine if the message had an error
if let Some(event) = value.event.as_ref() {
if event == "error" {
let message = match &value.comments {
Some(comments) => comments.join("\n"),
None => "`event: error` detected, but no error message found".to_string(),
};
return Err(message);
}
}
// try to deserialize the data to T
let data: Option<T> = match &value.data {
Some(_) => value.decode_data().map_err(|e| e.to_string())?,
None => None,
};
Ok(Annotated {
data,
id: value.id,
event: value.event,
comment: value.comments,
})
}
}
impl SseLineCodec {
    /// Creates a new `SseLineCodec`.
pub fn new() -> Self {
Self::default()
}
}
impl Default for SseLineCodec {
fn default() -> Self {
Self {
lines_codec: LinesCodec::new(),
data_buffer: String::new(),
event_type_buffer: String::new(),
last_event_id_buffer: String::new(),
comments_buffer: Vec::new(),
}
}
}
impl Decoder for SseLineCodec {
type Item = Message;
type Error = SseCodecError;
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
loop {
match self
.lines_codec
.decode(src)
.map_err(|e| SseCodecError::DecodeError(e.to_string()))?
{
Some(line) => {
let line = line.trim_end_matches(&['\r', '\n'][..]);
if line.is_empty() {
// End of event; dispatch
if !self.data_buffer.is_empty()
|| !self.event_type_buffer.is_empty()
|| !self.last_event_id_buffer.is_empty()
|| !self.comments_buffer.is_empty()
{
// Remove the last '\n' if present in data_buffer
if self.data_buffer.ends_with('\n') {
self.data_buffer.pop();
}
let data = if !self.data_buffer.is_empty() {
Some(std::mem::take(&mut self.data_buffer))
} else {
None
};
let message = Message {
id: if self.last_event_id_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.last_event_id_buffer))
},
event: if self.event_type_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.event_type_buffer))
},
data,
comments: if self.comments_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.comments_buffer))
},
};
// No need to clear the buffers; they've been replaced with empty values
return Ok(Some(message));
} else {
// No data to dispatch; continue
continue;
}
} else if let Some(comment) = line.strip_prefix(':') {
self.comments_buffer.push(comment.trim().into());
} else {
let (field_name, field_value) = if let Some(idx) = line.find(':') {
let (name, value) = line.split_at(idx);
let value = value[1..].trim_start_matches(' ');
(name, value)
} else {
(line, "")
};
match field_name {
"event" => {
self.event_type_buffer = field_value.to_string();
}
"data" => {
if field_value != "[DONE]" {
if !self.data_buffer.is_empty() {
self.data_buffer.push('\n');
}
self.data_buffer.push_str(field_value);
}
}
"id" => {
if !field_value.contains('\0') {
self.last_event_id_buffer = field_value.to_string();
}
}
"retry" => {
// For simplicity, we'll ignore retry in this implementation
}
_ => {
// Ignore unknown fields
}
}
}
}
None => {
// No more data available at the moment
return Ok(None);
}
}
}
}
fn decode_eof(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
// Attempt to process any remaining data
let result = self.decode(src)?;
if result.is_some() {
return Ok(result);
}
// If there's no data left to process, return None
if self.data_buffer.is_empty()
&& self.event_type_buffer.is_empty()
&& self.last_event_id_buffer.is_empty()
&& self.comments_buffer.is_empty()
{
Ok(None)
} else {
// Dispatch any remaining data as an event
if self.data_buffer.ends_with('\n') {
self.data_buffer.pop();
}
let data = if !self.data_buffer.is_empty() {
Some(std::mem::take(&mut self.data_buffer))
} else {
None
};
let message = Message {
id: if self.last_event_id_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.last_event_id_buffer))
},
event: if self.event_type_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.event_type_buffer))
},
data,
comments: if self.comments_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.comments_buffer))
},
};
// No need to clear the buffers; they've been replaced with empty values
Ok(Some(message))
}
}
}
/// Creates a stream of `Message` instances from a string of SSE events.
pub fn create_message_stream(
text: &str,
) -> Pin<Box<dyn Stream<Item = Result<Message, SseCodecError>> + Send + Sync>> {
let cursor = Cursor::new(text.to_string());
let framed = FramedRead::new(cursor, SseLineCodec::new());
Box::pin(framed)
}
#[cfg(test)]
mod tests {
use std::io::Cursor;
use futures::stream::StreamExt;
use tokio_util::codec::FramedRead;
use super::*;
#[derive(Deserialize, Debug, PartialEq)]
struct TestData {
message: String,
}
#[tokio::test]
async fn test_message_with_all_fields() {
let sample_data = r#"id: 123
event: test
data: {"message": "Hello World"}
: This is a comment
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert_eq!(message.id, Some("123".to_string()));
assert_eq!(message.event, Some("test".to_string()));
assert_eq!(
message.comments,
Some(vec!["This is a comment".to_string()])
);
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Hello World".to_string());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_only_data() {
let sample_data = r#"data: {"message": "Just some data"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.comments.is_none());
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Just some data".to_string());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_only_comment() {
let sample_data = r#": This is a comment
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.data.is_none());
assert_eq!(
message.comments,
Some(vec!["This is a comment".to_string()])
);
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_multiple_comments() {
let sample_data = r#": First comment
: Second comment
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.data.is_none());
assert_eq!(
message.comments,
Some(vec![
"First comment".to_string(),
"Second comment".to_string()
])
);
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_partial_fields() {
let sample_data = r#"id: 456
data: {"message": "Partial data"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert_eq!(message.id, Some("456".to_string()));
assert!(message.event.is_none());
assert!(message.comments.is_none());
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Partial data".to_string());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_invalid_json_data() {
let sample_data = r#"data: {"message": "Invalid JSON
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(result) = framed.next().await {
match result {
Ok(message) => {
// got a message, but it has invalid json
let data = message.decode_data::<TestData>();
assert!(data.is_err(), "Expected an error; got {:?}", data);
}
_ => panic!("Expected a message"),
}
} else {
panic!("Expected an error");
}
}
#[tokio::test]
async fn test_message_with_missing_data_field() {
let sample_data = r#"id: 789
event: test_event
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert_eq!(message.id, Some("789".to_string()));
assert_eq!(message.event, Some("test_event".to_string()));
assert!(message.data.is_none());
assert!(message.comments.is_none());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_empty_data_field() {
let sample_data = r#"data:
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(result) = framed.next().await {
match result {
Ok(_) => {
panic!("Expected no message");
}
Err(e) => panic!("Unexpected error: {}", e),
}
} else {
// no message is emitted
}
}
#[tokio::test]
async fn test_message_with_multiple_data_lines() {
let sample_data = r#"data: {"message": "Line1"}
data: {"message": "Line2"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(result) = framed.next().await {
match result {
Ok(message) => {
// got a message with data, but the data is junk
let data = message.decode_data::<TestData>();
assert!(data.is_err(), "Expected an error; got {:?}", data);
}
_ => panic!("Expected a message"),
}
} else {
panic!("Expected an error");
}
}
#[tokio::test]
async fn test_message_with_unrecognized_field() {
let sample_data = r#"unknown: value
data: {"message": "Hello"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
// Unrecognized fields are ignored
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.comments.is_none());
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Hello".to_string());
} else {
panic!("Expected a message");
}
}
// data recorded on 2024-09-30 from
// + curl https://integrate.api.nvidia.com/v1/chat/completions -H 'Content-Type: application/json' \
// -H 'Authorization: Bearer nvapi-<redacted>' -d '{
// "model": "mistralai/mixtral-8x22b-instruct-v0.1",
// "messages": [{"role":"user","content":"Write a limerick about the wonders of GPU computing."}],
// "temperature": 0.5,
// "top_p": 1,
// "max_tokens": 64,
// "stream": true
// }'
const SAMPLE_CHAT_DATA: &str = r#"
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"A"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" GPU"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" so"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" swift"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" and"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" so"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" clever"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"In"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" comput"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"ations"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" it"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"'"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"s"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" quite"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" the"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" ende"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"avor"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"With"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" its"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" thousands"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" of"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" co"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"res"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"On"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" complex"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" tasks"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" it"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" ro"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"ars"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"S"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"olving"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" problems"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" like"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" never"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" forever"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}]}
data: [DONE]
"#;
#[tokio::test]
async fn test_openai_chat_stream() {
use crate::protocols::openai::chat_completions::ChatCompletionResponseDelta;
let mut stream = create_message_stream(SAMPLE_CHAT_DATA);
let mut counter = 0;
loop {
match stream.next().await {
Some(Ok(message)) => {
let delta: ChatCompletionResponseDelta =
serde_json::from_str(&message.data.unwrap()).unwrap();
counter += 1;
println!("counter: {}", counter);
println!("delta: {:?}", delta);
}
Some(Err(e)) => {
panic!("Error: {:?}", e);
}
None => {
break;
}
}
}
assert_eq!(counter, 47);
}
#[test]
fn test_successful_conversion() {
let message = Message {
id: Some("123".to_string()),
event: Some("update".to_string()),
data: Some(r#"{"message": "Hello World"}"#.to_string()),
comments: Some(vec!["Some comment".to_string()]),
};
let annotated: Annotated<TestData> = message.try_into().unwrap();
assert_eq!(annotated.id, Some("123".to_string()));
assert_eq!(annotated.event, Some("update".to_string()));
assert_eq!(annotated.comment, Some(vec!["Some comment".to_string()]));
assert_eq!(
annotated.data,
Some(TestData {
message: "Hello World".to_string()
})
);
}
#[test]
fn test_error_event_with_comments() {
let message = Message {
id: Some("456".to_string()),
event: Some("error".to_string()),
data: Some("Error data".to_string()),
comments: Some(vec!["An error occurred".to_string()]),
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_err());
assert_eq!(result.unwrap_err(), "An error occurred".to_string());
}
#[test]
fn test_error_event_without_comments() {
let message = Message {
id: Some("789".to_string()),
event: Some("error".to_string()),
data: Some("Error data".to_string()),
comments: None,
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_err());
}
#[test]
fn test_invalid_json_data() {
let message = Message {
id: None,
event: Some("update".to_string()),
data: Some("Invalid JSON".to_string()),
comments: None,
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_err());
}
#[test]
fn test_missing_data_field() {
let message = Message {
id: None,
event: Some("update".to_string()),
data: None,
comments: None,
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_ok());
let annotated = result.unwrap();
assert!(annotated.data.is_none());
assert_eq!(annotated.event, Some("update".to_string()));
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Engine Protocols
//! ================
//!
//! This module contains the protocols in the public API for the LLM Engine and AsyncEngine facades.
//!
//! The core components are the `CompletionRequest` and `StreamingCompletionResponse` objects.
//!
//! The `StreamingCompletionResponse` objects are the outputs of the LLM Engine; however, we
//! need some additional information to propagate intermediate results for improved observability.
//! The metadata is transferred via the other arms of the `StreamingResponse` enum.
//!
use anyhow::Result;
use derive_builder::Builder;
use serde::ser::SerializeStruct;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashMap;
use std::time::SystemTime;
use super::TokenIdType;
pub mod kv_routing;
pub mod llm_backend;
pub mod postprocessor;
pub mod preprocessor;
/// SamplingOptionsProvider is a trait that allows the caller to extract the sampling options
/// from the object that implements it.
pub trait SamplingOptionsProvider {
fn extract_sampling_options(&self) -> Result<SamplingOptions>;
}
pub trait StopConditionsProvider {
fn extract_stop_conditions(&self) -> Result<StopConditions>;
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum FinishReason {
#[serde(rename = "eos")]
EoS,
#[serde(rename = "length")]
Length,
#[serde(rename = "stop")]
Stop,
#[serde(rename = "error")]
Error(String),
#[serde(rename = "cancelled")]
Cancelled,
}
/// LLM Inference Engines can accept a variety of input types. Not all Engines will support all
/// input types. For example, the trtllm::AsyncEngine only supports `PromptType::Tokens` as an
/// input type. The higher-level `Backend` class is a general wrapper around Engines that will
/// enable many of the input options that require pre/postprocessing.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub enum PromptType {
    /// If allowed, this input type allows the caller to pass a list of token_ids directly to the
/// inference engine. This is an advanced feature that requires the caller to handle all of the
/// necessary prompt formatting and tokenization.
#[serde(rename = "token_ids")]
TokenIds(Vec<TokenIdType>),
/// If allowed, the raw text will be tokenized and converted to token_ids without any additional
    /// preprocessing. This is an advanced feature that requires the caller to correctly format the
/// prompt as defined by the model.
#[serde(rename = "raw")]
Raw(String),
/// If allowed, the `CompletionContext` will be preprocessed server-side. If the `Model` trait
    /// `requires_prompt_template` returns true then the `CompletionContext` will be used
/// to render the formatted prompt from the template. `Completion` is the preferred `PromptType`
/// for single turn completions.
#[serde(rename = "completion")]
Completion(CompletionContext),
/// If allowed, the `ChatContext` will be preprocessed server-side. Most chat models will have
/// a predefined prompt format/structure. If the `Model` trait `requires_prompt_template` returns
    /// true then the `ChatContext` will be used to render the formatted prompt from the template.
/// `ChatCompletion` is the preferred `PromptType` for multi-turn completions.
#[serde(rename = "chat_completion")]
ChatCompletion(ChatContext),
    /// If allowed, then `Model::requires_prompt_template()` must also return true. The `serde_json::Value`
    /// will be passed directly to the prompt template. This allows a completely generic data model
    /// and prompt template to be defined and used by the server.
#[serde(rename = "custom_json")]
CustomJson(serde_json::Value),
}
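// A quick illustration of the wire format produced by the serde attributes
// above: variants serialize as externally tagged JSON with snake_case tags,
// e.g. `{"raw":"Hello"}` for `PromptType::Raw`.
#[allow(dead_code)]
fn example_prompt_type_json() -> serde_json::Result<String> {
    serde_json::to_string(&PromptType::Raw("Hello".to_string()))
}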
/// TensorRT LLM does not perform preprocessing or postprocessing. The input_ids / token_ids
/// are expected to be preprocessed by the client. The client is responsible for constructing
/// the model specific prompt template and applying the tokenizer.
///
/// TensorRT LLM will perform some server side postprocessing to ensure that generation is
/// efficiently stopped. See `StopConditions` below.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct CompletionRequest {
/// Type of prompt
pub prompt: PromptType,
/// StopConditions are conditions that the inference engine will use to stop generation.
pub stop_conditions: StopConditions,
/// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// More documentation is needed on how, and in what order, the sampling options are applied.
pub sampling_options: SamplingOptions,
/// The computed checksum of the Model Deployment Card (MDC).
#[builder(default)]
pub mdc_sum: Option<String>,
/// User requested annotations for the request
#[builder(default)]
pub annotations: Option<Vec<String>>,
}
impl CompletionRequest {
pub fn builder() -> CompletionRequestBuilder {
CompletionRequestBuilder::default()
}
}
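// A minimal sketch of constructing a request with the generated builder; the
// prompt and option values are illustrative. `mdc_sum` and `annotations` fall
// back to their builder defaults.
#[allow(dead_code)]
fn example_completion_request() -> Result<CompletionRequest> {
    let mut builder = CompletionRequest::builder();
    builder
        .prompt(PromptType::Raw("Hello".to_string()))
        .stop_conditions(StopConditions {
            max_tokens: Some(64),
            ..Default::default()
        })
        .sampling_options(SamplingOptions::default());
    builder.build().map_err(Into::into)
}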
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
/// Defines the prompt template and system prompt for a completion request.
/// If the model does not support prompt templates, the system_prompt will be ignored.
pub struct CompletionContext {
/// Prompt sent by the user
pub prompt: String,
/// Optional system_prompt for models that support prompt templates with system_prompts.
pub system_prompt: Option<String>,
}
/// ChatTurn is a struct that contains the user and assistant messages in a chat.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ChatTurn {
/// The user message
pub user: String,
/// The assistant response
pub assistant: String,
}
/// ChatContext is a struct that contains the role and context of a chat message
/// along with a flattened CompletionContext.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ChatContext {
/// CompletionContext for this chat turn
#[serde(flatten)]
pub completion: CompletionContext,
/// The history/context of the user and assistant messages in the chat context
pub context: Vec<ChatTurn>,
}
/// TensorRT LLM server-side stop conditions. These options allow for the server to evaluate
/// the generated sequence and stop generation if the sequence meets a stop condition.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct StopConditions {
/// The maximum number of tokens to generate
pub max_tokens: Option<u32>,
/// List of strings that stop the generation when they are generated.
/// The returned output will not contain the stop strings.
pub stop: Option<Vec<String>>,
/// List of tokens that stop the generation when they are
/// generated. The returned output will NOT contain the stop tokens.
pub stop_token_ids_hidden: Option<Vec<TokenIdType>>,
/// The minimum number of tokens to generate
/// To ignore_eos, set min_tokens to max_tokens
pub min_tokens: Option<u32>,
/// Whether to ignore the EOS token and continue generating
/// tokens after the EOS token is generated.
    // TODO(ignore_eos) - improve this by masking the EOS token with logit bias
pub ignore_eos: Option<bool>,
}
impl StopConditions {
pub fn apply_ignore_eos(&mut self) {
if self.ignore_eos.unwrap_or(false) {
self.min_tokens = self.max_tokens;
self.stop = None;
self.stop_token_ids_hidden = None;
}
}
}
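// A small sketch of the rewrite performed by `apply_ignore_eos` (values are
// illustrative): with `ignore_eos` set, generation runs out to `max_tokens`
// because the EOS token and stop lists no longer end the sequence.
#[allow(dead_code)]
fn example_ignore_eos() -> StopConditions {
    let mut conditions = StopConditions {
        max_tokens: Some(128),
        ignore_eos: Some(true),
        ..Default::default()
    };
    conditions.apply_ignore_eos();
    // `min_tokens` is now Some(128) and `stop` / `stop_token_ids_hidden` are cleared.
    conditions
}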
/// Temperature range for sampling.
pub const TEMPERATURE_RANGE: (f32, f32) = (0.0, 1.0);
/// Top P range for sampling.
pub const TOP_P_RANGE: (f32, f32) = (0.0, 1.0);
/// Frequency Penalty range for sampling.
pub const FREQUENCY_PENALTY_RANGE: (f32, f32) = (-1.0, 1.0);
/// Collection of options that control the sampling behavior of the inference engine.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct SamplingOptions {
/// Number of output sequences to return for the given prompt
pub n: Option<i32>,
/// Number of output sequences that are generated from the prompt.
/// From these `best_of` sequences, the top `n` sequences are returned.
/// `best_of` must be greater than or equal to `n`. This is treated as
    /// the beam width when `use_beam_search` is true. By default, `best_of`
/// is set to `n`.
pub best_of: Option<i32>,
/// Float that penalizes new tokens based on whether they
/// appear in the generated text so far. Values > 0 encourage the model
/// to use new tokens, while values < 0 encourage the model to repeat
/// tokens.
pub presence_penalty: Option<f32>,
/// Float that penalizes new tokens based on their
/// frequency in the generated text so far. Values > 0 encourage the
/// model to use new tokens, while values < 0 encourage the model to
/// repeat tokens.
pub frequency_penalty: Option<f32>,
/// Float that penalizes new tokens based on whether
/// they appear in the prompt and the generated text so far. Values > 1
/// encourage the model to use new tokens, while values < 1 encourage
/// the model to repeat tokens.
pub repetition_penalty: Option<f32>,
/// Float that controls the randomness of the sampling. Lower
/// values make the model more deterministic, while higher values make
/// the model more random. Zero means greedy sampling.
pub temperature: Option<f32>,
/// Float that controls the cumulative probability of the top tokens
/// to consider. Must be in (0, 1]. Set to 1 to consider all tokens.
pub top_p: Option<f32>,
/// Integer that controls the number of top tokens to consider. Set
/// to -1 to consider all tokens.
pub top_k: Option<i32>,
/// Float that represents the minimum probability for a token to be
/// considered, relative to the probability of the most likely token.
/// Must be in [0, 1]. Set to 0 to disable this.
pub min_p: Option<f32>,
/// Whether to use beam search instead of sampling.
pub use_beam_search: Option<bool>,
/// Float that penalizes sequences based on their length.
/// Used in beam search.
pub length_penalty: Option<f32>,
/// The seed to use when sampling
pub seed: Option<i64>,
}
impl SamplingOptions {
pub fn force_greedy(&mut self) {
self.presence_penalty = None;
self.frequency_penalty = None;
self.repetition_penalty = None;
self.temperature = None;
self.top_p = None;
self.top_k = None;
self.min_p = None;
}
}
/// Collection of options that control what information the inference engine returns in the response.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct OutputOptions {
/// Number of log probabilities to return per output token.
/// Note that the implementation follows the OpenAI API: The return
/// result includes the log probabilities on the `logprobs` most likely
    /// tokens, as well as the chosen tokens. The API will always return the
/// log probability of the sampled token, so there may be up to
/// `logprobs+1` elements in the response
pub logprobs: Option<u32>,
/// Number of log probabilities to return per prompt token.
pub prompt_logprobs: Option<u32>,
    /// Whether to skip special tokens in the output.
pub skip_special_tokens: Option<bool>,
    /// If true, the Context object will contain the prompt that was passed to
/// the tokenizer. This is useful for inspecting the behavior of prompt
/// templates that are applied during the backend preprocessing.
pub formatted_prompt: Option<bool>,
}
// Struct for log probability information
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatCompletionLogprobs {
/// A list of message content tokens with log probability information.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<Vec<ChatCompletionTokenLogprob>>,
/// A list of message refusal tokens with log probability information.
#[serde(skip_serializing_if = "Option::is_none")]
pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatCompletionTokenLogprob {
/// The token.
pub token: String,
/// The log probability of this token, if it is within the top 20 most likely tokens.
/// Otherwise, the value `-9999.0` signifies that the token is very unlikely.
pub logprob: f64,
/// A list of integers representing the UTF-8 bytes representation of the token.
/// Useful in instances where characters are represented by multiple tokens and their
/// byte representations must be combined to generate the correct text representation.
/// Can be `None` if there is no bytes representation for the token.
pub bytes: Option<Vec<u8>>,
/// List of the most likely tokens and their log probability, at this token position.
/// In rare cases, there may be fewer than the requested number of `top_logprobs` returned.
pub top_logprobs: Vec<TopLogprob>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TopLogprob {
/// The token.
pub token: String,
/// The log probability of this token.
pub logprob: f64,
/// A list of integers representing the UTF-8 bytes representation of the token.
/// Can be `None` if there is no bytes representation for the token.
pub bytes: Option<Vec<u8>>,
}
// /// UserData is a struct that contains user-defined data that can be passed to the inference engine.
// /// This information will be use to annotate the distributed traces for improved observability.
// #[derive(Serialize, Deserialize, Debug, Clone, Default)]
// pub struct UserData {
// /// Apply server-side prompt template to the request
// pub request_uuid: Option<uuid::Uuid>,
// }
/// StreamingResponse is the primary response object for the LLM Engine. The response stream
/// can emit three different types of messages. The Initialize and Finalize messages are optional
/// and primarily used over disaggregated transports to move state from the server to the client.
#[derive(Serialize, Deserialize, Debug)]
pub enum StreamingResponse {
    /// Initialize transports a Prologue object which communicates the LLM Engine Context
Initialize(Option<Prologue>),
/// Step is the primary data in the response stream. It contains the StreamingCompletionResponse
Step(Box<StreamingCompletionResponse>),
/// Finalize is an optional final message in the response stream. It contains the Epilogue object which
/// is used to communicate extra information about the completion and the engine statistics.
Finalize(Option<Epilogue>),
}
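// Illustrative sketch (an assumption about intended use, not part of the
// public API): a client-side consumer is expected to handle all three
// variants of the stream.
#[allow(dead_code)]
fn example_consume_streaming_response(msg: StreamingResponse) {
    match msg {
        // Optional first message: capture preprocessing artifacts.
        StreamingResponse::Initialize(Some(prologue)) => {
            let _ = (prologue.formatted_prompt, prologue.input_token_ids);
        }
        StreamingResponse::Initialize(None) => {}
        // Primary data path: each Step carries a StreamingCompletionResponse.
        StreamingResponse::Step(step) => {
            let _ = &step.delta;
        }
        // Optional final message: the stream finished without error.
        StreamingResponse::Finalize(_epilogue) => {}
    }
}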
// TODO(ryan) - this should be part of the internal api as it is not deserializable
// the public API should drop the Option<Arc<Stats>> in favor of Option<Stats>
// the two variants both serialize to the same json; however, the internal version
// cannot be deserialized directly.
// we use the internal one on the server side to avoid the cost of cloning the Stats
// object; however, client side, we should always fully materialize the Stats object.
//
// TODO(ryan) - update this object to use an enum where we have the current definition be the
// StepResponse arm; then we will add the following arms:
// - Initialize(Prologue)
// - Step()
// - Finalize(Epilogue)
/// This is the first message that will be emitted by an Engine Response Stream
/// It indicates that the request has been preprocessed and queued for execution on the backend.
#[derive(Serialize, Deserialize, Debug)]
pub struct Prologue {
/// If the request was preprocessed with a prompt template, this will contain the formatted prompt
pub formatted_prompt: Option<String>,
/// If the request did not contain TokenIds, this will contain the token_ids that were generated
/// from tokenizing the prompt.
pub input_token_ids: Option<Vec<TokenIdType>>,
}
/// This is the final message that will be emitted by an Engine Response Stream when it
/// finishes without error. In some cases, the engine may emit an error which will indicate
/// the end of the stream. Another case in which a Finalize(Epilogue) will not be emitted is
/// if the response handler has stalled and too many responses are pending.
#[derive(Serialize, Deserialize, Debug)]
pub struct Epilogue {}
#[derive(Debug)]
pub struct StreamingCompletionResponse {
pub delta: Delta,
pub logprobs: Option<ChatCompletionLogprobs>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum StreamState {
Active,
Finished(FinishReason),
}
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum Logits {
All(Vec<f32>),
Sparse(Vec<(u32, f32)>),
}
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum LogProbs {
Normalized(Logits),
Raw(Logits),
}
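// Illustrative sketch (not part of the crate's tests): with
// `rename_all = "snake_case"`, these enums serialize as externally tagged
// JSON objects keyed by the snake_case variant name.
#[cfg(test)]
mod logits_wire_format_example {
    use super::*;

    #[test]
    fn sparse_logits_serialize_with_snake_case_tag() {
        let sparse = Logits::Sparse(vec![(42, -0.5)]);
        let json = serde_json::to_string(&sparse).expect("Failed to serialize");
        assert_eq!(json, r#"{"sparse":[[42,-0.5]]}"#);
    }
}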
/// At each SequencePosition we hold position-specific data
pub struct SequencePositionData {
pub token_id: TokenIdType,
/// The log probability of the token
pub logprobs: Option<LogProbs>,
}
// todo(ryan) - we need to create a DeltaBuilder which is a mutable object that can be passed
// around from the low-level compute engine to the high-level api. The DeltaBuilder will allow
// us to construct the Delta object at multiple layers in the streaming response path.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Delta {
pub is_complete: bool,
pub finish_reason: Option<FinishReason>,
// new token_ids
pub token_ids: Option<Vec<u32>>,
// tokens
pub tokens: Option<Vec<String>>,
// decoded text
pub text: Option<String>,
// current sequence length
    // when streaming, we expect this to increase by 1 on each response
pub sequence_length: Option<usize>,
// if the number of slots for a given request is greater than 1
// this indicates the index of the slot for the response
pub index: Option<usize>,
/// cumulative log probabilities
pub cum_log_probs: Option<f64>,
/// error message from engine
/// if this is set, is_complete should also be true
pub err_msg: Option<String>,
/// usage info
pub usage: Option<Usage>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Usage {
pub input_tokens_count: usize,
pub output_tokens_count: usize,
}
// todo(ryan) - we need to update this object to make it more generic
// we need to define a set of generic stats traits that allow those stats to be None
// then back them by a concrete implementation like a TrtllmStats object
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Stats {
    /// Time since the last Epoch/Forward Pass in microseconds (us).
    /// This is measured and recorded by the Response Router rather than the
    /// Inference Engine. Note, when evaluating the responses, if this
    /// value is greater than the stream's measured value, then there was a gap
    /// between forward passes. In normal operation, the value of this field should
    /// be less than the recorded value on the response stream.
pub time_since_last_forward_pass_us: Option<u64>,
pub request_active_count: u32,
pub request_context_count: u32,
pub request_generation_count: u32,
pub request_scheduled_count: u32,
pub request_max_count: u32,
pub kv_free_cache_blocks: u64,
pub kv_max_cache_blocks: u64,
pub kv_used_cache_blocks: u64,
pub kv_tokens_per_cache_block: u64,
pub runtime_cpu_memory_usage: u64,
pub runtime_gpu_memory_usage: u64,
pub runtime_pinned_memory_usage: u64,
pub iteration_counter: u64,
pub microbatch_id: u64,
pub total_context_tokens: u32,
pub timestamp: String,
}
impl Serialize for StreamingCompletionResponse {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
        // `logprobs` is intentionally not serialized (matching the tests below),
        // so the struct is declared with a single field.
        let mut state = serializer.serialize_struct("StreamingCompletionResponse", 1)?;
        state.serialize_field("delta", &self.delta)?;
state.end()
}
}
impl<'de> Deserialize<'de> for StreamingCompletionResponse {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
// Create a temporary struct for deserialization
#[derive(Deserialize)]
struct TempResponse {
delta: Delta,
logprobs: Option<ChatCompletionLogprobs>,
}
let TempResponse { delta, logprobs } = TempResponse::deserialize(deserializer)?;
Ok(StreamingCompletionResponse { delta, logprobs })
}
}
#[derive(Serialize, Deserialize, Debug)]
pub struct ScatterData<T> {
pub x: Vec<T>,
pub y: Vec<T>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Trace {
pub time_to_first_token: u64,
pub token_to_token: Vec<u64>,
pub start: SystemTime,
pub complete: SystemTime,
pub initial_tokens: u32,
pub max_tokens: u32,
pub t2ft_iteration_count: u64,
pub t2t_iteration_count: Vec<u64>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct PerformanceModel {
// linear regression parameters fitting t2ft vs. initial tokens
pub t2ft_intercept: f64,
pub t2ft_slope: f64,
// linear regression parameters fitting t2tl vs. initial tokens
pub t2tl_intercept: f64,
pub t2tl_slope: f64,
// r2 values from the regression
pub t2ft_fit_r2: f64,
pub t2tl_fit_r2: f64,
}
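// Illustrative sketch (an assumption about intended use, not part of the
// public API): the regression parameters form a simple linear model, so a
// predicted time-to-first-token for a prompt of `initial_tokens` is
// intercept + slope * tokens.
#[allow(dead_code)]
fn example_predict_t2ft(model: &PerformanceModel, initial_tokens: u32) -> f64 {
    model.t2ft_intercept + model.t2ft_slope * f64::from(initial_tokens)
}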
#[derive(Serialize, Deserialize, Debug)]
pub struct CalibrationResults {
pub effective_flops: f64,
pub effective_memory_bandwidth: f64,
pub max_q: u32,
pub performance_model: PerformanceModel,
pub traces: Vec<Trace>,
pub t2ft_scatter_data: ScatterData<f64>,
pub t2tl_scatter_data: ScatterData<f64>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct LoadgenResults {
pub stats_by_iteration: HashMap<u64, Stats>,
pub traces: Vec<Trace>,
}
impl CompletionContext {
/// Create a new CompletionContext
pub fn new(prompt: String, system_prompt: Option<String>) -> Self {
Self {
prompt,
system_prompt,
}
}
/// Create a new CompletionContext with only a prompt
pub fn from_prompt(prompt: String) -> Self {
Self {
prompt,
system_prompt: None,
}
}
/// Create a new CompletionContext with a prompt and system prompt
pub fn with_system_prompt(prompt: String, system_prompt: String) -> Self {
Self {
prompt,
system_prompt: Some(system_prompt),
}
}
}
// todo(ryan) - create a builder for chat context
impl From<CompletionContext> for PromptType {
fn from(context: CompletionContext) -> Self {
PromptType::Completion(context)
}
}
#[cfg(test)]
mod tests {
use serde_json;
use super::*;
#[test]
fn test_completion_context_new() {
let prompt = "Hello, world!".to_string();
let system_prompt = Some("This is a system prompt.".to_string());
let context = CompletionContext::new(prompt.clone(), system_prompt.clone());
assert_eq!(context.prompt, prompt);
assert_eq!(context.system_prompt, system_prompt);
}
#[test]
fn test_completion_context_from_prompt() {
let prompt = "Hello, world!".to_string();
let context = CompletionContext::from_prompt(prompt.clone());
assert_eq!(context.prompt, prompt);
assert_eq!(context.system_prompt, None);
}
#[test]
fn test_completion_context_with_system_prompt() {
let prompt = "Hello, world!".to_string();
let system_prompt = "This is a system prompt.".to_string();
let context = CompletionContext::with_system_prompt(prompt.clone(), system_prompt.clone());
assert_eq!(context.prompt, prompt);
assert_eq!(context.system_prompt, Some(system_prompt));
}
#[test]
fn test_completion_context_into_prompt_type() {
let prompt = "Hello, world!".to_string();
let system_prompt = "This is a system prompt.".to_string();
let context = CompletionContext::with_system_prompt(prompt.clone(), system_prompt.clone());
let prompt_type: PromptType = context.into();
if let PromptType::Completion(completion_context) = prompt_type {
assert_eq!(completion_context.prompt, prompt);
assert_eq!(completion_context.system_prompt, Some(system_prompt));
} else {
panic!("Expected a Completion variant");
}
}
// #[test]
// fn test_serialize_with_stats() {
// let response = StreamingCompletionResponse {
// delta: Delta {
// is_complete: true,
// finish_reason: Some(FinishReason::Length),
// token_ids: Some(vec![101, 102, 103]),
// tokens: Some(vec!["token1".to_string(), "token2".to_string()]),
// text: Some("example text".to_string()),
// sequence_length: Some(3),
// index: Some(0),
// cum_log_probs: Some(-0.5),
// err_msg: None,
// usage: None,
// },
// logprobs: None,
// };
// // Serialize the response
// let serialized = serde_json::to_string(&response).expect("Failed to serialize");
// // Expected JSON string (simplified)
// let expected = r#"{
// "delta": {
// "is_complete": true,
// "finish_reason": "length",
// "token_ids": [101, 102, 103],
// "tokens": ["token1", "token2"],
// "text": "example text",
// "sequence_length": 3,
// "index": 0,
// "cum_log_probs": -0.5,
// "err_msg": null,
// "usage": null
// },
// "stats": {
// "time_since_last_forward_pass_us": 1000,
// "request_active_count": 2,
// "request_context_count": 1,
// "request_generation_count": 3,
// "request_scheduled_count": 1,
// "request_max_count": 10,
// "kv_free_cache_blocks": 500,
// "kv_max_cache_blocks": 1000,
// "kv_used_cache_blocks": 500,
// "kv_tokens_per_cache_block": 10,
// "runtime_cpu_memory_usage": 5000,
// "runtime_gpu_memory_usage": 2000,
// "runtime_pinned_memory_usage": 1000,
// "iteration_counter": 5,
// "microbatch_id": 12345,
// "total_context_tokens": 256,
// "timestamp": "2024-01-01T00:00:00Z"
// }
// }"#;
// assert_eq!(
// serde_json::from_str::<serde_json::Value>(&serialized).unwrap(),
// serde_json::from_str::<serde_json::Value>(expected).unwrap()
// );
// }
#[test]
fn test_serialize_without_stats() {
let response = StreamingCompletionResponse {
delta: Delta {
is_complete: false,
finish_reason: None,
token_ids: None,
tokens: None,
text: None,
sequence_length: None,
index: None,
cum_log_probs: None,
err_msg: None,
usage: None,
},
logprobs: None,
};
// Serialize the response
let serialized = serde_json::to_string(&response).expect("Failed to serialize");
// Expected JSON string
let expected = r#"{
"delta": {
"is_complete": false,
"finish_reason": null,
"token_ids": null,
"tokens": null,
"text": null,
"sequence_length": null,
"index": null,
"cum_log_probs": null,
"err_msg": null,
"usage": null
}
}"#;
assert_eq!(
serde_json::from_str::<serde_json::Value>(&serialized).unwrap(),
serde_json::from_str::<serde_json::Value>(expected).unwrap()
);
}
// #[test]
// fn test_deserialize_with_stats() {
// let json_data = r#"{
// "delta": {
// "is_complete": true,
// "finish_reason": "length",
// "token_ids": [101, 102, 103],
// "tokens": ["token1", "token2"],
// "text": "example text",
// "sequence_length": 3,
// "index": 0,
// "cum_log_probs": -0.5,
// "err_msg": null,
// "usage": null
// },
// "stats": {
// "time_since_last_forward_pass_us": 1000,
// "request_active_count": 2,
// "request_context_count": 1,
// "request_generation_count": 3,
// "request_scheduled_count": 1,
// "request_max_count": 10,
// "kv_free_cache_blocks": 500,
// "kv_max_cache_blocks": 1000,
// "kv_used_cache_blocks": 500,
// "kv_tokens_per_cache_block": 10,
// "runtime_cpu_memory_usage": 5000,
// "runtime_gpu_memory_usage": 2000,
// "runtime_pinned_memory_usage": 1000,
// "iteration_counter": 5,
// "microbatch_id": 12345,
// "total_context_tokens": 256,
// "timestamp": "2024-01-01T00:00:00Z"
// }
// }"#;
// // Deserialize the JSON string
// let deserialized: StreamingCompletionResponse =
// serde_json::from_str(json_data).expect("Failed to deserialize");
// // Expected response object
// let expected = StreamingCompletionResponse {
// delta: Delta {
// is_complete: true,
// finish_reason: Some(FinishReason::Length),
// token_ids: Some(vec![101, 102, 103]),
// tokens: Some(vec!["token1".to_string(), "token2".to_string()]),
// text: Some("example text".to_string()),
// sequence_length: Some(3),
// index: Some(0),
// cum_log_probs: Some(-0.5),
// err_msg: None,
// usage: None,
// },
// logprobs: None,
// };
    // // This is unwieldy, but we can no longer do assert_eq!(deserialized, expected);
    // // because the struct no longer has the PartialEq trait
// assert_eq!(deserialized.delta.is_complete, expected.delta.is_complete);
// assert_eq!(
// deserialized.delta.finish_reason,
// expected.delta.finish_reason
// );
// assert_eq!(deserialized.delta.token_ids, expected.delta.token_ids);
// assert_eq!(deserialized.delta.tokens, expected.delta.tokens);
// assert_eq!(deserialized.delta.text, expected.delta.text);
// assert_eq!(
// deserialized.delta.sequence_length,
// expected.delta.sequence_length
// );
// assert_eq!(deserialized.delta.index, expected.delta.index);
// assert_eq!(
// deserialized.delta.cum_log_probs,
// expected.delta.cum_log_probs
// );
// assert_eq!(deserialized.delta.err_msg, expected.delta.err_msg);
// assert_eq!(deserialized.delta.usage, expected.delta.usage);
// assert_eq!(
// deserialized_stats.time_since_last_forward_pass_us,
// expected_stats.time_since_last_forward_pass_us
// );
// assert_eq!(
// deserialized_stats.request_active_count,
// expected_stats.request_active_count
// );
// assert_eq!(
// deserialized_stats.request_context_count,
// expected_stats.request_context_count
// );
// assert_eq!(
// deserialized_stats.request_generation_count,
// expected_stats.request_generation_count
// );
// assert_eq!(
// deserialized_stats.request_scheduled_count,
// expected_stats.request_scheduled_count
// );
// assert_eq!(
// deserialized_stats.request_max_count,
// expected_stats.request_max_count
// );
// assert_eq!(
// deserialized_stats.kv_free_cache_blocks,
// expected_stats.kv_free_cache_blocks
// );
// assert_eq!(
// deserialized_stats.kv_max_cache_blocks,
// expected_stats.kv_max_cache_blocks
// );
// assert_eq!(
// deserialized_stats.kv_used_cache_blocks,
// expected_stats.kv_used_cache_blocks
// );
// assert_eq!(
// deserialized_stats.kv_tokens_per_cache_block,
// expected_stats.kv_tokens_per_cache_block
// );
// assert_eq!(
// deserialized_stats.runtime_cpu_memory_usage,
// expected_stats.runtime_cpu_memory_usage
// );
// assert_eq!(
// deserialized_stats.runtime_gpu_memory_usage,
// expected_stats.runtime_gpu_memory_usage
// );
// assert_eq!(
// deserialized_stats.runtime_pinned_memory_usage,
// expected_stats.runtime_pinned_memory_usage
// );
// assert_eq!(
// deserialized_stats.iteration_counter,
// expected_stats.iteration_counter
// );
// assert_eq!(
// deserialized_stats.microbatch_id,
// expected_stats.microbatch_id
// );
// assert_eq!(
// deserialized_stats.total_context_tokens,
// expected_stats.total_context_tokens
// );
// assert_eq!(deserialized_stats.timestamp, expected_stats.timestamp);
// }
#[test]
fn test_deserialize_without_stats() {
let json_data = r#"{
"delta": {
"is_complete": false,
"finish_reason": null,
"token_ids": null,
"tokens": null,
"text": null,
"sequence_length": null,
"index": null,
"cum_log_probs": null,
"err_msg": null,
"usage": null
}
}"#;
// Deserialize the JSON string
let deserialized: StreamingCompletionResponse =
serde_json::from_str(json_data).expect("Failed to deserialize");
// Expected response object
let expected = StreamingCompletionResponse {
delta: Delta {
is_complete: false,
finish_reason: None,
token_ids: None,
tokens: None,
text: None,
sequence_length: None,
index: None,
cum_log_probs: None,
err_msg: None,
usage: None,
},
logprobs: None,
};
        // This is unwieldy, but we can no longer do assert_eq!(deserialized, expected);
        // because the struct no longer has the PartialEq trait
assert_eq!(deserialized.delta.is_complete, expected.delta.is_complete);
assert_eq!(
deserialized.delta.finish_reason,
expected.delta.finish_reason
);
assert_eq!(deserialized.delta.token_ids, expected.delta.token_ids);
assert_eq!(deserialized.delta.tokens, expected.delta.tokens);
assert_eq!(deserialized.delta.text, expected.delta.text);
assert_eq!(
deserialized.delta.sequence_length,
expected.delta.sequence_length
);
assert_eq!(deserialized.delta.index, expected.delta.index);
assert_eq!(
deserialized.delta.cum_log_probs,
expected.delta.cum_log_probs
);
assert_eq!(deserialized.delta.err_msg, expected.delta.err_msg);
assert_eq!(deserialized.delta.usage, expected.delta.usage);
}
#[test]
fn test_serialize_delta_and_none_stats() {
let response = StreamingCompletionResponse {
delta: Delta {
is_complete: true,
finish_reason: Some(FinishReason::Length),
token_ids: Some(vec![101, 102, 103]),
tokens: Some(vec!["token1".to_string(), "token2".to_string()]),
text: Some("example text".to_string()),
sequence_length: Some(3),
index: Some(0),
cum_log_probs: Some(-0.5),
err_msg: None,
usage: None,
},
logprobs: None,
};
// Serialize the response
let serialized = serde_json::to_string(&response).expect("Failed to serialize");
// Expected JSON string where stats is null
let expected_json = r#"{
"delta": {
"is_complete": true,
"finish_reason": "length",
"token_ids": [101, 102, 103],
"tokens": ["token1", "token2"],
"text": "example text",
"sequence_length": 3,
"index": 0,
"cum_log_probs": -0.5,
"err_msg": null,
"usage": null
}
}"#;
// Parse both the serialized response and the expected JSON as serde_json::Value for easy comparison
assert_eq!(
serde_json::from_str::<serde_json::Value>(&serialized).unwrap(),
serde_json::from_str::<serde_json::Value>(expected_json).unwrap()
);
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ForwardPassMetrics {
pub request_active_slots: u64,
pub request_total_slots: u64,
pub kv_active_blocks: u64,
pub kv_total_blocks: u64,
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use crate::protocols::TokenIdType;
pub type TokenType = Option<String>;
pub type LogProbs = Vec<f64>;
pub use super::preprocessor::PreprocessedRequest as BackendInput;
pub use super::FinishReason;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct BackendOutput {
/// New token_ids generated from the LLM Engine
pub token_ids: Vec<TokenIdType>,
/// Unlike [`LLMEngineOutput::tokens`], this is a vector of tokens, not an optional.
/// The size of this vector should be the same as the size of `token_ids`.
pub tokens: Vec<TokenType>,
    /// Decoded text from the list of tokens.
pub text: Option<String>,
/// Optional cumulative log probabilities
pub cum_log_probs: Option<f64>,
/// Optional log probabilities
pub log_probs: Option<LogProbs>,
    // TODO: Enrich this with more information as we can apply our first-level postprocessing
    // logic and return more detailed information
pub finish_reason: Option<FinishReason>,
/// Model Deployment Card checksum
pub mdcsum: String,
}
/// The LLM engine and backend will manage their own state, specifically translating how a
/// given request/slot is managed on that particular backend.
///
/// For nvLLM's purposes, it has a single traceable request_id as part of its context that
/// has propagated through the service pipeline to the backend.
///
/// This is the minimal raw output from the LLM engine. The Backend may then apply multiple
/// levels of post-processing before the BackendOutput is returned.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LLMEngineOutput {
// new token_ids
pub token_ids: Vec<TokenIdType>,
/// If the LLM Engine performs the detokenization, then this will have a Some of the detokenized
/// text/tokens. If this value is None, then the Backend is responsible for detokenization.
pub tokens: Option<Vec<TokenType>>,
    // decoded text
pub text: Option<String>,
/// cumulative log probabilities
pub cum_log_probs: Option<f64>,
/// Optional log probabilities
pub log_probs: Option<LogProbs>,
    // TODO: Enrich this with more information as we can apply our first-level postprocessing
    // logic and return more detailed information
pub finish_reason: Option<FinishReason>,
}
impl LLMEngineOutput {
pub fn cancelled() -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Cancelled),
}
}
pub fn stop() -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Stop),
}
}
pub fn length() -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Length),
}
}
pub fn error(err_msg: String) -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Error(err_msg)),
}
}
}
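// Illustrative sketch (not part of the crate's tests): the terminal
// constructors above all produce empty outputs whose only payload is the
// finish_reason.
#[cfg(test)]
mod llm_engine_output_example {
    use super::*;

    #[test]
    fn terminal_constructors_set_finish_reason() {
        assert_eq!(
            LLMEngineOutput::stop().finish_reason,
            Some(FinishReason::Stop)
        );
        assert!(LLMEngineOutput::cancelled().token_ids.is_empty());
    }
}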
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use super::FinishReason;
use crate::protocols::TokenIdType;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct PostprocessedResponse {
/// Model Deployment Card checksum
pub mdcsum: String,
// if the number of slots for a given request is greater than 1
// this indicates the index of the slot for the response
pub index: Option<usize>,
pub finish_reason: Option<FinishReason>,
// new token_ids
pub token_ids: Vec<TokenIdType>,
// tokens
pub tokens: Option<Vec<Option<String>>>,
// decoded text
pub text: Option<String>,
/// cumulative log probabilities
pub cum_log_probs: Option<f64>,
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use derive_builder::Builder;
use serde::{Deserialize, Serialize};
use super::{SamplingOptions, StopConditions};
use crate::protocols::TokenIdType;
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`triton-llm-preprocessor`]
/// crate is responsible for converting requests from the public APIs to this internal representation.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
/// Type of prompt
pub token_ids: Vec<TokenIdType>,
/// StopConditions are conditions that the inference engine will use to stop generation.
pub stop_conditions: StopConditions,
    /// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// More documentation on how, and the order in which, sampling options are applied
    /// is needed.
pub sampling_options: SamplingOptions,
/// The EOS token ID(s) for the Model
/// Not every backend needs this, but those that do can find it here.
/// TODO - refactor this to a better location
#[builder(default)]
pub eos_token_ids: Vec<TokenIdType>,
/// The computed checksum of the Model Deployment Card (MDC).
#[builder(default)]
pub mdc_sum: Option<String>,
/// User requested annotations for the request
#[builder(default)]
pub annotations: Vec<String>,
}
impl PreprocessedRequest {
    pub fn has_annotation(&self, annotation: &str) -> bool {
        self.annotations.iter().any(|a| a.as_str() == annotation)
    }

    pub fn builder() -> PreprocessedRequestBuilder {
        PreprocessedRequestBuilder::default()
    }
}
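// Illustrative sketch (an assumption, not part of the public API): only
// `token_ids`, `stop_conditions` and `sampling_options` must be supplied;
// the remaining fields fall back to their `#[builder(default)]` values.
#[allow(dead_code)]
fn example_build_request(
    token_ids: Vec<TokenIdType>,
    stop_conditions: StopConditions,
    sampling_options: SamplingOptions,
) -> PreprocessedRequest {
    PreprocessedRequest::builder()
        .token_ids(token_ids)
        .stop_conditions(stop_conditions)
        .sampling_options(sampling_options)
        .build()
        .expect("all required fields were provided")
}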
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// Forward openai_api_rs::v1 to triton_llm::protocols::openai::v1
pub mod chat_completions;
pub mod completions;
pub mod models;
pub mod nvext;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
fmt::Display,
ops::{Add, Div, Mul, Sub},
};
use validator::ValidationError;
use super::{
common::{self, SamplingOptionsProvider, StopConditionsProvider},
ContentProvider,
};
/// Minimum allowed value for OpenAI's `temperature` sampling option
pub const MIN_TEMPERATURE: f32 = 0.0;
/// Maximum allowed value for OpenAI's `temperature` sampling option
pub const MAX_TEMPERATURE: f32 = 2.0;
/// Allowed range of values for OpenAI's `temperature` sampling option
pub const TEMPERATURE_RANGE: (f32, f32) = (MIN_TEMPERATURE, MAX_TEMPERATURE);
/// Minimum allowed value for OpenAI's `top_p` sampling option
pub const MIN_TOP_P: f32 = 0.0;
/// Maximum allowed value for OpenAI's `top_p` sampling option
pub const MAX_TOP_P: f32 = 1.0;
/// Allowed range of values for OpenAI's `top_p` sampling option
pub const TOP_P_RANGE: (f32, f32) = (MIN_TOP_P, MAX_TOP_P);
/// Minimum allowed value for OpenAI's `frequency_penalty` sampling option
pub const MIN_FREQUENCY_PENALTY: f32 = -2.0;
/// Maximum allowed value for OpenAI's `frequency_penalty` sampling option
pub const MAX_FREQUENCY_PENALTY: f32 = 2.0;
/// Allowed range of values for OpenAI's `frequency_penalty` sampling option
pub const FREQUENCY_PENALTY_RANGE: (f32, f32) = (MIN_FREQUENCY_PENALTY, MAX_FREQUENCY_PENALTY);
/// Minimum allowed value for OpenAI's `presence_penalty` sampling option
pub const MIN_PRESENCE_PENALTY: f32 = -2.0;
/// Maximum allowed value for OpenAI's `presence_penalty` sampling option
pub const MAX_PRESENCE_PENALTY: f32 = 2.0;
/// Allowed range of values for OpenAI's `presence_penalty` sampling option
pub const PRESENCE_PENALTY_RANGE: (f32, f32) = (MIN_PRESENCE_PENALTY, MAX_PRESENCE_PENALTY);
/// Usage statistics for the completion request
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct CompletionUsage {
/// Number of tokens in the generated completion.
pub completion_tokens: i32,
/// Number of tokens in the prompt.
pub prompt_tokens: i32,
/// Total number of tokens used in the request (prompt + completion).
pub total_tokens: i32,
/// Breakdown of tokens used in a completion, optional.
#[serde(skip_serializing_if = "Option::is_none")]
pub completion_tokens_details: Option<CompletionTokensDetails>,
/// Breakdown of tokens used in the prompt, optional.
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_tokens_details: Option<PromptTokensDetails>,
}
// Struct for details on completion tokens
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CompletionTokensDetails {
/// Audio input tokens generated by the model.
pub audio_tokens: Option<i32>,
/// Tokens generated by the model for reasoning.
pub reasoning_tokens: Option<i32>,
}
// Struct for details on prompt tokens
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PromptTokensDetails {
/// Audio input tokens present in the prompt.
pub audio_tokens: Option<i32>,
/// Cached tokens present in the prompt.
pub cached_tokens: Option<i32>,
}
/// Represents a streaming response from the OpenAI API
/// The object is generic over `R`, the type of the response.
/// For SSE streaming responses, the expected `data: ` field is always a JSON
/// object corresponding to `R`; however, SSE comment lines (prefixed with `: `)
/// may carry other types of information, such as performance metrics,
/// as represented by the other arm of this enum.
///
/// This is part of the common API as both the client and service need to agree
/// on the format of the streaming responses.
#[derive(Serialize, Deserialize, Debug)]
pub enum StreamingDelta<R> {
/// Represents a response delta from the API
Delta(R),
Comment(String),
}
#[derive(Serialize, Deserialize, Debug)]
pub struct AnnotatedDelta<R> {
pub delta: R,
pub id: Option<String>,
pub event: Option<String>,
pub comment: Option<String>,
}
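// Illustrative sketch (an assumption about the wire mapping, not the actual
// service implementation): a `StreamingDelta<R>` could be rendered onto SSE
// lines as follows.
#[allow(dead_code)]
fn example_to_sse_line<R: Serialize>(msg: &StreamingDelta<R>) -> Result<String> {
    Ok(match msg {
        // Response deltas travel on `data:` lines as JSON.
        StreamingDelta::Delta(delta) => format!("data: {}", serde_json::to_string(delta)?),
        // Comments travel on `:`-prefixed lines; plain SSE clients ignore them.
        StreamingDelta::Comment(comment) => format!(": {}", comment),
    })
}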
trait OpenAISamplingOptionsProvider {
fn get_temperature(&self) -> Option<f32>;
fn get_top_p(&self) -> Option<f32>;
fn get_frequency_penalty(&self) -> Option<f32>;
fn get_presence_penalty(&self) -> Option<f32>;
fn nvext(&self) -> Option<&nvext::NvExt>;
}
trait OpenAIStopConditionsProvider {
fn get_max_tokens(&self) -> Option<i32>;
fn get_min_tokens(&self) -> Option<i32>;
fn get_stop(&self) -> Option<Vec<String>>;
fn nvext(&self) -> Option<&nvext::NvExt>;
}
impl<T: OpenAISamplingOptionsProvider> SamplingOptionsProvider for T {
fn extract_sampling_options(&self) -> Result<common::SamplingOptions> {
// let result = self.validate();
// if let Err(e) = result {
// return Err(format!("Error validating sampling options: {}", e));
// }
let mut temperature = validate_range(self.get_temperature(), &TEMPERATURE_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating temperature: {}", e))?;
let mut top_p = validate_range(self.get_top_p(), &TOP_P_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating top_p: {}", e))?;
let frequency_penalty =
validate_range(self.get_frequency_penalty(), &FREQUENCY_PENALTY_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating frequency_penalty: {}", e))?;
let presence_penalty = validate_range(self.get_presence_penalty(), &PRESENCE_PENALTY_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating presence_penalty: {}", e))?;
if let Some(nvext) = self.nvext() {
let greedy = nvext.greed_sampling.unwrap_or(false);
if greedy {
top_p = None;
temperature = None;
}
}
Ok(common::SamplingOptions {
n: None,
best_of: None,
frequency_penalty,
presence_penalty,
repetition_penalty: None,
temperature,
top_p,
top_k: None,
min_p: None,
seed: None,
use_beam_search: None,
length_penalty: None,
})
}
}
impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
fn extract_stop_conditions(&self) -> Result<common::StopConditions> {
let max_tokens = self.get_max_tokens().map(|x| x as u32);
let min_tokens = self.get_min_tokens();
let stop = self.get_stop();
if let Some(stop) = &stop {
if stop.len() > 4 {
anyhow::bail!("stop conditions must be less than 4")
}
}
let mut ignore_eos = None;
if let Some(nvext) = self.nvext() {
ignore_eos = nvext.ignore_eos;
}
Ok(common::StopConditions {
max_tokens,
min_tokens: min_tokens.map(|v| v as u32),
stop,
stop_token_ids_hidden: None,
ignore_eos,
})
}
}
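// Illustrative sketch (not part of the crate's tests): any type implementing
// the private OpenAIStopConditionsProvider trait picks up
// `extract_stop_conditions` from the blanket impl above.
#[cfg(test)]
mod stop_conditions_blanket_impl_example {
    use super::*;

    struct FixedRequest;

    impl OpenAIStopConditionsProvider for FixedRequest {
        fn get_max_tokens(&self) -> Option<i32> {
            Some(64)
        }
        fn get_min_tokens(&self) -> Option<i32> {
            None
        }
        fn get_stop(&self) -> Option<Vec<String>> {
            Some(vec!["\n\n".to_string()])
        }
        fn nvext(&self) -> Option<&nvext::NvExt> {
            None
        }
    }

    #[test]
    fn blanket_impl_provides_extraction() {
        let conditions = FixedRequest.extract_stop_conditions().unwrap();
        assert_eq!(conditions.max_tokens, Some(64));
    }
}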
/// Common structure for chat completion responses; the only delta is the type of choices which differs
/// between streaming and non-streaming requests.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GenericCompletionResponse<C>
// where
// C: Serialize + Clone,
{
/// A unique identifier for the chat completion.
pub id: String,
/// A list of chat completion choices. Can be more than one if n is greater than 1.
pub choices: Vec<C>,
/// The Unix timestamp (in seconds) of when the chat completion was created.
pub created: u64,
/// The model used for the chat completion.
pub model: String,
/// The object type, which is `chat.completion` if the type of `Choice` is `ChatCompletionChoice`,
/// or is `chat.completion.chunk` if the type of `Choice` is `ChatCompletionChoiceDelta`.
pub object: String,
pub usage: Option<CompletionUsage>,
/// This fingerprint represents the backend configuration that the model runs with.
///
/// Can be used in conjunction with the seed request parameter to understand when backend changes
/// have been made that might impact determinism.
///
/// NIM Compatibility:
    /// This field is not supported by the NIM; however, it will be added in the future.
/// The optional nature of this field will be relaxed when it is supported.
pub system_fingerprint: Option<String>,
// TODO() - add NvResponseExtention
}
fn validate_logit_bias(logit_bias: &HashMap<String, i32>) -> Result<(), ValidationError> {
for key in logit_bias.keys() {
if key.parse::<i32>().is_err() {
return Err(
ValidationError::new("logit_bias").with_message("Keys must be integers".into())
);
}
}
Ok(())
}
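// Illustrative sketch (not part of the crate's tests): keys must parse as
// integers even though the JSON map uses strings.
#[cfg(test)]
mod logit_bias_validation_example {
    use super::*;

    #[test]
    fn rejects_non_integer_keys() {
        let mut bias = HashMap::new();
        bias.insert("42".to_string(), -100);
        assert!(validate_logit_bias(&bias).is_ok());
        bias.insert("not an int".to_string(), 100);
        assert!(validate_logit_bias(&bias).is_err());
    }
}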
// todo - move to common location
fn validate_range<T>(value: Option<T>, range: &(T, T)) -> Result<Option<T>>
where
T: PartialOrd + Display,
{
if value.is_none() {
return Ok(None);
}
let value = value.unwrap();
if value < range.0 || value > range.1 {
anyhow::bail!("Value {} is out of range [{}, {}]", value, range.0, range.1);
}
Ok(Some(value))
}
// todo - move to common location
/// scale value in `src` range to `dst` range
pub fn scale_value<T>(value: &T, src: &(T, T), dst: &(T, T)) -> Result<T>
where
T: Copy
+ PartialOrd
+ Add<Output = T>
+ Sub<Output = T>
+ Mul<Output = T>
+ Div<Output = T>
+ From<f32>,
{
let dst_range = dst.1 - dst.0;
let src_range = src.1 - src.0;
if dst_range == T::from(0.0) {
anyhow::bail!("dst range is 0");
}
if src_range == T::from(0.0) {
anyhow::bail!("src range is 0");
}
let value_scaled = (*value - src.0) / src_range;
Ok(dst.0 + (value_scaled * dst_range))
}
pub trait DeltaGeneratorExt<ResponseType: Send + Sync + 'static + std::fmt::Debug>:
Send + Sync + 'static
{
fn choice_from_postprocessor(
&mut self,
response: common::llm_backend::BackendOutput,
) -> Result<ResponseType>;
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_validate_range() {
assert_eq!(validate_range(Some(0.5), &(0.0, 1.0)).unwrap(), Some(0.5));
assert_eq!(validate_range(Some(0.0), &(0.0, 1.0)).unwrap(), Some(0.0));
assert_eq!(validate_range(Some(1.0), &(1.0, 1.0)).unwrap(), Some(1.0));
assert_eq!(validate_range(Some(1_i32), &(1, 1)).unwrap(), Some(1));
assert_eq!(
validate_range(Some(1.1), &(0.0, 1.0))
.unwrap_err()
.to_string(),
"Value 1.1 is out of range [0, 1]"
);
assert_eq!(
validate_range(Some(-0.1), &(0.0, 1.0))
.unwrap_err()
.to_string(),
"Value -0.1 is out of range [0, 1]"
);
}
#[test]
fn test_scaled_value() {
assert_eq!(scale_value(&0.5, &(0.0, 1.0), &(0.0, 2.0)).unwrap(), 1.0);
assert_eq!(scale_value(&0.0, &(0.0, 1.0), &(0.0, 2.0)).unwrap(), 0.0);
assert_eq!(scale_value(&-1.0, &(-2.0, 2.0), &(1.0, 2.0)).unwrap(), 1.25);
assert!(scale_value(&1.0, &(1.0, 1.0), &(0.0, 2.0)).is_err());
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::VecDeque;
use std::fmt;
use std::fmt::Display;
use derive_builder::Builder;
use serde::de::{self, SeqAccess, Visitor};
use serde::ser::SerializeMap;
use serde::{Deserialize, Serialize};
use serde::{Deserializer, Serializer};
use serde_json::Value;
use validator::Validate;
mod aggregator;
mod delta;
use super::nvext::NvExtProvider;
pub use super::{CompletionTokensDetails, CompletionUsage, PromptTokensDetails};
// pub use aggregator::DeltaAggregator;
pub use delta::DeltaGenerator;
use super::{
common::{self, ChatCompletionLogprobs, SamplingOptionsProvider, StopConditionsProvider},
nvext::NvExt,
validate_logit_bias, ContentProvider, OpenAISamplingOptionsProvider,
OpenAIStopConditionsProvider,
};
// use crate::AnnotationsProvider;
/// Request object which is used to generate chat completions.
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[builder(build_fn(private, name = "build_internal", validate = "Self::validate"))]
pub struct ChatCompletionRequest {
/// Multi-turn chat messages.
///
/// NIM Compatibility:
    /// Multi-turn chat models vary: some work with the OpenAI ChatGPT format, while others
    /// will require `NvExt`.
pub messages: Vec<ChatCompletionMessage>,
/// Name of the model
#[builder(setter(into))]
pub model: String,
/// The maximum number of tokens that can be generated in the completion.
/// The token count of your prompt plus max_tokens cannot exceed the model's context length.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
#[validate(range(min = 1))]
pub max_tokens: Option<i32>,
/// The minimum number of tokens to generate. We ignore stop tokens until we see this many
/// tokens. Leave this None unless you are working on the pre-processor.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub min_tokens: Option<i32>,
    /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only
    /// server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.
///
/// NIM Compatibility:
    /// The NIM SDK can send extra metadata in the SSE stream using the `:` comment, `event:`,
/// or `id:` fields. See the `enable_sse_metadata` field in the NvExt object.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(strip_option))]
pub stream: Option<bool>,
/// How many chat completion choices to generate for each input message.
///
/// NIM Compatibility:
/// Values greater than 1 are not currently supported by NIM.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub n: Option<i32>,
/// What sampling `temperature` to use, between 0 and 2. Higher values like 0.8 will make the
/// output more random, while lower values like 0.2 will make it more focused and deterministic.
/// OpenAI defaults to 1.0; however, in this crate, the default is None, and model-specific defaults
/// can be applied later as part of associating the request with a given model.
///
    /// OpenAI generally recommends altering this or `top_p` but not both.
///
    /// TODO(): Add a model-specific validation which could enforce that only a single type of sampling can be used.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(min = "super::MIN_TEMPERATURE", max = "super::MAX_TEMPERATURE"))]
#[builder(default, setter(into, strip_option))]
pub temperature: Option<f32>,
/// An alternative to sampling with `temperature`, called nucleus sampling, where the model
/// considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens
/// comprising the top 10% probability mass are considered.
///
/// We generally recommend altering this or `temperature` but not both.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(min = "super::MIN_TOP_P", max = "super::MAX_TOP_P"))]
#[builder(default, setter(into, strip_option))]
pub top_p: Option<f32>,
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency
/// in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(
min = "super::MIN_FREQUENCY_PENALTY",
max = "super::MAX_FREQUENCY_PENALTY"
))]
#[builder(default, setter(into, strip_option))]
pub frequency_penalty: Option<f32>,
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in
/// the text so far, increasing the model's likelihood to talk about new topics.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(
min = "super::MIN_PRESENCE_PENALTY",
max = "super::MAX_PRESENCE_PENALTY"
))]
#[builder(default, setter(into, strip_option))]
pub presence_penalty: Option<f32>,
/// OpenAI specific API fields:
/// See: <https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format>
///
/// NIM Compatibility:
/// This option is not currently supported by NIM LLM. An error will be returned if this field is set.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default)]
pub response_format: Option<Value>,
/// Up to 4 sequences where the API will stop generating further tokens.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(length(max = 4))]
#[builder(default, setter(into, strip_option))]
pub stop: Option<Vec<String>>,
/// Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities
/// of each output token returned in the content of message.
///
/// Not all models support logprobs. If logprobs is set to true for a model that does not support it,
/// the request will be processed as if logprobs is set to false.
///
/// NIM Compatibility:
    /// TODO - Add a NvExt `strict` object which will disable relaxing of model-specific limitations; meaning,
    /// if the user requests `logprobs` and the model does not support them, the request will fail with an error.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(strip_option))]
pub logprobs: Option<bool>,
/// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position,
/// each with an associated log probability. logprobs must be set to true if this parameter is used.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(min = 0, max = 20))]
#[builder(default, setter(into, strip_option))]
pub top_logprobs: Option<i32>,
/// Modify the likelihood of specified tokens appearing in the completion.
///
/// Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an
/// associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs.
/// Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact
/// effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of
/// selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
///
    /// As specified in the OpenAI examples, this is a map of token_ids as strings to a bias value that
    /// is an integer.
    ///
    /// However, OpenAI's SDK examples show that it can also be specified more accurately as a
    /// map of token_ids as ints to a bias value that is also an int.
///
/// NIM Compatibility:
/// In the conversion of the OpenAI request to the internal NIM format, the keys of this map will be
/// validated to ensure they are integers. Since different models may have different tokenizers, the
/// range and values will again be validated on the compute backend to ensure they map to valid tokens
/// in the vocabulary of the model.
///
/// ```
/// use triton_llm::protocols::openai::completions::CompletionRequest;
///
/// let request = CompletionRequest::builder()
/// .prompt("What is the meaning of life?")
/// .model("meta/llama-3.1-8b-instruct")
/// .add_logit_bias(1337, -100) // using an int as a key is ok
/// .add_logit_bias("42", 100) // using a string as a key is also ok
/// .build()
/// .expect("Should not fail");
///
/// assert!(CompletionRequest::builder()
/// .prompt("What is the meaning of life?")
/// .model("meta/llama-3.1-8b-instruct")
/// .add_logit_bias("some non int", -100)
/// .build()
/// .is_err());
/// ```
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(custom(function = "validate_logit_bias"))]
#[builder(default, setter(into, strip_option))]
pub logit_bias: Option<HashMap<String, i32>>,
/// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
///
/// NIM Compatibility:
/// If provided, then the value of this field will be included in the trace metadata and the accounting
/// data (if enabled).
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub user: Option<String>,
/// If specified, our system will make a best effort to sample deterministically, such that repeated
/// requests with the same seed and parameters should return the same result. Determinism is not guaranteed,
/// and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub seed: Option<i64>,
/// A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
/// provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
///
/// NIM Compatibility:
/// This field is not currently supported by NIM LLM. An error will be returned if this field is set.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default)]
pub tools: Option<Vec<Tool>>,
/// Controls which (if any) function is called by the model. none means the model will not call a function
/// and instead generates a message. auto means the model can pick between generating a message or calling
/// a function. Specifying a particular function via {"type": "function", "function": {"name": "my_function"}}
/// forces the model to call that function.
///
/// `none` is the default when no functions are present. `auto` is the default if functions are present.
///
/// NIM Compatibility:
/// This field is not currently supported by NIM LLM. An error will be returned if this field is set.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(serialize_with = "serialize_tool_choice")]
#[builder(default)]
pub tool_choice: Option<ToolChoiceType>,
/// Additional parameters supported by NIM backends
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(strip_option))]
pub nvext: Option<NvExt>,
}
impl ChatCompletionRequest {
pub fn builder() -> ChatCompletionRequestBuilder {
ChatCompletionRequestBuilder::default()
}
}
impl ChatCompletionRequestBuilder {
    // This is a pre-build validation function.
    // It is called before the generated build method (here, `build_internal`).
    // It has access to the internal state of the builder.
fn validate(&self) -> Result<(), String> {
Ok(())
}
/// Builds and validates the ChatCompletionRequest
///
/// ```rust
/// use triton_llm::protocols::openai::chat_completions::ChatCompletionRequest;
///
/// let request = ChatCompletionRequest::builder()
/// .model("mixtral-8x7b-instruct-v0.1")
/// .add_user_message("Hello")
/// .max_tokens(16)
/// .build()
/// .expect("Failed to build ChatCompletionRequest");
/// ```
pub fn build(&self) -> anyhow::Result<ChatCompletionRequest> {
        // Calls build_internal, validates the result, then performs additional
        // post-build validation to check mutually exclusive fields and ensure
        // there are no collisions between them.
let request = self
.build_internal()
.map_err(|e| anyhow::anyhow!("Failed to build ChatCompletionRequest: {}", e))?;
request
.validate()
.map_err(|e| anyhow::anyhow!("Failed to validate ChatCompletionRequest: {}", e))?;
// check mutually exclusive fields
        if request.top_logprobs.is_some() && request.logprobs != Some(true) {
            anyhow::bail!("top_logprobs requires logprobs to be set to true");
        }
Ok(request)
}
/// Add a message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
/// This will either create or append to the `Vec<ChatCompletionMessage>`
pub fn add_message(&mut self, message: ChatCompletionMessage) -> &mut Self {
        // Get the existing messages, or create a new Vec if none exist yet.
self.messages.get_or_insert_with(Vec::new).push(message);
self
}
/// Add a user message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
pub fn add_user_message(&mut self, content: impl Into<String>) -> &mut Self {
self.add_message(ChatCompletionMessage {
role: MessageRole::user,
content: Content::Text(content.into()),
name: None,
})
}
/// Add an assistant message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
pub fn add_assistant_message(&mut self, content: impl Into<String>) -> &mut Self {
self.add_message(ChatCompletionMessage {
role: MessageRole::assistant,
content: Content::Text(content.into()),
name: None,
})
}
/// Add a system message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
pub fn add_system_message(&mut self, content: impl Into<String>) -> &mut Self {
self.add_message(ChatCompletionMessage {
role: MessageRole::system,
content: Content::Text(content.into()),
name: None,
})
}
/// Add a stop condition to the `Vec<String>` in the ChatCompletionRequest
/// This will either create or append to the `Vec<String>`
pub fn add_stop(&mut self, stop: impl Into<String>) -> &mut Self {
self.stop
.get_or_insert_with(|| Some(vec![]))
.as_mut()
.expect("stop should always be Some(Vec)")
.push(stop.into());
self
}
/// Add a token and bias to the `HashMap<String, i32>` in the ChatCompletionRequest
/// This will either create or update the `HashMap<String, i32>`
/// See: [`ChatCompletionRequest::logit_bias`] for more details
pub fn add_logit_bias<T>(&mut self, token_id: T, bias: i32) -> &mut Self
where
T: std::fmt::Display,
{
self.logit_bias
.get_or_insert_with(|| Some(HashMap::new()))
.as_mut()
.expect("logit_bias should always be Some(HashMap)")
.insert(token_id.to_string(), bias);
self
}
}
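// Illustrative sketch (not part of the crate's tests): the post-build check
// in `build` above rejects `top_logprobs` unless `logprobs` is explicitly true.
#[cfg(test)]
mod builder_validation_example {
    use super::*;

    #[test]
    fn top_logprobs_requires_logprobs() {
        assert!(ChatCompletionRequest::builder()
            .model("meta/llama-3.1-8b-instruct")
            .add_user_message("Hello")
            .top_logprobs(5)
            .build()
            .is_err());

        assert!(ChatCompletionRequest::builder()
            .model("meta/llama-3.1-8b-instruct")
            .add_user_message("Hello")
            .logprobs(true)
            .top_logprobs(5)
            .build()
            .is_ok());
    }
}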
/// Each turn in a conversation is represented by a ChatCompletionMessage.
#[derive(Builder, Debug, Deserialize, Serialize, Clone)]
pub struct ChatCompletionMessage {
pub role: MessageRole,
#[serde(deserialize_with = "deserialize_content")]
pub content: Content,
#[serde(skip_serializing_if = "Option::is_none", default)]
#[builder(default)]
pub name: Option<String>,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum MessageRole {
user,
system,
assistant,
function,
}
impl Display for MessageRole {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
use MessageRole::*;
let s = match self {
user => "user",
system => "system",
assistant => "assistant",
function => "function",
};
write!(f, "{s}")
}
}
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub enum Content {
Text(String),
ImageUrl(Vec<ImageUrl>),
}
impl serde::Serialize for Content {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match *self {
Content::Text(ref text) => serializer.serialize_str(text),
Content::ImageUrl(ref image_url) => image_url.serialize(serializer),
}
}
}
fn deserialize_content<'de, D>(deserializer: D) -> Result<Content, D::Error>
where
D: Deserializer<'de>,
{
struct ContentVisitor;
impl<'de> Visitor<'de> for ContentVisitor {
type Value = Content;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a string or an array of content parts")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Content::Text(value.to_owned()))
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut parts = Vec::new();
while let Some(value) = seq.next_element::<String>()? {
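                // Heuristic: strings that look like http(s) URLs become
                // image_url parts; all other strings become text parts.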
if value.starts_with("http://") || value.starts_with("https://") {
parts.push(ImageUrl {
r#type: ContentType::image_url,
text: None,
image_url: Some(ImageUrlType { url: value }),
});
} else {
parts.push(ImageUrl {
r#type: ContentType::text,
text: Some(value),
image_url: None,
});
}
}
Ok(Content::ImageUrl(parts))
}
}
deserializer.deserialize_any(ContentVisitor)
}
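// Illustrative sketch (not part of the crate's tests): `content` accepts
// either a bare string or an array of strings, per the visitor above.
#[cfg(test)]
mod content_deserialization_example {
    use super::*;

    #[test]
    fn string_and_array_content() {
        let text: ChatCompletionMessage =
            serde_json::from_str(r#"{"role": "user", "content": "Hello"}"#).unwrap();
        assert_eq!(text.content, Content::Text("Hello".to_string()));

        let mixed: ChatCompletionMessage = serde_json::from_str(
            r#"{"role": "user", "content": ["describe this", "https://example.com/cat.png"]}"#,
        )
        .unwrap();
        match mixed.content {
            Content::ImageUrl(parts) => {
                assert_eq!(parts[0].r#type, ContentType::text);
                assert_eq!(parts[1].r#type, ContentType::image_url);
            }
            _ => panic!("expected an array of content parts"),
        }
    }
}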
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ContentType {
text,
image_url,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub struct ImageUrlType {
pub url: String,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub struct ImageUrl {
pub r#type: ContentType,
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub image_url: Option<ImageUrlType>,
}
/// Represents a chat completion response returned by model, based on the provided input.
pub type ChatCompletionResponse = ChatCompletionGeneric<ChatCompletionChoice>;
/// Represents a streamed chunk of a chat completion response returned by model, based on the provided input.
pub type ChatCompletionResponseDelta = ChatCompletionGeneric<ChatCompletionChoiceDelta>;
/// Common structure for chat completion responses; the only delta is the type of choices which differs
/// between streaming and non-streaming requests.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ChatCompletionGeneric<C>
where
C: Serialize + Clone + ContentProvider,
{
/// A unique identifier for the chat completion.
pub id: String,
/// A list of chat completion choices. Can be more than one if n is greater than 1.
pub choices: Vec<C>,
/// The Unix timestamp (in seconds) of when the chat completion was created.
pub created: u64,
/// The model used for the chat completion.
pub model: String,
/// The object type, which is `chat.completion` if the type of `Choice` is `ChatCompletionChoice`,
/// or is `chat.completion.chunk` if the type of `Choice` is `ChatCompletionChoiceDelta`.
pub object: String,
/// Usage information for the completion request.
pub usage: Option<CompletionUsage>,
/// The service tier used for processing the request, optional.
#[serde(skip_serializing_if = "Option::is_none")]
pub service_tier: Option<ServiceTier>,
/// This fingerprint represents the backend configuration that the model runs with.
///
/// Can be used in conjunction with the seed request parameter to understand when backend changes
/// have been made that might impact determinism.
///
/// NIM Compatibility:
    /// This field is not supported by the NIM; however, it will be added in the future.
/// The optional nature of this field will be relaxed when it is supported.
pub system_fingerprint: Option<String>,
// TODO() - add NvResponseExtention
}
// Service tier: `auto`, `scale`, or `default`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "snake_case")]
pub enum ServiceTier {
Auto,
Scale,
Default,
}
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct ChatCompletionChoice {
/// A chat completion message generated by the model.
pub message: ChatCompletionContent,
/// The index of the choice in the list of choices.
pub index: u64,
/// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural
/// stop point or a provided stop sequence, `length` if the maximum number of tokens specified
/// in the request was reached, `content_filter` if content was omitted due to a flag from our content
/// filters, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called
/// a function.
///
/// NIM Compatibility:
/// Only `stop` and `length` are currently supported by NIM.
/// NIM may also provide additional reasons in the future, such as `error`, `timeout`, or `cancellation`.
pub finish_reason: FinishReason,
/// Log probability information for the choice, optional field.
#[serde(skip_serializing_if = "Option::is_none")]
pub logprobs: Option<ChatCompletionLogprobs>,
}
impl ContentProvider for ChatCompletionChoice {
fn content(&self) -> String {
self.message.content()
}
}
/// Same as `ChatCompletionChoice`, but received during a response stream.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChatCompletionChoiceDelta {
/// The index of the choice in the list of choices.
pub index: u64,
/// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural
/// stop point or a provided stop sequence, `length` if the maximum number of tokens specified
/// in the request was reached, `content_filter` if content was omitted due to a flag from our content
/// filters, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called
/// a function.
///
/// NIM Compatibility:
/// Only `stop` and `length` are currently supported by NIM.
/// NIM may also provide additional reasons in the future, such as `error`, `timeout`, or `cancellation`.
pub finish_reason: Option<FinishReason>,
/// A chat completion delta generated by streamed model responses.
pub delta: ChatCompletionContent,
/// Log probability information for the choice, optional field.
#[serde(skip_serializing_if = "Option::is_none")]
pub logprobs: Option<ChatCompletionLogprobs>,
}
impl ContentProvider for ChatCompletionChoiceDelta {
fn content(&self) -> String {
self.delta.content()
}
}
/// A chat completion message generated by the model.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ChatCompletionContent {
/// The role of the author of this message.
#[serde(skip_serializing_if = "Option::is_none")]
pub role: Option<MessageRole>,
/// The contents of the message.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
/// Tool calls made by the model.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_calls: Option<Vec<ToolCall>>,
}
impl ContentProvider for ChatCompletionContent {
fn content(&self) -> String {
self.content.clone().unwrap_or_default()
}
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum ToolChoiceType {
None,
Auto,
ToolChoice { tool: Tool },
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
pub struct Function {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
pub parameters: FunctionParameters,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum JSONSchemaType {
Object,
Number,
String,
Array,
Null,
Boolean,
}
#[derive(Debug, Deserialize, Serialize, Clone, Default, PartialEq, Eq)]
pub struct JSONSchemaDefine {
#[serde(rename = "type")]
pub schema_type: Option<JSONSchemaType>,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub enum_values: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub properties: Option<HashMap<String, Box<JSONSchemaDefine>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub required: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub items: Option<Box<JSONSchemaDefine>>,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
pub struct FunctionParameters {
#[serde(rename = "type")]
pub schema_type: JSONSchemaType,
#[serde(skip_serializing_if = "Option::is_none")]
pub properties: Option<HashMap<String, Box<JSONSchemaDefine>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub required: Option<Vec<String>>,
}
#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum FinishReason {
stop,
length,
content_filter,
tool_calls,
cancelled,
null,
}
/// Implements `FromStr`, parsing a `FinishReason` from its wire-format string.
impl std::str::FromStr for FinishReason {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"stop" => Ok(FinishReason::stop),
"length" => Ok(FinishReason::length),
"content_filter" => Ok(FinishReason::content_filter),
"tool_calls" => Ok(FinishReason::tool_calls),
"null" => Ok(FinishReason::null),
_ => Err(format!("Unknown FinishReason: {}", s)),
}
}
}
impl std::fmt::Display for FinishReason {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
FinishReason::stop => write!(f, "stop"),
FinishReason::length => write!(f, "length"),
FinishReason::content_filter => write!(f, "content_filter"),
FinishReason::tool_calls => write!(f, "tool_calls"),
FinishReason::cancelled => write!(f, "cancelled"),
FinishReason::null => write!(f, "null"),
}
}
}
#[derive(Debug, Deserialize, Serialize)]
#[allow(non_camel_case_types)]
pub struct FinishDetails {
pub r#type: FinishReason,
pub stop: String,
}
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ToolCall {
pub id: String,
pub r#type: String,
pub function: ToolCallFunction,
}
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ToolCallFunction {
#[serde(skip_serializing_if = "Option::is_none")]
pub name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub arguments: Option<String>,
}
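/// Serializes an optional `ToolChoiceType` in the OpenAI wire format: the
/// strings `"none"` or `"auto"`, a `{ "type": ..., "function": ... }` map for
/// a specific tool, or `null` when unset.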
fn serialize_tool_choice<S>(
value: &Option<ToolChoiceType>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match value {
Some(ToolChoiceType::None) => serializer.serialize_str("none"),
Some(ToolChoiceType::Auto) => serializer.serialize_str("auto"),
Some(ToolChoiceType::ToolChoice { tool }) => {
let mut map = serializer.serialize_map(Some(2))?;
map.serialize_entry("type", &tool.r#type)?;
map.serialize_entry("function", &tool.function)?;
map.end()
}
None => serializer.serialize_none(),
}
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
pub struct Tool {
pub r#type: ToolType,
pub function: Function,
}
#[derive(Debug, Deserialize, Serialize, Copy, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
Function,
}
impl ChatCompletionRequest {}
impl NvExtProvider for ChatCompletionRequest {
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
}
fn raw_prompt(&self) -> Option<String> {
None
}
}
// impl AnnotationsProvider for ChatCompletionRequest {
// fn annotations(&self) -> Option<Vec<String>> {
// self.nvext
// .as_ref()
// .and_then(|nvext| nvext.annotations.clone())
// }
// fn has_annotation(&self, annotation: &str) -> bool {
// self.nvext
// .as_ref()
// .and_then(|nvext| nvext.annotations.as_ref())
// .map(|annotations| annotations.contains(&annotation.to_string()))
// .unwrap_or(false)
// }
// }
impl OpenAISamplingOptionsProvider for ChatCompletionRequest {
fn get_temperature(&self) -> Option<f32> {
self.temperature
}
fn get_top_p(&self) -> Option<f32> {
self.top_p
}
fn get_frequency_penalty(&self) -> Option<f32> {
self.frequency_penalty
}
fn get_presence_penalty(&self) -> Option<f32> {
self.presence_penalty
}
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
}
}
impl OpenAIStopConditionsProvider for ChatCompletionRequest {
fn get_max_tokens(&self) -> Option<i32> {
self.max_tokens
}
fn get_min_tokens(&self) -> Option<i32> {
self.min_tokens
}
fn get_stop(&self) -> Option<Vec<String>> {
self.stop.clone()
}
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
}
}
/// Implements `TryFrom` for converting an OpenAI `ChatCompletionRequest` into an engine `common::CompletionRequest`.
impl TryFrom<ChatCompletionRequest> for common::CompletionRequest {
type Error = anyhow::Error;
fn try_from(request: ChatCompletionRequest) -> Result<Self, Self::Error> {
// openai_api_rs::v1::chat_completion
// pub struct ChatCompletionRequest {
// NA pub model: String,
// L pub messages: Vec<ChatCompletionMessage, Global>,
// SO pub temperature: Option<f32>,
// SO pub top_p: Option<f32>,
// SO pub n: Option<i32>,
// ** pub response_format: Option<Value>,
// NA pub stream: Option<bool>, // See Issue #8
// SC pub stop: Option<Vec<String, Global>>,
// SC pub max_tokens: Option<i32>,
// SO pub presence_penalty: Option<f32>,
// SO pub frequency_penalty: Option<f32>,
// ** pub logit_bias: Option<HashMap<String, i32, RandomState>>,
// ** pub user: Option<String>,
// SO pub seed: Option<i64>,
// ** pub tools: Option<Vec<Tool, Global>>,
// ** pub tool_choice: Option<ToolChoiceType>,
// }
//
// ** not supported
// NA not applicable
// L local in this method
// SO extract_sampling_options
// SC extract_stop_conditions
// First we validate the OpenAI request. We cannot validate everything, as some
// fields require backend awareness, but we can validate against the public
// OpenAI limits.
request
.validate()
.map_err(|e| anyhow::anyhow!("Failed to validate ChatCompletionRequest: {}", e))?;
// todo(ryan) - open a ticket to support this
if request.logit_bias.is_some() {
anyhow::bail!("logit_bias is not supported");
}
// todo(ryan) - add support for user
if request.user.is_some() {
anyhow::bail!("user is not supported");
}
if request.response_format.is_some() {
anyhow::bail!("response_format is not supported");
}
if request.tools.is_some() {
anyhow::bail!("tools is not supported");
}
if request.tool_choice.is_some() {
anyhow::bail!("tool_choice is not supported");
}
// sampling options
let sampling_options = request
.extract_sampling_options()
.map_err(|e| anyhow::anyhow!("Failed to extract SamplingOptions: {}", e))?;
// stop conditions
let stop_conditions = request
.extract_stop_conditions()
.map_err(|e| anyhow::anyhow!("Failed to extract StopConditions: {}", e))?;
// first we need to process the messages
let prompt = common::PromptType::ChatCompletion(
validate_and_collect_chat_messages(request.messages)
.map_err(|e| anyhow::anyhow!("Failed to validate chat messages: {}", e))?,
);
// return the completion request
Ok(common::CompletionRequest {
prompt,
stop_conditions,
sampling_options,
mdc_sum: None,
annotations: None,
})
}
}
impl TryFrom<common::StreamingCompletionResponse> for ChatCompletionChoice {
type Error = anyhow::Error;
fn try_from(response: common::StreamingCompletionResponse) -> Result<Self, Self::Error> {
let choice = ChatCompletionChoice {
index: response.delta.index.unwrap_or(0) as u64,
message: ChatCompletionContent {
role: Some(MessageRole::assistant),
content: response.delta.text,
tool_calls: None,
},
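// Map engine finish reasons onto OpenAI wire values; note that
// `Cancelled` is surfaced as `null` rather than `cancelled` here.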
finish_reason: match &response.delta.finish_reason {
Some(common::FinishReason::EoS) => FinishReason::stop,
Some(common::FinishReason::Stop) => FinishReason::stop,
Some(common::FinishReason::Length) => FinishReason::length,
Some(common::FinishReason::Error(err_msg)) => {
return Err(anyhow::anyhow!("finish_reason::error = {}", err_msg));
}
Some(common::FinishReason::Cancelled) => FinishReason::null,
None => FinishReason::null,
},
logprobs: response.logprobs,
};
Ok(choice)
}
}
impl TryFrom<common::StreamingCompletionResponse> for ChatCompletionChoiceDelta {
type Error = anyhow::Error;
fn try_from(response: common::StreamingCompletionResponse) -> Result<Self, Self::Error> {
let choice = ChatCompletionChoiceDelta {
index: response.delta.index.unwrap_or(0) as u64,
delta: ChatCompletionContent {
role: Some(MessageRole::assistant),
content: response.delta.text,
tool_calls: None,
},
finish_reason: match &response.delta.finish_reason {
Some(common::FinishReason::EoS) => Some(FinishReason::stop),
Some(common::FinishReason::Stop) => Some(FinishReason::stop),
Some(common::FinishReason::Length) => Some(FinishReason::length),
Some(common::FinishReason::Error(err_msg)) => {
return Err(anyhow::anyhow!("finish_reason::error = {}", err_msg));
}
Some(common::FinishReason::Cancelled) => Some(FinishReason::null),
None => None,
},
logprobs: response.logprobs,
};
Ok(choice)
}
}
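/// Splits an OpenAI-style message list into an optional system prompt, a
/// series of completed (user, assistant) turns, and a final user prompt,
/// enforcing that user and assistant messages strictly alternate.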
fn validate_and_collect_chat_messages(
messages: Vec<ChatCompletionMessage>,
) -> Result<common::ChatContext, anyhow::Error> {
let mut system_prompt = None;
let mut turns = VecDeque::new();
let mut last_role = MessageRole::assistant;
for message in messages {
match message.role {
MessageRole::system => {
if system_prompt.is_some() {
return Err(anyhow::anyhow!("More than one system message found"));
}
system_prompt = Some(message.content);
}
MessageRole::user | MessageRole::assistant => {
if last_role == message.role {
if turns.is_empty() {
return Err(anyhow::anyhow!("First message must be a user message"));
}
return Err(anyhow::anyhow!(
"User and assistant messages must alternate"
));
}
last_role = message.role.clone();
turns.push_back(message);
}
MessageRole::function => {} // Ignoring function messages as per assumption.
}
}
if let Some(first) = turns.front() {
if let MessageRole::assistant = first.role {
return Err(anyhow::anyhow!("Sequence must start with a user message"));
}
}
if turns.len() % 2 == 0 {
return Err(anyhow::anyhow!("Sequence must end with a user message"));
}
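// Pair off completed (user, assistant) turns; the final unpaired user
// message becomes the prompt.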
let mut context = Vec::new();
while turns.len() >= 2 {
let user = turns.pop_front().unwrap();
let asst = turns.pop_front().unwrap();
let user = match user.content {
Content::Text(text) => text,
_ => return Err(anyhow::anyhow!("User message must be text")),
};
let asst = match asst.content {
Content::Text(text) => text,
_ => return Err(anyhow::anyhow!("Assistant message must be text")),
};
context.push(common::ChatTurn {
user,
assistant: asst,
});
}
let prompt = turns.pop_back().unwrap();
let prompt = match prompt.content {
Content::Text(text) => text,
_ => return Err(anyhow::anyhow!("Prompt message must be text")),
};
let system_prompt = match system_prompt {
Some(Content::Text(text)) => Some(text),
Some(_) => return Err(anyhow::anyhow!("System prompt must be text")),
None => None,
};
Ok(common::ChatContext {
completion: common::CompletionContext {
prompt,
system_prompt,
},
context,
})
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use serde_json::json;
use std::error::Error;
use super::*;
#[test]
fn test_chat_completions_valid_request_minimal() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.build();
assert!(
request.is_ok(),
"Request should succeed with minimal fields"
);
Ok(())
}
#[test]
fn test_chat_completions_valid_request_full() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.max_tokens(50)
.stream(true)
.n(1)
.temperature(1.0)
.top_p(0.9)
.frequency_penalty(0.5)
.presence_penalty(0.5)
.stop(vec!["The end.".to_string()])
.logprobs(true)
.top_logprobs(5)
.logit_bias(HashMap::new())
.user("test_user")
.seed(1234)
.build();
println!("{:?}", request);
assert!(
request.is_ok(),
"Request should succeed with all fields set"
);
Ok(())
}
#[test]
fn test_chat_completions_top_logprobs_requires_logprobs() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.top_logprobs(5) // logprobs is not set to true
.build();
assert!(
request.is_err(),
"Request should fail when top_logprobs is set without logprobs being true"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_max_tokens_out_of_range() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.max_tokens(4097) // assuming the model has a max context length of 4096
.build();
assert!(
request.is_err(),
"Request should fail when max_tokens exceeds model's context length"
);
Ok(())
}
#[test]
fn test_chat_completions_invalid_top_p() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.top_p(1.5) // Invalid, should be between 0 and 1
.build();
assert!(
request.is_err(),
"Request should fail with invalid top_p value"
);
Ok(())
}
#[test]
fn test_chat_completions_missing_messages() -> Result<(), Box<dyn Error>> {
// Missing messages field in the request
let request_result = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct") // Valid model
.build(); // This should fail because no messages are provided.
assert!(
request_result.is_err(),
"Expected request to fail without messages."
);
if let Err(e) = request_result {
println!("Expected error: {}", e); // Optionally print the error for debugging
}
Ok(())
}
#[test]
fn test_chat_completions_negative_max_tokens() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello, world!")
.max_tokens(-10)
.build();
assert!(
request.is_err(),
"Request should fail with negative max_tokens"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_unsupported_logit_bias() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello, world!")
.add_logit_bias("50256", -100)
.build();
assert!(request.is_err(), "Request should fail with logit_bias");
Ok(())
}
#[test]
fn test_chat_completions_invalid_temperature() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.temperature(2.5) // Invalid, should be between 0 and 2
.build();
assert!(
request.is_err(),
"Request should fail with invalid temperature"
);
Ok(())
}
#[test]
fn test_chat_completions_max_stop_sequences() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Tell me a story.")
.stop(vec![
"The end.".to_string(),
"Once upon a time,".to_string(),
"And then,".to_string(),
"They lived happily ever after.".to_string(),
]) // 4 stop sequences, valid
.build();
assert!(
request.is_ok(),
"Request should succeed with 4 stop sequences"
);
Ok(())
}
#[test]
fn test_chat_completions_large_stop_sequences() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Tell me a story.")
.stop(vec![
"The end.".to_string(),
"And so,".to_string(),
"Once upon a time,".to_string(),
"They lived happily ever after.".to_string(),
"Unexpected stop.".to_string(),
])
.build();
assert!(
request.is_err(),
"Request should fail with too many stop sequences"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_invalid_stop_sequences() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Tell me a joke.")
.stop(vec!["".to_string()])
.build();
assert!(
request.is_err(),
"Request should fail with invalid stop sequences"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_presence_penalty_out_of_range() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("What's up?")
.presence_penalty(3.0) // Out of valid range (-2.0 to 2.0)
.build();
assert!(
request.is_err(),
"Request should fail with invalid presence_penalty"
);
Ok(())
}
#[test]
fn test_chat_completions_invalid_presence_penalty() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("What's up?")
.presence_penalty(-2.5) // Invalid, should be between -2.0 and 2.0
.build();
assert!(
request.is_err(),
"Request should fail with invalid presence_penalty"
);
Ok(())
}
#[ignore]
#[tokio::test]
async fn test_chat_completions_with_user_field() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hi there!")
.user("test_user")
.build()
.unwrap();
// assert!(request.is_err(), "Request should fail with 'user' field");
let result: Result<common::CompletionRequest> = request.try_into();
assert!(
result.is_err(),
"Conversion should fail with 'user' field set",
);
Ok(())
}
#[test]
fn test_chat_completions_valid_with_seed() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Repeatable result")
.seed(12345)
.build();
assert!(
request.is_ok(),
"Request should succeed with seed value for determinism"
);
Ok(())
}
#[test]
fn test_validate_chat_messages_multiple_system_messages() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_system_message("System message 1")
.add_system_message("System message 2")
.add_user_message("Hello!")
.build()?;
let result = validate_and_collect_chat_messages(request.messages.clone());
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(e.to_string(), "More than one system message found");
}
Ok(())
}
#[test]
fn test_validate_chat_messages_user_messages_do_not_alternate() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_user_message("Hello!")
.add_user_message("How are you?")
.build()?;
let result = validate_and_collect_chat_messages(request.messages.clone());
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(e.to_string(), "User and assistant messages must alternate");
}
Ok(())
}
#[ignore]
#[test]
fn test_validate_chat_messages_user_message_not_text() -> Result<(), Box<dyn Error>> {
let message = ChatCompletionMessage {
role: MessageRole::user,
content: Content::ImageUrl(vec![ImageUrl {
r#type: ContentType::image_url,
text: None,
image_url: Some(ImageUrlType {
url: "http://example.com/image.png".to_string(),
}),
}]),
name: None,
};
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_message(message)
.build()?;
let result = validate_and_collect_chat_messages(request.messages.clone());
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(e.to_string(), "Generic error: User message must be text");
}
Ok(())
}
#[test]
fn test_try_from_chat_completion_request_with_unsupported_fields() -> Result<(), Box<dyn Error>>
{
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_user_message("Hello!")
.response_format(Some(json!({"format": "unsupported"})))
.tools(Some(vec![Tool {
r#type: ToolType::Function,
function: Function {
name: "test_function".to_string(),
description: None,
parameters: FunctionParameters {
schema_type: JSONSchemaType::Object,
properties: None,
required: None,
},
},
}]))
.tool_choice(Some(ToolChoiceType::Auto))
.build()?;
let result: Result<common::CompletionRequest> = request.try_into();
assert!(
result.is_err(),
"Conversion should fail with unsupported fields"
);
Ok(())
}
#[test]
fn test_deserialize_content_with_image_urls() {
let json_data = r#"
{
"role": "assistant",
"content": [
"This is a text message.",
"https://example.com/image1.png",
"Another text message.",
"https://example.com/image2.png"
]
}
"#;
let message: ChatCompletionMessage =
serde_json::from_str(json_data).expect("Deserialization failed");
if let Content::ImageUrl(parts) = message.content {
assert_eq!(parts.len(), 4);
assert_eq!(parts[0].r#type, ContentType::text);
assert_eq!(parts[0].text.as_ref().unwrap(), "This is a text message.");
assert_eq!(parts[1].r#type, ContentType::image_url);
assert_eq!(
parts[1].image_url.as_ref().unwrap().url,
"https://example.com/image1.png"
);
} else {
panic!("Expected Content::ImageUrl");
}
}
#[test]
fn test_try_from_chat_completion_request_success() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_user_message("Hello!")
.add_assistant_message("Hi there!")
.add_user_message("How are you?")
.build()?;
let completion_request: common::CompletionRequest = request.try_into()?;
assert!(matches!(
completion_request.prompt,
common::PromptType::ChatCompletion(_)
));
Ok(())
}
#[test]
fn test_chat_completion_sampling_params_with_valid_nvext() {
let nvext = NvExt {
ignore_eos: Some(true),
repetition_penalty: Some(0.6),
top_k: Some(3),
use_raw_prompt: None,
greed_sampling: None,
annotations: None,
};
let request = ChatCompletionRequest::builder()
.nvext(nvext)
.model("foo")
.add_system_message("Hello!")
.build()
.expect("Failed to build request with valid nvext");
assert_eq!(request.nvext.as_ref().unwrap().ignore_eos, Some(true));
assert_eq!(
request.nvext.as_ref().unwrap().repetition_penalty,
Some(0.6)
);
assert_eq!(request.nvext.as_ref().unwrap().top_k, Some(3));
}
#[test]
fn test_completion_sampling_params_without_nvext() {
let request = ChatCompletionRequest::builder()
.model("foo")
.add_user_message("Test")
.build()
.unwrap();
assert_eq!(request.frequency_penalty, None);
assert_eq!(request.logprobs, None);
}
#[test]
fn test_completion_sampling_params_with_valid_nvext() {
let nvext = NvExt {
ignore_eos: Some(true),
repetition_penalty: Some(0.6),
top_k: Some(3),
..Default::default()
};
let request = ChatCompletionRequest::builder()
.nvext(nvext)
.model("foo")
.add_user_message("Test")
.build()
.expect("Failed to build request with valid nvext");
assert_eq!(request.nvext.as_ref().unwrap().ignore_eos, Some(true));
assert_eq!(
request.nvext.as_ref().unwrap().repetition_penalty,
Some(0.6)
);
assert_eq!(request.nvext.as_ref().unwrap().top_k, Some(3));
}
// #[test]
// fn test_normalize_unicode_characters() {
// let str = "Hello there how are you\u{E0020}?".to_string();
// let normalized = str.sanitize_text();
// assert_eq!(normalized, "Hello there how are you?");
// }
// #[tokio::test]
// async fn test_chat_completion_request_filtered() {
// // Define input messages with Unicode character to filter
// let messages = vec![
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text(
// "Hello there how are you\u{E0020}?"
// .to_string()
// .normalize_unicode_characters(),
// ),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::assistant,
// content: Content::Text("How may I help you?".to_string()),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text("Do something for me?".to_string()),
// name: None,
// },
// ];
// // Define expected filtered messages
// let expected = vec![
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text("Hello there how are you?".to_string()),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::assistant,
// content: Content::Text("How may I help you?".to_string()),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text("Do something for me?".to_string()),
// name: None,
// },
// ];
// // Build ChatCompletionRequest with filtering applied
// let request = ChatCompletionRequest::builder()
// .model("foo")
// .messages(messages)
// .build()
// .expect("Failed to build ChatCompletionRequest");
// // Validate each message matches the expected filtered content
// for (i, message) in request.messages.iter().enumerate() {
// assert_eq!(message.role, expected[i].role);
// if let Content::Text(ref content) = message.content {
// if let Content::Text(ref expected_content) = expected[i].content {
// assert_eq!(content, expected_content);
// }
// }
// }
// }
}