Commit ffc6dde1 authored by Ryan Olson, committed by GitHub

feat: OpenAI compatible http service (#123)


Signed-off-by: Ryan Olson <ryanolson@users.noreply.github.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
Co-authored-by: Neelay Shah <neelays@nvidia.com>
parent 9d6643b7
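
For context on the feature itself: an OpenAI-compatible HTTP service is one that mirrors OpenAI's REST surface (for example, POST /v1/chat/completions with OpenAI's request and response shapes) so existing OpenAI clients work unchanged. The sketch below shows that endpoint shape using axum, which appears in the lockfile added by this commit. It is an illustrative assumption, not this commit's implementation: the types are abbreviated, and the port, route handler, and echo stub are placeholders.

// Minimal sketch (not the commit's code) of an OpenAI-style
// chat-completions endpoint built on axum. Field sets are abbreviated;
// the reply is a hard-coded stub standing in for real inference.
use axum::{routing::post, Json, Router};
use serde::{Deserialize, Serialize};

#[derive(Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<Message>,
}

#[derive(Serialize, Deserialize)]
struct Message {
    role: String,
    content: String,
}

#[derive(Serialize)]
struct ChatCompletionResponse {
    id: String,
    object: String,
    model: String,
    choices: Vec<Choice>,
}

#[derive(Serialize)]
struct Choice {
    index: u32,
    message: Message,
    finish_reason: String,
}

async fn chat_completions(
    Json(req): Json<ChatCompletionRequest>,
) -> Json<ChatCompletionResponse> {
    // Echo back the requested model with a stub assistant message.
    Json(ChatCompletionResponse {
        id: "chatcmpl-0".into(),
        object: "chat.completion".into(),
        model: req.model,
        choices: vec![Choice {
            index: 0,
            message: Message {
                role: "assistant".into(),
                content: "stub reply".into(),
            },
            finish_reason: "stop".into(),
        }],
    })
}

#[tokio::main]
async fn main() {
    // Route shape matches the OpenAI API path; the port is illustrative.
    let app = Router::new().route("/v1/chat/completions", post(chat_completions));
    let listener = tokio::net::TcpListener::bind("0.0.0.0:8080").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}
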
...@@ -43,7 +43,7 @@ repos: ...@@ -43,7 +43,7 @@ repos:
- id: codespell - id: codespell
additional_dependencies: [tomli] additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"] args: ["--toml", "pyproject.toml"]
exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$|.*tests/data/replays.*)
# More details about these pre-commit hooks here: # More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html # https://pre-commit.com/hooks.html
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
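
The added alternative `.*tests/data/replays.*` keeps codespell from flagging recorded replay fixtures: pre-commit matches the `exclude` regex against each candidate file path and skips matches. A minimal sketch of how the widened pattern behaves, using Rust's regex crate (the pattern is evaluated by pre-commit as a Python regex, but this syntax is compatible); the sample paths are illustrative, not taken from the repository.

// Minimal sketch (not part of the commit): apply the exclude pattern
// to a few hypothetical paths and report which ones would be skipped.
use regex::Regex;

fn main() {
    let exclude = Regex::new(
        r"(?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$|.*tests/data/replays.*)",
    )
    .unwrap();
    for path in ["CHANGELOG.md", "tests/data/replays/chat.json", "src/main.rs"] {
        let skipped = exclude.is_match(path);
        println!("{path}: {}", if skipped { "skipped" } else { "spell-checked" });
    }
    // CHANGELOG.md and the replay fixture match and are skipped;
    // src/main.rs matches no alternative and is still spell-checked.
}
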
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anyhow"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "async-nats"
version = "0.38.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76433c4de73442daedb3a59e991d94e85c14ebfc33db53dfcd347a21cd6ef4f8"
dependencies = [
"base64",
"bytes",
"futures",
"memchr",
"nkeys",
"nuid",
"once_cell",
"pin-project",
"portable-atomic",
"rand",
"regex",
"ring",
"rustls-native-certs 0.7.3",
"rustls-pemfile",
"rustls-webpki",
"serde",
"serde_json",
"serde_nanos",
"serde_repr",
"thiserror 1.0.69",
"time",
"tokio",
"tokio-rustls",
"tokio-util",
"tokio-websockets",
"tracing",
"tryhard",
"url",
]
[[package]]
name = "async-once-cell"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-stream"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
dependencies = [
"async-stream-impl",
"futures-core",
"pin-project-lite",
]
[[package]]
name = "async-stream-impl"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "async-trait"
version = "0.1.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "atomic"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
dependencies = [
"bytemuck",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"itoa",
"matchit 0.7.3",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper",
"tower 0.5.2",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
dependencies = [
"axum-core 0.5.0",
"bytes",
"form_urlencoded",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"itoa",
"matchit 0.8.4",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
]
[[package]]
name = "axum-core"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "base64ct"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bitflags"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
name = "blake3"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bstr"
version = "1.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytemuck"
version = "1.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
dependencies = [
"serde",
]
[[package]]
name = "cc"
version = "1.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2"
dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
version = "0.4.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets",
]
[[package]]
name = "console"
version = "0.15.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"windows-sys 0.59.0",
]
[[package]]
name = "const-oid"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "core-foundation"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "curve25519-dalek"
version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
"cfg-if",
"cpufeatures",
"curve25519-dalek-derive",
"digest",
"fiat-crypto",
"rustc_version",
"subtle",
]
[[package]]
name = "curve25519-dalek-derive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "darling"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 2.0.98",
]
[[package]]
name = "darling_macro"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
dependencies = [
"darling_core",
"quote",
"syn 2.0.98",
]
[[package]]
name = "data-encoding"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e60eed09d8c01d3cee5b7d30acb059b76614c918fa0f992e0dd6eeb10daad6f"
[[package]]
name = "der"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0"
dependencies = [
"const-oid",
"pem-rfc7468",
"zeroize",
]
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
"serde",
]
[[package]]
name = "derive-getters"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74ef43543e701c01ad77d3a5922755c6a1d71b22d942cb8042be4994b380caff"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn 2.0.98",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "ed25519"
version = "2.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
dependencies = [
"signature",
]
[[package]]
name = "ed25519-dalek"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871"
dependencies = [
"curve25519-dalek",
"ed25519",
"sha2",
"signature",
"subtle",
]
[[package]]
name = "educe"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417"
dependencies = [
"enum-ordinalize",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
dependencies = [
"serde",
]
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "enum-ordinalize"
version = "4.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5"
dependencies = [
"enum-ordinalize-derive",
]
[[package]]
name = "enum-ordinalize-derive"
version = "4.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
dependencies = [
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "etcd-client"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0452bcc559431b16f472b7ab86e2f9ccd5f3c2da3795afbd6b773665e047fe"
dependencies = [
"http",
"prost",
"tokio",
"tokio-stream",
"tonic",
"tonic-build",
"tower 0.4.13",
"tower-service",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fiat-crypto"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
[[package]]
name = "figment"
version = "0.10.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
dependencies = [
"atomic",
"parking_lot",
"pear",
"serde",
"serde_json",
"tempfile",
"toml",
"uncased",
"version_check",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-executor"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]]
name = "futures-macro"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "futures-sink"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
[[package]]
name = "futures-task"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-util"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
]
[[package]]
name = "getrandom"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
dependencies = [
"cfg-if",
"libc",
"wasi 0.13.3+wasi-0.2.2",
"windows-targets",
]
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "globset"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "h2"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e"
dependencies = [
"atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
"http",
"indexmap 2.7.1",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "http"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea"
dependencies = [
"bytes",
"fnv",
"itoa",
]
[[package]]
name = "http-body"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http",
]
[[package]]
name = "http-body-util"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a"
[[package]]
name = "httpdate"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "hyper"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2",
"http",
"http-body",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.27.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
dependencies = [
"futures-util",
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
"webpki-roots",
]
[[package]]
name = "hyper-timeout"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
dependencies = [
"hyper",
"hyper-util",
"pin-project-lite",
"tokio",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http",
"http-body",
"hyper",
"pin-project-lite",
"socket2",
"tokio",
"tower-service",
"tracing",
]
[[package]]
name = "iana-time-zone"
version = "0.1.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
]
[[package]]
name = "inlinable_string"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "insta"
version = "1.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71c1b125e30d93896b365e156c33dadfffab45ee8400afcbba4752f59de08a86"
dependencies = [
"console",
"globset",
"linked-hash-map",
"once_cell",
"pest",
"pest_derive",
"pin-project",
"serde",
"similar",
"walkdir",
]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]]
name = "litemap"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "local-ip-address"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3669cf5561f8d27e8fc84cc15e58350e70f557d4d65f70e3154e54cd2f8e1782"
dependencies = [
"libc",
"neli",
"thiserror 1.0.69",
"windows-sys 0.59.0",
]
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "mime"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "miniz_oxide"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924"
dependencies = [
"adler2",
]
[[package]]
name = "mio"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
]
[[package]]
name = "multimap"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
[[package]]
name = "neli"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93062a0dce6da2517ea35f301dfc88184ce18d3601ec786a727a87bf535deca9"
dependencies = [
"byteorder",
"libc",
"log",
"neli-proc-macros",
]
[[package]]
name = "neli-proc-macros"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c8034b7fbb6f9455b2a96c19e6edf8dc9fc34c70449938d8ee3b4df363f61fe"
dependencies = [
"either",
"proc-macro2",
"quote",
"serde",
"syn 1.0.109",
]
[[package]]
name = "nid"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4abdf1789932b85dc39446e27f45a1064a30f9e19a2b872b1d09bd59283f85f3"
dependencies = [
"rand",
"serde",
"thiserror 1.0.69",
]
[[package]]
name = "nix"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
dependencies = [
"bitflags",
"cfg-if",
"cfg_aliases",
"libc",
]
[[package]]
name = "nkeys"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f49e787f4c61cbd0f9320b31cc26e58719f6aa5068e34697dd3aea361412fe3"
dependencies = [
"data-encoding",
"ed25519",
"ed25519-dalek",
"getrandom 0.2.15",
"log",
"rand",
"signatory",
]
[[package]]
name = "nuid"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc895af95856f929163a0aa20c26a78d26bfdc839f51b9d5aa7a5b79e52b7e83"
dependencies = [
"rand",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "openssl-probe"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
[[package]]
name = "parking_lot"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "pear"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
dependencies = [
"inlinable_string",
"pear_codegen",
"yansi",
]
[[package]]
name = "pear_codegen"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
dependencies = [
"proc-macro2",
"proc-macro2-diagnostics",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pem-rfc7468"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
dependencies = [
"base64ct",
]
[[package]]
name = "percent-encoding"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pest"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc"
dependencies = [
"memchr",
"thiserror 2.0.11",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pest_meta"
version = "2.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea"
dependencies = [
"once_cell",
"pest",
"sha2",
]
[[package]]
name = "petgraph"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap 2.7.1",
]
[[package]]
name = "pin-project"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkcs8"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
dependencies = [
"der",
"spki",
]
[[package]]
name = "portable-atomic"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]]
name = "prettyplease"
version = "0.2.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac"
dependencies = [
"proc-macro2",
"syn 2.0.98",
]
[[package]]
name = "proc-macro-error-attr2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
dependencies = [
"proc-macro2",
"quote",
]
[[package]]
name = "proc-macro-error2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
dependencies = [
"proc-macro-error-attr2",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "proc-macro2-diagnostics"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"version_check",
"yansi",
]
[[package]]
name = "prometheus"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1"
dependencies = [
"cfg-if",
"fnv",
"lazy_static",
"memchr",
"parking_lot",
"protobuf",
"thiserror 1.0.69",
]
[[package]]
name = "proptest"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50"
dependencies = [
"bit-set",
"bit-vec",
"bitflags",
"lazy_static",
"num-traits",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
"unarray",
]
[[package]]
name = "prost"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-build"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b"
dependencies = [
"heck",
"itertools",
"log",
"multimap",
"once_cell",
"petgraph",
"prettyplease",
"prost",
"prost-types",
"regex",
"syn 2.0.98",
"tempfile",
]
[[package]]
name = "prost-derive"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "prost-types"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc"
dependencies = [
"prost",
]
[[package]]
name = "protobuf"
version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.52.0",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom 0.2.15",
]
[[package]]
name = "rand_xorshift"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
dependencies = [
"base64",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"once_cell",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-pemfile",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"webpki-roots",
"windows-registry",
]
[[package]]
name = "ring"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [
"cc",
"cfg-if",
"getrandom 0.2.15",
"libc",
"spin",
"untrusted",
"windows-sys 0.52.0",
]
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.59.0",
]
[[package]]
name = "rustls"
version = "0.23.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7"
dependencies = [
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki",
"subtle",
"zeroize",
]
[[package]]
name = "rustls-native-certs"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5"
dependencies = [
"openssl-probe",
"rustls-pemfile",
"rustls-pki-types",
"schannel",
"security-framework 2.11.1",
]
[[package]]
name = "rustls-native-certs"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
dependencies = [
"openssl-probe",
"rustls-pki-types",
"schannel",
"security-framework 3.2.0",
]
[[package]]
name = "rustls-pemfile"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]]
name = "rustls-webpki"
version = "0.102.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4"
[[package]]
name = "rusty-fork"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
dependencies = [
"fnv",
"quick-error",
"tempfile",
"wait-timeout",
]
[[package]]
name = "ryu"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "schannel"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "security-framework"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
"bitflags",
"core-foundation 0.9.4",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316"
dependencies = [
"bitflags",
"core-foundation 0.10.0",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "semver"
version = "1.0.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
[[package]]
name = "serde"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "serde_json"
version = "1.0.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "serde_nanos"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a93142f0367a4cc53ae0fead1bcda39e85beccfad3dcd717656cacab94b12985"
dependencies = [
"serde",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
dependencies = [
"itoa",
"serde",
]
[[package]]
name = "serde_repr"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "serde_spanned"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
dependencies = [
"serde",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
dependencies = [
"libc",
]
[[package]]
name = "signatory"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31"
dependencies = [
"pkcs8",
"rand_core",
"signature",
"zeroize",
]
[[package]]
name = "signature"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core",
]
[[package]]
name = "similar"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "socket2"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "spki"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
dependencies = [
"base64ct",
"der",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tempfile"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91"
dependencies = [
"cfg-if",
"fastrand",
"getrandom 0.3.1",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl 1.0.69",
]
[[package]]
name = "thiserror"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
dependencies = [
"thiserror-impl 2.0.11",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "thiserror-impl"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "time"
version = "0.3.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.43.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",
]
[[package]]
name = "tokio-macros"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tokio-rustls"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37"
dependencies = [
"rustls",
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078"
dependencies = [
"bytes",
"futures-core",
"futures-sink",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-websockets"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d"
dependencies = [
"base64",
"bytes",
"futures-core",
"futures-sink",
"http",
"httparse",
"rand",
"ring",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-util",
]
[[package]]
name = "toml"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee"
dependencies = [
"indexmap 2.7.1",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]]
name = "tonic"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum 0.7.9",
"base64",
"bytes",
"h2",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-timeout",
"hyper-util",
"percent-encoding",
"pin-project",
"prost",
"socket2",
"tokio",
"tokio-stream",
"tower 0.4.13",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-build"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
dependencies = [
"prettyplease",
"proc-macro2",
"prost-build",
"prost-types",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
[[package]]
name = "tower-service"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
[[package]]
name = "tracing"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tracing-core"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
dependencies = [
"once_cell",
]
[[package]]
name = "triton-distributed"
version = "0.1.3"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"blake3",
"bytes",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"local-ip-address",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-llm"
version = "0.1.3"
dependencies = [
"anyhow",
"async-stream",
"async-trait",
"axum 0.8.1",
"bytes",
"chrono",
"derive_builder",
"futures",
"insta",
"prometheus",
"proptest",
"regex",
"reqwest",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"triton-distributed",
"unicode-segmentation",
"uuid",
"validator",
]
[[package]]
name = "try-lock"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "tryhard"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c9f0a709784e86923586cff0d872dba54cd2d2e116b3bc57587d15737cfce9d"
dependencies = [
"futures",
"pin-project-lite",
"tokio",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-trie"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "unarray"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "uncased"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
dependencies = [
"version_check",
]
[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "untrusted"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
version = "2.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0"
dependencies = [
"getrandom 0.3.1",
"serde",
]
[[package]]
name = "validator"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa"
dependencies = [
"idna",
"once_cell",
"regex",
"serde",
"serde_derive",
"serde_json",
"url",
"validator_derive",
]
[[package]]
name = "validator_derive"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca"
dependencies = [
"darling",
"once_cell",
"proc-macro-error2",
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.13.3+wasi-0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn 2.0.98",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86e376c75f4f43f44db463cf729e0d3acbf954d13e22c51e26e4c264b4ab545f"
dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen-rt"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [
"bitflags",
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "xxhash-rust"
version = "0.8.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yoke"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
"synstructure",
]
[[package]]
name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[workspace]
members = [
"triton-llm",
]
resolver = "2"
[workspace.package]
version = "0.1.3"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed"
repository = "https://github.com/triton-inference-server/triton_distributed"
[workspace.dependencies]
# local or crates.io
triton-distributed = { version = "0.1.3", path = "../../runtime/rust" }
# crates.io
anyhow = { version = "1" }
async-stream = { version = "0.3" }
async-trait = { version = "0.1" }
bytes = "1"
derive_builder = "0.20"
futures = "0.3"
serde = { version = "1", features = ["derive"] }
thiserror = { version = "2.0.11" }
tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = { version = "0.1" }
validator = { version = "0.20.0", features = ["derive"] }
uuid = { version = "1", features = ["v4", "serde"] }
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name = "triton-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
[dependencies]
# repo
triton-distributed = { workspace = true }
# workspace
anyhow = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
derive_builder = { workspace = true }
futures = { workspace = true }
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }
uuid = { workspace = true }
# protocols
chrono = { version = "0.4" }
serde_json = { version = "1" }
regex = "1"
unicode-segmentation = "1.12"
# http-service
axum = "0.8"
prometheus = { version = "0.13" }
[dev-dependencies]
insta = { version = "1.41", features = ["glob", "json", "redactions"] }
proptest = "1.5.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "rustls-tls"] }
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod service;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! HTTP Service for Nova LLM
//!
//! The primary purpose of this crate is to service the nova-llm-protocols via OpenAI compatible HTTP endpoints. This component
//! is meant to be a gateway/ingress into the Nova LLM Distributed Runtime.
//!
//! In order to create a common pattern, the HttpService forwards the incoming OAI Chat Request or OAI Completion Request
//! to a model-specific engine. Engines can be attached and detached dynamically using the [`ModelManager`].
//!
//! Note: All requests, whether the client requests `stream=true` or `stream=false`, are propagated downstream as `stream=true`.
//! This enables us to handle a single request-response pattern in the downstream services. Non-streaming user requests are
//! aggregated by the HttpService and returned as a single response.
//!
//! TODO(): Add support for model-specific metadata and status. Status will allow us to return a 503 when the model is supposed
//! to be ready, but there is a problem with the model.
//!
//! The [`service::HttpService`] can be further extended to host any [`axum::Router`] using the [`service::HttpServiceBuilder`].
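//!
//! A minimal usage sketch (`my_engine` is a hypothetical value implementing the
//! chat-completions streaming engine trait; not a runnable doctest):
//!
//! ```ignore
//! let manager = ModelManager::new();
//! manager.add_chat_completions_model("my-model", my_engine)?;
//! assert!(manager.has_model_any("my-model"));
//! manager.remove_chat_completions_model("my-model")?;
//! ```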
mod openai;
pub mod error;
pub mod metrics;
pub mod service_v2;
// #[cfg(feature = "py3")]
// pub mod py3;
pub use async_trait::async_trait;
pub use axum;
pub use error::ServiceHttpError;
pub use metrics::Metrics;
use crate::types::openai::{
chat_completions::OpenAIChatCompletionsStreamingEngine,
completions::OpenAICompletionsStreamingEngine,
};
use std::{
collections::HashMap,
sync::{Arc, Mutex},
};
#[derive(Clone)]
pub struct ModelManager {
state: Arc<DeploymentState>,
}
impl Default for ModelManager {
fn default() -> Self {
Self::new()
}
}
impl ModelManager {
pub fn new() -> Self {
let state = Arc::new(DeploymentState::new());
Self { state }
}
pub fn state(&self) -> Arc<DeploymentState> {
self.state.clone()
}
pub fn has_model_any(&self, model: &str) -> bool {
self.state
.chat_completion_engines
.lock()
.unwrap()
.contains(model)
|| self
.state
.completion_engines
.lock()
.unwrap()
.contains(model)
}
pub fn list_chat_completions_models(&self) -> Vec<String> {
self.state.chat_completion_engines.lock().unwrap().list()
}
pub fn list_completions_models(&self) -> Vec<String> {
self.state.completion_engines.lock().unwrap().list()
}
pub fn add_completions_model(
&self,
model: &str,
engine: OpenAICompletionsStreamingEngine,
) -> Result<(), ServiceHttpError> {
let mut clients = self.state.completion_engines.lock().unwrap();
clients.add(model, engine)
}
pub fn add_chat_completions_model(
&self,
model: &str,
engine: OpenAIChatCompletionsStreamingEngine,
) -> Result<(), ServiceHttpError> {
let mut clients = self.state.chat_completion_engines.lock().unwrap();
clients.add(model, engine)
}
pub fn remove_completions_model(&self, model: &str) -> Result<(), ServiceHttpError> {
let mut clients = self.state.completion_engines.lock().unwrap();
clients.remove(model)
}
pub fn remove_chat_completions_model(&self, model: &str) -> Result<(), ServiceHttpError> {
let mut clients = self.state.chat_completion_engines.lock().unwrap();
clients.remove(model)
}
/// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests
pub fn metrics(&self) -> Arc<Metrics> {
self.state.metrics.clone()
}
}
struct ModelEngines<E> {
/// Optional default model name
default: Option<String>,
engines: HashMap<String, E>,
}
impl<E> Default for ModelEngines<E> {
fn default() -> Self {
Self {
default: None,
engines: HashMap::new(),
}
}
}
impl<E> ModelEngines<E> {
#[allow(dead_code)]
fn set_default(&mut self, model: &str) {
self.default = Some(model.to_string());
}
#[allow(dead_code)]
fn clear_default(&mut self) {
self.default = None;
}
fn add(&mut self, model: &str, engine: E) -> Result<(), ServiceHttpError> {
if self.engines.contains_key(model) {
return Err(ServiceHttpError::ModelAlreadyExists(model.to_string()));
}
self.engines.insert(model.to_string(), engine);
Ok(())
}
fn remove(&mut self, model: &str) -> Result<(), ServiceHttpError> {
if self.engines.remove(model).is_none() {
return Err(ServiceHttpError::ModelNotFound(model.to_string()));
}
Ok(())
}
fn get(&self, model: &str) -> Option<&E> {
self.engines.get(model)
}
fn contains(&self, model: &str) -> bool {
self.engines.contains_key(model)
}
fn list(&self) -> Vec<String> {
self.engines.keys().map(|k| k.to_owned()).collect()
}
}
/// The DeploymentState is a global state that is shared across all the workers;
/// it provides the set of known engine clients.
pub struct DeploymentState {
completion_engines: Arc<Mutex<ModelEngines<OpenAICompletionsStreamingEngine>>>,
chat_completion_engines: Arc<Mutex<ModelEngines<OpenAIChatCompletionsStreamingEngine>>>,
metrics: Arc<Metrics>,
}
impl DeploymentState {
fn new() -> Self {
Self {
completion_engines: Arc::new(Mutex::new(ModelEngines::default())),
chat_completion_engines: Arc::new(Mutex::new(ModelEngines::default())),
metrics: Arc::new(Metrics::default()),
}
}
fn get_completions_engine(
&self,
model: &str,
) -> Result<OpenAICompletionsStreamingEngine, ServiceHttpError> {
self.completion_engines
.lock()
.unwrap()
.get(model)
.cloned()
.ok_or(ServiceHttpError::ModelNotFound(model.to_string()))
}
fn get_chat_completions_engine(
&self,
model: &str,
) -> Result<OpenAIChatCompletionsStreamingEngine, ServiceHttpError> {
self.chat_completion_engines
.lock()
.unwrap()
.get(model)
.cloned()
.ok_or(ServiceHttpError::ModelNotFound(model.to_string()))
}
}
/// Documentation for a route
#[derive(Debug)]
pub struct RouteDoc {
method: axum::http::Method,
path: String,
}
impl std::fmt::Display for RouteDoc {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{} {}", self.method, self.path)
}
}
impl RouteDoc {
pub fn new<T: Into<String>>(method: axum::http::Method, path: T) -> Self {
RouteDoc {
method,
path: path.into(),
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ServiceHttpError {
#[error("Model not found: {0}")]
ModelNotFound(String),
#[error("Model already exists: {0}")]
ModelAlreadyExists(String),
}
/// Implementations of the Completion Engines served by the HTTP service should
/// map their custom errors to this error type if they wish to return error
/// codes besides 500.
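///
/// A sketch of how an engine might surface a client-visible error (hypothetical message;
/// `from_anyhow` in the openai module downcasts this back to an `HttpError` and maps it
/// to the corresponding 4xx status code):
///
/// ```ignore
/// return Err(anyhow::Error::new(HttpError {
///     code: 400,
///     message: "prompt must not be empty".to_string(),
/// }));
/// ```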
#[derive(Debug, Error)]
#[error("HTTP Error {code}: {message}")]
pub struct HttpError {
pub code: u16,
pub message: String,
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
use prometheus::{Encoder, HistogramOpts, HistogramVec, IntCounterVec, IntGaugeVec, Opts};
use std::{sync::Arc, time::Instant};
pub use prometheus::Registry;
use super::{DeploymentState, RouteDoc};
/// Value for the `status` label in the request counter for successful requests
pub const REQUEST_STATUS_SUCCESS: &str = "success";
/// Value for the `status` label in the request counter if the request failed
pub const REQUEST_STATUS_ERROR: &str = "error";
/// Value for the `request_type` label in the request counter for streaming requests
pub const REQUEST_TYPE_STREAM: &str = "stream";
/// Value for the `request_type` label in the request counter for unary requests
pub const REQUEST_TYPE_UNARY: &str = "unary";
pub struct Metrics {
request_counter: IntCounterVec,
inflight_gauge: IntGaugeVec,
request_duration: HistogramVec,
}
/// RAII object for the inflight gauge and request counters.
/// If this object is dropped without `mark_ok` having been called, the request counter is
/// incremented with the `status` label set to [`REQUEST_STATUS_ERROR`]; otherwise, it is
/// incremented with the `status` label set to [`REQUEST_STATUS_SUCCESS`]
pub struct InflightGuard {
metrics: Arc<Metrics>,
model: String,
endpoint: Endpoint,
request_type: RequestType,
status: Status,
timer: Instant,
}
/// Requests will be logged by the type of endpoint hit
/// This will include llamastack in the future
pub enum Endpoint {
/// OAI Completions
Completions,
/// OAI Chat Completions
ChatCompletions,
}
/// The type of request: unary (single response) or streaming (many responses)
pub enum RequestType {
/// SingleIn / SingleOut
Unary,
/// SingleIn / ManyOut
Stream,
}
/// Terminal status of a request: success or error
pub enum Status {
Success,
Error,
}
impl Default for Metrics {
fn default() -> Self {
Self::new("nv_llm")
}
}
impl Metrics {
/// Create Metrics with the given prefix
/// The following metrics will be created:
/// - `{prefix}_http_service_requests_total` - IntCounterVec for the total number of requests processed
/// - `{prefix}_http_service_inflight_requests` - IntGaugeVec for the number of inflight requests
/// - `{prefix}_http_service_request_duration_seconds` - HistogramVec for the duration of requests
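///
/// A sketch of creating and registering the metrics (assumes a fresh registry; `nv_llm`
/// is the prefix used by `Metrics::default()`):
///
/// ```ignore
/// let metrics = Metrics::new("nv_llm");
/// let registry = Registry::new();
/// metrics.register(&registry).unwrap();
/// // scrape output now includes nv_llm_http_service_requests_total, etc.
/// ```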
pub fn new(prefix: &str) -> Self {
let request_counter = IntCounterVec::new(
Opts::new(
format!("{}_http_service_requests_total", prefix),
"Total number of LLM requests processed",
),
&["model", "endpoint", "request_type", "status"],
)
.unwrap();
let inflight_gauge = IntGaugeVec::new(
Opts::new(
format!("{}_http_service_inflight_requests", prefix),
"Number of inflight requests",
),
&["model"],
)
.unwrap();
let buckets = vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0];
let request_duration = HistogramVec::new(
HistogramOpts::new(
format!("{}_http_service_request_duration_seconds", prefix),
"Duration of LLM requests",
)
.buckets(buckets),
&["model"],
)
.unwrap();
Metrics {
request_counter,
inflight_gauge,
request_duration,
}
}
/// Get the number of requests recorded for the given dimensions:
/// - model
/// - endpoint (completions/chat_completions)
/// - request type (unary/stream)
/// - status (success/error)
pub fn get_request_counter(
&self,
model: &str,
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
) -> u64 {
self.request_counter
.with_label_values(&[
model,
endpoint.as_str(),
request_type.as_str(),
status.as_str(),
])
.get()
}
/// Increment the counter for requests for the given dimensions:
/// - model
/// - endpoint (completions/chat_completions)
/// - request type (unary/stream)
/// - status (success/error)
fn inc_request_counter(
&self,
model: &str,
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
) {
self.request_counter
.with_label_values(&[
model,
endpoint.as_str(),
request_type.as_str(),
status.as_str(),
])
.inc()
}
/// Get the number of inflight requests for the given model
pub fn get_inflight_count(&self, model: &str) -> i64 {
self.inflight_gauge.with_label_values(&[model]).get()
}
fn inc_inflight_gauge(&self, model: &str) {
self.inflight_gauge.with_label_values(&[model]).inc()
}
fn dec_inflight_gauge(&self, model: &str) {
self.inflight_gauge.with_label_values(&[model]).dec()
}
pub fn register(&self, registry: &Registry) -> Result<(), prometheus::Error> {
registry.register(Box::new(self.request_counter.clone()))?;
registry.register(Box::new(self.inflight_gauge.clone()))?;
registry.register(Box::new(self.request_duration.clone()))?;
Ok(())
}
}
impl DeploymentState {
/// Create a new [`InflightGuard`] for the given model, annotating whether it is a streaming
/// request and which kind of endpoint was hit
///
/// The [`InflightGuard`] is an RAII object that will handle incrementing the inflight gauge and
/// request counters.
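///
/// A sketch of typical handler usage (hypothetical model name):
///
/// ```ignore
/// let mut inflight = state.create_inflight_guard("my-model", Endpoint::Completions, streaming);
/// // ... issue the generate call and build the response ...
/// inflight.mark_ok(); // without this, dropping the guard records an "error" status
/// ```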
pub fn create_inflight_guard(
&self,
model: &str,
endpoint: Endpoint,
streaming: bool,
) -> InflightGuard {
let request_type = if streaming {
RequestType::Stream
} else {
RequestType::Unary
};
InflightGuard::new(
self.metrics.clone(),
model.to_string(),
endpoint,
request_type,
)
}
}
impl InflightGuard {
fn new(
metrics: Arc<Metrics>,
model: String,
endpoint: Endpoint,
request_type: RequestType,
) -> Self {
// Start the timer
let timer = Instant::now();
// Increment the inflight gauge when the guard is created
metrics.inc_inflight_gauge(&model);
// Return the RAII Guard
InflightGuard {
metrics,
model,
endpoint,
request_type,
status: Status::Error,
timer,
}
}
pub(crate) fn mark_ok(&mut self) {
self.status = Status::Success;
}
}
impl Drop for InflightGuard {
fn drop(&mut self) {
// Decrement the gauge when the guard is dropped
self.metrics.dec_inflight_gauge(&self.model);
// the frequency of incrementing the full request counter is relatively low;
// if we were incrementing the counter on every forward pass, we'd use a static CounterVec or
// a discrete counter object to avoid the more costly label lookup required for the following calls
self.metrics.inc_request_counter(
&self.model,
&self.endpoint,
&self.request_type,
&self.status,
);
// Record the duration of the request
self.metrics
.request_duration
.with_label_values(&[&self.model])
.observe(self.timer.elapsed().as_secs_f64());
}
}
impl std::fmt::Display for Endpoint {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Endpoint::Completions => write!(f, "completions"),
Endpoint::ChatCompletions => write!(f, "chat_completions"),
}
}
}
impl Endpoint {
pub fn as_str(&self) -> &'static str {
match self {
Endpoint::Completions => "completions",
Endpoint::ChatCompletions => "chat_completions",
}
}
}
impl RequestType {
pub fn as_str(&self) -> &'static str {
match self {
RequestType::Unary => REQUEST_TYPE_UNARY,
RequestType::Stream => REQUEST_TYPE_STREAM,
}
}
}
impl Status {
pub fn as_str(&self) -> &'static str {
match self {
Status::Success => REQUEST_STATUS_SUCCESS,
Status::Error => REQUEST_STATUS_ERROR,
}
}
}
/// Create a new router with the given path
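///
/// A sketch of mounting the metrics route on an existing axum app (the path defaults
/// to `/metrics` when `None` is passed):
///
/// ```ignore
/// let (route_docs, metrics_router) = router(Registry::new(), None);
/// let app = axum::Router::new().merge(metrics_router);
/// ```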
pub fn router(registry: Registry, path: Option<String>) -> (Vec<RouteDoc>, Router) {
let registry = Arc::new(registry);
let path = path.unwrap_or_else(|| "/metrics".to_string());
let doc = RouteDoc::new(axum::http::Method::GET, &path);
let route = Router::new()
.route(&path, get(handler_metrics))
.with_state(registry);
(vec![doc], route)
}
/// Metrics Handler
async fn handler_metrics(State(registry): State<Arc<Registry>>) -> impl IntoResponse {
let encoder = prometheus::TextEncoder::new();
let metric_families = registry.gather();
let mut buffer = vec![];
if encoder.encode(&metric_families, &mut buffer).is_err() {
return (
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to encode metrics",
)
.into_response();
}
let metrics = match String::from_utf8(buffer) {
Ok(metrics) => metrics,
Err(_) => {
return (
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to encode metrics",
)
.into_response()
}
};
(StatusCode::OK, metrics).into_response()
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use axum::{
extract::State,
http::StatusCode,
response::{
sse::{Event, KeepAlive, Sse},
IntoResponse, Response,
},
routing::{get, post},
Json, Router,
};
use futures::{Stream, StreamExt};
use serde::{Deserialize, Serialize};
use std::{
collections::{HashMap, HashSet},
pin::Pin,
sync::Arc,
time::{SystemTime, UNIX_EPOCH},
};
use tokio_stream::wrappers::ReceiverStream;
use super::DeploymentState;
use super::{
error::HttpError,
metrics::{Endpoint, InflightGuard},
RouteDoc,
};
use crate::protocols::openai::{
chat_completions::ChatCompletionResponse, completions::CompletionResponse,
};
use crate::types::{
openai::{chat_completions::ChatCompletionRequest, completions::CompletionRequest},
Annotated,
};
use triton_distributed::pipeline::{AsyncEngineContext, Context};
#[derive(Serialize, Deserialize)]
pub(crate) struct ErrorResponse {
error: String,
}
impl ErrorResponse {
/// Not Found Error
pub fn model_not_found() -> (StatusCode, Json<ErrorResponse>) {
(
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: "Model not found".to_string(),
}),
)
}
/// Service Unavailable
/// This is returned when the service is live, but not ready.
pub fn _service_unavailable() -> (StatusCode, Json<ErrorResponse>) {
(
StatusCode::SERVICE_UNAVAILABLE,
Json(ErrorResponse {
error: "Service is not ready".to_string(),
}),
)
}
/// Internal Service Error
/// Return this error when the service encounters an internal error.
/// We should return a generic message to the client instead of the real error.
/// Internal Services errors are the result of misconfiguration or bugs in the service.
pub fn internal_server_error(msg: &str) -> (StatusCode, Json<ErrorResponse>) {
tracing::error!("Internal server error: {msg}");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: msg.to_string(),
}),
)
}
/// The OAI endpoints call a [`triton_distributed::engine::AsyncEngine`], which is specialized to return
/// an [`anyhow::Error`]. This method will try to downcast the [`anyhow::Error`] into an [`HttpError`].
/// If the downcast fails, it will return an [`ErrorResponse::internal_server_error`]
/// with the details of the error.
pub fn from_anyhow(err: anyhow::Error, alt_msg: &str) -> (StatusCode, Json<ErrorResponse>) {
match err.downcast::<HttpError>() {
Ok(http_error) => ErrorResponse::from_http_error(http_error),
Err(err) => ErrorResponse::internal_server_error(&format!("{alt_msg}: {err}")),
}
}
/// Implementers should only be able to throw 400-499 errors.
pub fn from_http_error(err: HttpError) -> (StatusCode, Json<ErrorResponse>) {
if err.code < 400 || err.code >= 500 {
return ErrorResponse::internal_server_error(&err.message);
}
match StatusCode::from_u16(err.code) {
Ok(code) => (code, Json(ErrorResponse { error: err.message })),
Err(_) => ErrorResponse::internal_server_error(&err.message),
}
}
}
impl From<HttpError> for ErrorResponse {
fn from(err: HttpError) -> Self {
ErrorResponse { error: err.message }
}
}
/// OpenAI Completions Request Handler
///
/// This method will handle the incoming request for the `/v1/completions` endpoint. The endpoint is a "source"
/// for a [`super::OpenAICompletionsStreamingEngine`] and will return a stream of
/// responses which will be forwarded to the client.
///
/// Note: For all requests, streaming or non-streaming, we always call the engine with streaming enabled. For
/// non-streaming requests, we will fold the stream into a single response as part of this handler.
#[tracing::instrument(skip_all)]
async fn completions(
State(state): State<Arc<DeploymentState>>,
Json(request): Json<CompletionRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
// return a 503 if the service is not ready
check_ready(&state)?;
// todo - extract distributed tracing id and context id from headers
let request_id = uuid::Uuid::new_v4().to_string();
// todo - decide on default
let streaming = request.stream.unwrap_or(false);
// update the request to always stream
let request = CompletionRequest {
stream: Some(true),
..request
};
// todo - make the protocols be optional for model name
// todo - when optional, if none, apply a default
let model = &request.model;
// todo - error handling should be more robust
let engine = state
.get_completions_engine(model)
.map_err(|_| ErrorResponse::model_not_found())?;
// this will increment the inflight gauge for the model
let mut inflight = state.create_inflight_guard(model, Endpoint::Completions, streaming);
// setup context
// todo - inherit request_id from distributed trace details
let request = Context::with_id(request, request_id.clone());
// issue the generate call on the engine
let stream = engine
.generate(request)
.await
.map_err(|e| ErrorResponse::from_anyhow(e, "Failed to generate completions"))?;
// capture the context to cancel the stream if the client disconnects
let ctx = stream.context();
// todo - tap the stream and propagate request level metrics
// note - we might do this as part of the post processing set to make it more generic
if streaming {
let stream = stream.map(|response| Event::try_from(EventConverter::from(response)));
let stream = monitor_for_disconnects(stream.boxed(), ctx, inflight).await;
Ok(Sse::new(stream)
.keep_alive(KeepAlive::default())
.into_response())
} else {
let response = CompletionResponse::from_annotated_stream(stream.into())
.await
.map_err(|e| {
tracing::error!(
"Failed to fold completions stream for {}: {:?}",
request_id,
e
);
ErrorResponse::internal_server_error("Failed to fold completions stream")
})?;
inflight.mark_ok();
Ok(Json(response).into_response())
}
}
/// OpenAI Chat Completions Request Handler
///
/// This method will handle the incoming request for the `/v1/chat/completions` endpoint. The endpoint is a "source"
/// for a [`super::OpenAIChatCompletionsStreamingEngine`] and will return a stream of responses which will be
/// forwarded to the client.
///
/// Note: For all requests, streaming or non-streaming, we always call the engine with streaming enabled. For
/// non-streaming requests, we will fold the stream into a single response as part of this handler.
#[tracing::instrument(skip_all)]
async fn chat_completions(
State(state): State<Arc<DeploymentState>>,
Json(request): Json<ChatCompletionRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
// return a 503 if the service is not ready
check_ready(&state)?;
// todo - extract distributed tracing id and context id from headers
let request_id = uuid::Uuid::new_v4().to_string();
// todo - decide on default
let streaming = request.stream.unwrap_or(false);
// update the request to always stream
let request = ChatCompletionRequest {
stream: Some(true),
..request
};
// todo - make the protocols be optional for model name
// todo - when optional, if none, apply a default
let model = &request.model;
// todo - determine the proper error code for when a request model is not present
tracing::trace!("Getting chat completions engine for model: {}", model);
let engine = state
.get_chat_completions_engine(model)
.map_err(|_| ErrorResponse::model_not_found())?;
// this will increment the inflight gauge for the model
let mut inflight = state.create_inflight_guard(model, Endpoint::ChatCompletions, streaming);
// setup context
// todo - inherit request_id from distributed trace details
let request = Context::with_id(request, request_id.clone());
tracing::trace!("Issuing generate call for chat completions");
// issue the generate call on the engine
let stream = engine
.generate(request)
.await
.map_err(|e| ErrorResponse::from_anyhow(e, "Failed to generate chat completions"))?;
// capture the context to cancel the stream if the client disconnects
let ctx = stream.context();
// todo - tap the stream and propagate request level metrics
// note - we might do this as part of the post processing set to make it more generic
if streaming {
let stream = stream.map(|response| Event::try_from(EventConverter::from(response)));
let stream = monitor_for_disconnects(stream.boxed(), ctx, inflight).await;
Ok(Sse::new(stream)
.keep_alive(KeepAlive::default())
.into_response())
} else {
let response = ChatCompletionResponse::from_annotated_stream(stream.into())
.await
.map_err(|e| {
tracing::error!(
request_id,
"Failed to fold chat completions stream for: {:?}",
e
);
ErrorResponse::internal_server_error(&format!(
"Failed to fold chat completions stream: {}",
e
))
})?;
inflight.mark_ok();
Ok(Json(response).into_response())
}
}
// todo - abstract this to the top level lib.rs to be reused
// todo - move the service_observer to its own state/arc
fn check_ready(_state: &Arc<DeploymentState>) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
// if state.service_observer.stage() != ServiceStage::Ready {
// return Err(ErrorResponse::service_unavailable());
// }
Ok(())
}
/// list models handler, non-standard format
async fn list_models_custom(
State(state): State<Arc<DeploymentState>>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
check_ready(&state)?;
let mut models = HashMap::new();
let chat_models = state
.chat_completion_engines
.lock()
.unwrap()
.engines
.keys()
.cloned()
.collect::<Vec<String>>();
let completion_models = state
.completion_engines
.lock()
.unwrap()
.engines
.keys()
.cloned()
.collect::<Vec<String>>();
models.insert("chat_completion_models", chat_models);
models.insert("completion_models", completion_models);
Ok(Json(models).into_response())
}
/// OpenAI compatible list models handler.
/// Example:
/// {
/// "object": "list",
/// "data": [
/// {
/// "id": "model-id-0",
/// "object": "model",
/// "created": 1686935002,
/// "owned_by": "organization-owner"
/// },
/// ]
/// }
async fn list_models_openai(
State(state): State<Arc<DeploymentState>>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
check_ready(&state)?;
let created = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
let mut data = Vec::new();
let models: HashSet<String> = state
.chat_completion_engines
.lock()
.unwrap()
.engines
.keys()
.chain(state.completion_engines.lock().unwrap().engines.keys())
.cloned()
.collect();
for model_id in models {
data.push(ModelListing {
id: model_id.clone(),
object: "object",
created, // Where would this come from? The GGUF?
owned_by: "nvidia".to_string(), // Get organization from GGUF
});
}
let out = ListModelOpenAI {
object: "list",
data,
};
Ok(Json(out).into_response())
}
#[derive(Serialize)]
struct ListModelOpenAI {
object: &'static str, // always "list"
data: Vec<ModelListing>,
}
#[derive(Serialize)]
struct ModelListing {
id: String,
object: &'static str, // always "object"
created: u64, // Seconds since epoch
owned_by: String,
}
/// This method will consume a stream of SSE events and forward them to a new stream defined by a tokio channel.
/// In this way, if the downstream is dropped, then the upstream will be unable to send any more events. This is
/// how we can monitor for disconnects and stop the generation of completions.
///
/// If a disconnect is detected, a `stop_generating` call is issued on the context, which
/// propagates the cancellation signal to the backend.
async fn monitor_for_disconnects(
stream: Pin<
Box<dyn Stream<Item = Result<axum::response::sse::Event, axum::Error>> + std::marker::Send>,
>,
context: Arc<dyn AsyncEngineContext>,
inflight: InflightGuard,
) -> ReceiverStream<Result<Event, axum::Error>> {
let (tx, rx) = tokio::sync::mpsc::channel(8);
tokio::spawn(async move {
let mut inflight = inflight;
let mut stream = stream;
while let Some(event) = stream.next().await {
let event = match event {
Ok(event) => Ok(event),
Err(err) => Ok(Event::default().event("error").comment(err.to_string())),
};
if (tx.send(event).await).is_err() {
tracing::trace!("Forwarding SSE stream was dropped; breaking loop");
context.stop_generating();
break;
}
}
        // the stream completed successfully - mark as ok
        // this will increment the request counter with a "success" status
if tx.send(Ok(Event::default().data("[DONE]"))).await.is_ok() {
inflight.mark_ok();
}
});
ReceiverStream::new(rx)
}
struct EventConverter<T>(Annotated<T>);
impl<T> From<Annotated<T>> for EventConverter<T> {
fn from(annotated: Annotated<T>) -> Self {
EventConverter(annotated)
}
}
/// Convert an [`Annotated`] into an [`Event`].
/// If the annotation represents an error, return an [`axum::Error`] instead.
/// The [`monitor_for_disconnects`] method will handle the error, emit it to the SSE stream,
/// and then stop the generation of completions.
impl<T: Serialize> TryFrom<EventConverter<T>> for Event {
type Error = axum::Error;
fn try_from(annotated: EventConverter<T>) -> Result<Self, Self::Error> {
let annotated = annotated.0;
let mut event = Event::default();
if let Some(data) = annotated.data {
event = event.json_data(data)?;
}
if let Some(msg) = annotated.event {
if msg == "error" {
let msgs = annotated
.comment
.unwrap_or_else(|| vec!["unspecified error".to_string()]);
return Err(axum::Error::new(msgs.join(" -- ")));
}
event = event.event(msg);
}
if let Some(comments) = annotated.comment {
for comment in comments {
event = event.comment(comment);
}
}
Ok(event)
}
}
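// A minimal sketch (not part of the handler path) of the conversion above:
// a data-bearing annotation becomes an SSE event, while an `event: error`
// annotation surfaces as an `axum::Error`. `serde_json::Value` stands in for
// a concrete response chunk type.
#[allow(dead_code)]
fn example_event_from_annotated(
    delta: Annotated<serde_json::Value>,
) -> Result<Event, axum::Error> {
    Event::try_from(EventConverter::from(delta))
}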
/// Create an Axum [`Router`] for the OpenAI API Completions endpoint
/// If no path is provided, the default path is `/v1/completions`
pub fn completions_router(
state: Arc<DeploymentState>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
let path = path.unwrap_or("/v1/completions".to_string());
let doc = RouteDoc::new(axum::http::Method::POST, &path);
let router = Router::new()
.route(&path, post(completions))
.with_state(state);
(vec![doc], router)
}
/// Create an Axum [`Router`] for the OpenAI API Chat Completions endpoint
/// If no path is provided, the default path is `/v1/chat/completions`
pub fn chat_completions_router(
state: Arc<DeploymentState>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
let path = path.unwrap_or("/v1/chat/completions".to_string());
let doc = RouteDoc::new(axum::http::Method::POST, &path);
let router = Router::new()
.route(&path, post(chat_completions))
.with_state(state);
(vec![doc], router)
}
/// List Models
pub fn list_models_router(
state: Arc<DeploymentState>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
// TODO: Why do we have this endpoint?
let custom_path = path.unwrap_or("/triton/alpha/list-models".to_string());
let doc_for_custom = RouteDoc::new(axum::http::Method::GET, &custom_path);
// Standard OpenAI compatible list models endpoint
let openai_path = "/v1/models".to_string();
let doc_for_openai = RouteDoc::new(axum::http::Method::GET, &openai_path);
let router = Router::new()
.route(&custom_path, get(list_models_custom))
.route(&openai_path, get(list_models_openai))
.with_state(state);
(vec![doc_for_custom, doc_for_openai], router)
}
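// A minimal sketch of composing the routers above into a single axum
// application; this mirrors what the HTTP service builder does internally.
// The `state` argument is assumed to come from a `ModelManager`.
#[allow(dead_code)]
fn example_compose_routers(state: Arc<DeploymentState>) -> (Vec<RouteDoc>, Router) {
    let mut all_docs = Vec::new();
    let mut app = Router::new();
    for (docs, router) in [
        completions_router(state.clone(), None),
        chat_completions_router(state.clone(), None),
        list_models_router(state, None),
    ] {
        app = app.merge(router);
        all_docs.extend(docs);
    }
    (all_docs, app)
}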
#[cfg(test)]
mod tests {
use super::super::ServiceHttpError;
use super::*;
const BACKUP_ERROR_MESSAGE: &str = "Failed to generate completions";
fn http_error_from_engine(code: u16) -> Result<(), anyhow::Error> {
Err(HttpError {
code,
message: "custom error message".to_string(),
})?
}
fn other_error_from_engine() -> Result<(), anyhow::Error> {
Err(ServiceHttpError::ModelNotFound("foo".to_string()))?
}
#[test]
fn test_http_error_response_from_anyhow() {
let err = http_error_from_engine(400).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::BAD_REQUEST);
assert_eq!(response.error, "custom error message");
}
#[test]
fn test_error_response_from_anyhow_out_of_range() {
let err = http_error_from_engine(399).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(response.error, "custom error message");
let err = http_error_from_engine(500).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(response.error, "custom error message");
let err = http_error_from_engine(501).unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(response.error, "custom error message");
}
#[test]
fn test_other_error_response_from_anyhow() {
let err = other_error_from_engine().unwrap_err();
let (status, response) = ErrorResponse::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
assert_eq!(
response.error,
format!(
"{}: {}",
BACKUP_ERROR_MESSAGE,
other_error_from_engine().unwrap_err()
)
);
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::metrics;
use super::ModelManager;
use derive_builder::Builder;
use tokio_util::sync::CancellationToken;
#[derive(Clone)]
pub struct HttpService {
models: ModelManager,
router: axum::Router,
port: u16,
}
#[derive(Clone, Builder)]
#[builder(build_fn(private, name = "build_internal"))]
pub struct HttpServiceConfig {
#[builder(default = "8787")]
port: u16,
// #[builder(default)]
// custom: Vec<axum::Router>
#[builder(default = "true")]
enable_chat_endpoints: bool,
#[builder(default = "true")]
enable_cmpl_endpoints: bool,
}
impl HttpService {
pub fn builder() -> HttpServiceConfigBuilder {
HttpServiceConfigBuilder::default()
}
pub fn model_manager(&self) -> &ModelManager {
&self.models
}
pub async fn run(&self, cancel_token: CancellationToken) -> anyhow::Result<()> {
let address = format!("0.0.0.0:{}", self.port);
        tracing::info!(address, "Starting HTTP service");
let listener = tokio::net::TcpListener::bind(address.as_str())
.await
.unwrap_or_else(|_| panic!("could not bind to address: {address}"));
let router = self.router.clone();
let observer = cancel_token.child_token();
Ok(axum::serve(listener, router)
.with_graceful_shutdown(observer.cancelled_owned())
.await
.inspect_err(|_| cancel_token.cancel())?)
}
}
impl HttpServiceConfigBuilder {
pub fn build(self) -> Result<HttpService, anyhow::Error> {
let config = self.build_internal()?;
let model_manager = ModelManager::new();
// enable prometheus metrics
let registry = metrics::Registry::new();
model_manager.metrics().register(&registry)?;
let mut router = axum::Router::new();
let mut all_docs = Vec::new();
let mut routes = vec![
metrics::router(registry, None),
super::openai::list_models_router(model_manager.state(), None),
];
        if config.enable_chat_endpoints {
            routes.push(super::openai::chat_completions_router(
                model_manager.state(),
                None,
            ));
        }
        if config.enable_cmpl_endpoints {
            routes.push(super::openai::completions_router(
                model_manager.state(),
                None,
            ));
        }
// for (route_docs, route) in routes.into_iter().chain(self.routes.into_iter()) {
// router = router.merge(route);
// all_docs.extend(route_docs);
// }
for (route_docs, route) in routes.into_iter() {
router = router.merge(route);
all_docs.extend(route_docs);
}
Ok(HttpService {
models: model_manager,
router,
port: config.port,
})
}
}
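// A minimal usage sketch (assuming a tokio runtime): build the service with
// the default routes, then serve until the cancellation token fires. The port
// value is illustrative.
#[allow(dead_code)]
async fn example_serve() -> anyhow::Result<()> {
    let mut builder = HttpService::builder();
    builder.port(8787);
    let service = builder.build()?;
    let token = CancellationToken::new();
    // Cancelling `token` from elsewhere triggers a graceful shutdown.
    service.run(token).await
}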
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Triton LLM
//!
//! The `triton-llm` crate is a Rust library that provides a set of traits and types for building
//! distributed LLM inference solutions.
pub mod http;
pub mod protocols;
pub mod types;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Triton LLM Protocols
//!
//! This module contains the protocols, i.e. message formats, used to exchange requests and responses
//! both publicly via the HTTP API and internally between Triton components.
//!
use futures::{Stream, StreamExt};
use serde::{Deserialize, Serialize};
use std::pin::Pin;
pub mod codec;
pub mod common;
pub mod openai;
/// The token ID type
pub type TokenIdType = u32;
pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
// TODO: This is an awkward dependency that we need to address
// Originally, all the Annotated/SSE codec bits were in the LLM protocol module; however, [Annotated]
// has become the common response envelope for triton-distributed.
// We may want to move the original Annotated back here and add an infallible conversion to the
// ResponseEnvelop in triton-distributed.
pub use triton_distributed::protocols::annotated::Annotated;
/// The LLM responses have multiple different fields and nests of objects to get to the actual
/// text completion returned. This trait can be applied to the `choice` level objects to extract
/// the completion text.
///
/// To avoid an optional, if no completion text is found, the [`ContentProvider::content`] should
/// return an empty string.
pub trait ContentProvider {
fn content(&self) -> String;
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Usage {
pub prompt_tokens: i32,
pub completion_tokens: i32,
pub total_tokens: i32,
}
/// Converts a stream of [codec::Message]s into a stream of [Annotated]s.
pub fn convert_sse_stream<R>(
stream: DataStream<Result<codec::Message, codec::SseCodecError>>,
) -> DataStream<Annotated<R>>
where
R: for<'de> Deserialize<'de> + Serialize,
{
let stream = stream.map(|message| match message {
Ok(message) => {
let delta = Annotated::<R>::try_from(message);
match delta {
Ok(delta) => delta,
Err(e) => Annotated::from_error(e.to_string()),
}
}
Err(e) => Annotated::from_error(e.to_string()),
});
Box::pin(stream)
}
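// A minimal sketch of wiring the SSE codec to the converter above.
// `serde_json::Value` stands in for a concrete response type; real callers
// would use a typed delta such as a chat completion chunk.
#[allow(dead_code)]
async fn example_convert_sse(text: &str) {
    let messages = codec::create_message_stream(text);
    let mut annotated = convert_sse_stream::<serde_json::Value>(messages);
    while let Some(item) = annotated.next().await {
        // Decode failures are folded into `Annotated::from_error` entries
        // rather than terminating the stream.
        let _ = (item.data, item.event, item.comment);
    }
}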
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A module for parsing Server-Sent Events (SSE) streams according to the SSE specification.
//!
//! This module provides `SseLineCodec`, a codec for decoding SSE streams into [`Message`]s.
//! It handles parsing of `id`, `event`, `data`, and comments; the `data` field can then be
//! deserialized into a typed value with [`Message::decode_data`].
//!
// TODO: Determine if we should use an External EventSource crate. There appear to be several
// potential candidates.
use bytes::BytesMut;
use futures::Stream;
use serde::Deserialize;
use std::{io::Cursor, pin::Pin};
use tokio_util::codec::{Decoder, FramedRead, LinesCodec};
use super::Annotated;
/// An error that occurs when decoding an SSE stream.
#[derive(Debug, thiserror::Error)]
pub enum SseCodecError {
#[error("SseLineCodec decode error: {0}")]
DecodeError(String),
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
}
/// A codec for decoding SSE streams into [`Message`] instances.
///
/// This codec parses SSE streams according to the SSE specification. The `data` field is kept
/// as a raw string; use [`Message::decode_data`] to deserialize it into a concrete type.
pub struct SseLineCodec {
lines_codec: LinesCodec,
data_buffer: String,
event_type_buffer: String,
last_event_id_buffer: String,
comments_buffer: Vec<String>,
}
/// Represents a parsed SSE message.
///
/// The `Message` struct contains optional fields for `id`, `event`, `data`, and a vector of
/// `comments`. The `data` field, if present, holds the raw payload string.
#[derive(Debug)]
pub struct Message {
pub id: Option<String>,
pub event: Option<String>,
pub data: Option<String>,
pub comments: Option<Vec<String>>,
}
impl Message {
/// Deserializes the `data` field into the specified type `T`.
///
/// # Errors
///
/// Returns an error if the `data` field is empty or if deserialization fails.
pub fn decode_data<T>(&self) -> Result<T, SseCodecError>
where
T: for<'de> Deserialize<'de>,
{
        serde_json::from_str(self.data.as_ref().ok_or(SseCodecError::DecodeError(
            "no data field on message to decode".to_string(),
        ))?)
        .map_err(|e| SseCodecError::DecodeError(format!("failed to deserialize data: {}", e)))
}
}
impl<T> TryFrom<Message> for Annotated<T>
where
T: for<'de> Deserialize<'de>,
{
type Error = String;
fn try_from(value: Message) -> Result<Annotated<T>, Self::Error> {
// determine if the message had an error
if let Some(event) = value.event.as_ref() {
if event == "error" {
let message = match &value.comments {
Some(comments) => comments.join("\n"),
None => "`event: error` detected, but no error message found".to_string(),
};
return Err(message);
}
}
// try to deserialize the data to T
let data: Option<T> = match &value.data {
Some(_) => value.decode_data().map_err(|e| e.to_string())?,
None => None,
};
Ok(Annotated {
data,
id: value.id,
event: value.event,
comment: value.comments,
})
}
}
impl SseLineCodec {
    /// Creates a new `SseLineCodec`.
pub fn new() -> Self {
Self::default()
}
}
impl Default for SseLineCodec {
fn default() -> Self {
Self {
lines_codec: LinesCodec::new(),
data_buffer: String::new(),
event_type_buffer: String::new(),
last_event_id_buffer: String::new(),
comments_buffer: Vec::new(),
}
}
}
impl Decoder for SseLineCodec {
type Item = Message;
type Error = SseCodecError;
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
loop {
match self
.lines_codec
.decode(src)
.map_err(|e| SseCodecError::DecodeError(e.to_string()))?
{
Some(line) => {
let line = line.trim_end_matches(&['\r', '\n'][..]);
if line.is_empty() {
// End of event; dispatch
if !self.data_buffer.is_empty()
|| !self.event_type_buffer.is_empty()
|| !self.last_event_id_buffer.is_empty()
|| !self.comments_buffer.is_empty()
{
// Remove the last '\n' if present in data_buffer
if self.data_buffer.ends_with('\n') {
self.data_buffer.pop();
}
let data = if !self.data_buffer.is_empty() {
Some(std::mem::take(&mut self.data_buffer))
} else {
None
};
let message = Message {
id: if self.last_event_id_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.last_event_id_buffer))
},
event: if self.event_type_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.event_type_buffer))
},
data,
comments: if self.comments_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.comments_buffer))
},
};
// No need to clear the buffers; they've been replaced with empty values
return Ok(Some(message));
} else {
// No data to dispatch; continue
continue;
}
} else if let Some(comment) = line.strip_prefix(':') {
self.comments_buffer.push(comment.trim().into());
} else {
let (field_name, field_value) = if let Some(idx) = line.find(':') {
let (name, value) = line.split_at(idx);
let value = value[1..].trim_start_matches(' ');
(name, value)
} else {
(line, "")
};
match field_name {
"event" => {
self.event_type_buffer = field_value.to_string();
}
"data" => {
if field_value != "[DONE]" {
if !self.data_buffer.is_empty() {
self.data_buffer.push('\n');
}
self.data_buffer.push_str(field_value);
}
}
"id" => {
if !field_value.contains('\0') {
self.last_event_id_buffer = field_value.to_string();
}
}
"retry" => {
// For simplicity, we'll ignore retry in this implementation
}
_ => {
// Ignore unknown fields
}
}
}
}
None => {
// No more data available at the moment
return Ok(None);
}
}
}
}
fn decode_eof(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
// Attempt to process any remaining data
let result = self.decode(src)?;
if result.is_some() {
return Ok(result);
}
// If there's no data left to process, return None
if self.data_buffer.is_empty()
&& self.event_type_buffer.is_empty()
&& self.last_event_id_buffer.is_empty()
&& self.comments_buffer.is_empty()
{
Ok(None)
} else {
// Dispatch any remaining data as an event
if self.data_buffer.ends_with('\n') {
self.data_buffer.pop();
}
let data = if !self.data_buffer.is_empty() {
Some(std::mem::take(&mut self.data_buffer))
} else {
None
};
let message = Message {
id: if self.last_event_id_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.last_event_id_buffer))
},
event: if self.event_type_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.event_type_buffer))
},
data,
comments: if self.comments_buffer.is_empty() {
None
} else {
Some(std::mem::take(&mut self.comments_buffer))
},
};
// No need to clear the buffers; they've been replaced with empty values
Ok(Some(message))
}
}
}
/// Creates a stream of `Message` instances from a string of SSE events.
pub fn create_message_stream(
text: &str,
) -> Pin<Box<dyn Stream<Item = Result<Message, SseCodecError>> + Send + Sync>> {
let cursor = Cursor::new(text.to_string());
let framed = FramedRead::new(cursor, SseLineCodec::new());
Box::pin(framed)
}
#[cfg(test)]
mod tests {
use std::io::Cursor;
use futures::stream::StreamExt;
use tokio_util::codec::FramedRead;
use super::*;
#[derive(Deserialize, Debug, PartialEq)]
struct TestData {
message: String,
}
#[tokio::test]
async fn test_message_with_all_fields() {
let sample_data = r#"id: 123
event: test
data: {"message": "Hello World"}
: This is a comment
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert_eq!(message.id, Some("123".to_string()));
assert_eq!(message.event, Some("test".to_string()));
assert_eq!(
message.comments,
Some(vec!["This is a comment".to_string()])
);
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Hello World".to_string());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_only_data() {
let sample_data = r#"data: {"message": "Just some data"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.comments.is_none());
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Just some data".to_string());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_only_comment() {
let sample_data = r#": This is a comment
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.data.is_none());
assert_eq!(
message.comments,
Some(vec!["This is a comment".to_string()])
);
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_multiple_comments() {
let sample_data = r#": First comment
: Second comment
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.data.is_none());
assert_eq!(
message.comments,
Some(vec![
"First comment".to_string(),
"Second comment".to_string()
])
);
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_partial_fields() {
let sample_data = r#"id: 456
data: {"message": "Partial data"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert_eq!(message.id, Some("456".to_string()));
assert!(message.event.is_none());
assert!(message.comments.is_none());
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Partial data".to_string());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_invalid_json_data() {
let sample_data = r#"data: {"message": "Invalid JSON
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(result) = framed.next().await {
match result {
Ok(message) => {
// got a message, but it has invalid json
let data = message.decode_data::<TestData>();
assert!(data.is_err(), "Expected an error; got {:?}", data);
}
_ => panic!("Expected a message"),
}
} else {
panic!("Expected an error");
}
}
#[tokio::test]
async fn test_message_with_missing_data_field() {
let sample_data = r#"id: 789
event: test_event
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
assert_eq!(message.id, Some("789".to_string()));
assert_eq!(message.event, Some("test_event".to_string()));
assert!(message.data.is_none());
assert!(message.comments.is_none());
} else {
panic!("Expected a message");
}
}
#[tokio::test]
async fn test_message_with_empty_data_field() {
let sample_data = r#"data:
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(result) = framed.next().await {
match result {
Ok(_) => {
panic!("Expected no message");
}
Err(e) => panic!("Unexpected error: {}", e),
}
} else {
// no message is emitted
}
}
#[tokio::test]
async fn test_message_with_multiple_data_lines() {
let sample_data = r#"data: {"message": "Line1"}
data: {"message": "Line2"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(result) = framed.next().await {
match result {
Ok(message) => {
// got a message with data, but the data is junk
let data = message.decode_data::<TestData>();
assert!(data.is_err(), "Expected an error; got {:?}", data);
}
_ => panic!("Expected a message"),
}
} else {
panic!("Expected an error");
}
}
#[tokio::test]
async fn test_message_with_unrecognized_field() {
let sample_data = r#"unknown: value
data: {"message": "Hello"}
"#;
let cursor = Cursor::new(sample_data);
let mut framed = FramedRead::new(cursor, SseLineCodec::new());
if let Some(Ok(message)) = framed.next().await {
// Unrecognized fields are ignored
assert!(message.id.is_none());
assert!(message.event.is_none());
assert!(message.comments.is_none());
let data: TestData = message.decode_data().unwrap();
assert_eq!(data.message, "Hello".to_string());
} else {
panic!("Expected a message");
}
}
// data recorded on 2024-09-30 from
// + curl https://integrate.api.nvidia.com/v1/chat/completions -H 'Content-Type: application/json' \
// -H 'Authorization: Bearer nvapi-<redacted>' -d '{
// "model": "mistralai/mixtral-8x22b-instruct-v0.1",
// "messages": [{"role":"user","content":"Write a limerick about the wonders of GPU computing."}],
// "temperature": 0.5,
// "top_p": 1,
// "max_tokens": 64,
// "stream": true
// }'
const SAMPLE_CHAT_DATA: &str = r#"
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"A"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" GPU"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" so"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" swift"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" and"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" so"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" clever"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"In"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" comput"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"ations"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" it"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"'"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"s"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" quite"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" the"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" ende"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"avor"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"With"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" its"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" thousands"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" of"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" co"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"res"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"On"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" complex"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" tasks"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" it"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" ro"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"ars"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"\n"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"S"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"olving"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" problems"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" like"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" never"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":","},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":" forever"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chat-e135180178ae4fe6a7a301aa65aaeea5","object":"chat.completion.chunk","created":1727750141,"model":"mistralai/mixtral-8x22b-instruct-v0.1","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}]}
data: [DONE]
"#;
#[tokio::test]
async fn test_openai_chat_stream() {
use crate::protocols::openai::chat_completions::ChatCompletionResponseDelta;
let mut stream = create_message_stream(SAMPLE_CHAT_DATA);
let mut counter = 0;
loop {
match stream.next().await {
Some(Ok(message)) => {
let delta: ChatCompletionResponseDelta =
serde_json::from_str(&message.data.unwrap()).unwrap();
counter += 1;
println!("counter: {}", counter);
println!("delta: {:?}", delta);
}
Some(Err(e)) => {
panic!("Error: {:?}", e);
}
None => {
break;
}
}
}
assert_eq!(counter, 47);
}
#[test]
fn test_successful_conversion() {
let message = Message {
id: Some("123".to_string()),
event: Some("update".to_string()),
data: Some(r#"{"message": "Hello World"}"#.to_string()),
comments: Some(vec!["Some comment".to_string()]),
};
let annotated: Annotated<TestData> = message.try_into().unwrap();
assert_eq!(annotated.id, Some("123".to_string()));
assert_eq!(annotated.event, Some("update".to_string()));
assert_eq!(annotated.comment, Some(vec!["Some comment".to_string()]));
assert_eq!(
annotated.data,
Some(TestData {
message: "Hello World".to_string()
})
);
}
#[test]
fn test_error_event_with_comments() {
let message = Message {
id: Some("456".to_string()),
event: Some("error".to_string()),
data: Some("Error data".to_string()),
comments: Some(vec!["An error occurred".to_string()]),
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_err());
assert_eq!(result.unwrap_err(), "An error occurred".to_string());
}
#[test]
fn test_error_event_without_comments() {
let message = Message {
id: Some("789".to_string()),
event: Some("error".to_string()),
data: Some("Error data".to_string()),
comments: None,
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_err());
}
#[test]
fn test_invalid_json_data() {
let message = Message {
id: None,
event: Some("update".to_string()),
data: Some("Invalid JSON".to_string()),
comments: None,
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_err());
}
#[test]
fn test_missing_data_field() {
let message = Message {
id: None,
event: Some("update".to_string()),
data: None,
comments: None,
};
let result: Result<Annotated<TestData>, _> = message.try_into();
assert!(result.is_ok());
let annotated = result.unwrap();
assert!(annotated.data.is_none());
assert_eq!(annotated.event, Some("update".to_string()));
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Engine Protocols
//! ================
//!
//! This module contains the protocols in the public API for the LLM Engine and AsyncEngine facades.
//!
//! The core components are the `CompletionRequest` and `StreamingCompletionResponse` objects.
//!
//! The `StreamingCompletionResponse` objects are the outputs of the LLM Engine; however, we
//! need some additional information to propagate intermediate results for improved observability.
//! The metadata is transferred via the other arms of the `StreamingResponse` enum.
//!
use anyhow::Result;
use derive_builder::Builder;
use serde::ser::SerializeStruct;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashMap;
use std::time::SystemTime;
use super::TokenIdType;
pub mod kv_routing;
pub mod llm_backend;
pub mod postprocessor;
pub mod preprocessor;
/// SamplingOptionsProvider is a trait that allows the caller to extract the sampling options
/// from the object that implements it.
pub trait SamplingOptionsProvider {
fn extract_sampling_options(&self) -> Result<SamplingOptions>;
}
pub trait StopConditionsProvider {
fn extract_stop_conditions(&self) -> Result<StopConditions>;
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum FinishReason {
#[serde(rename = "eos")]
EoS,
#[serde(rename = "length")]
Length,
#[serde(rename = "stop")]
Stop,
#[serde(rename = "error")]
Error(String),
#[serde(rename = "cancelled")]
Cancelled,
}
/// LLM Inference Engines can accept a variety of input types. Not all Engines will support all
/// input types. For example, the trtllm::AsyncEngine only supports `PromptType::Tokens` as an
/// input type. The higher-level `Backend` class is a general wrapper around Engines that will
/// enable many of the input options that require pre/postprocessing.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub enum PromptType {
    /// If allowed, this input type allows the caller to pass a list of token_ids directly to the
/// inference engine. This is an advanced feature that requires the caller to handle all of the
/// necessary prompt formatting and tokenization.
#[serde(rename = "token_ids")]
TokenIds(Vec<TokenIdType>),
/// If allowed, the raw text will be tokenized and converted to token_ids without any additional
    /// preprocessing. This is an advanced feature that requires the caller to correctly format the
/// prompt as defined by the model.
#[serde(rename = "raw")]
Raw(String),
/// If allowed, the `CompletionContext` will be preprocessed server-side. If the `Model` trait
    /// `requires_prompt_template` returns true then the `CompletionContext` will be used
/// to render the formatted prompt from the template. `Completion` is the preferred `PromptType`
/// for single turn completions.
#[serde(rename = "completion")]
Completion(CompletionContext),
/// If allowed, the `ChatContext` will be preprocessed server-side. Most chat models will have
/// a predefined prompt format/structure. If the `Model` trait `requires_prompt_template` returns
    /// true then the `ChatContext` will be used to render the formatted prompt from the template.
/// `ChatCompletion` is the preferred `PromptType` for multi-turn completions.
#[serde(rename = "chat_completion")]
ChatCompletion(ChatContext),
    /// If allowed, then `Model::requires_prompt_template()` must also return true. The `serde_json::Value`
    /// will be passed directly to the prompt template. This allows a completely generic data model
    /// and prompt template to be defined and used by the server.
#[serde(rename = "custom_json")]
CustomJson(serde_json::Value),
}
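// A quick illustration of the wire format produced by the serde attributes
// above: variants serialize as externally tagged JSON with snake_case tags,
// e.g. `{"raw":"Hello"}` for `PromptType::Raw`.
#[allow(dead_code)]
fn example_prompt_type_json() -> serde_json::Result<String> {
    serde_json::to_string(&PromptType::Raw("Hello".to_string()))
}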
/// TensorRT LLM does not perform preprocessing or postprocessing. The input_ids / token_ids
/// are expected to be preprocessed by the client. The client is responsible for constructing
/// the model specific prompt template and applying the tokenizer.
///
/// TensorRT LLM will perform some server side postprocessing to ensure that generation is
/// efficiently stopped. See `StopConditions` below.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct CompletionRequest {
/// Type of prompt
pub prompt: PromptType,
/// StopConditions are conditions that the inference engine will use to stop generation.
pub stop_conditions: StopConditions,
/// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// More documentation is needed on how, and in what order, the sampling options are applied.
pub sampling_options: SamplingOptions,
/// The computed checksum of the Model Deployment Card (MDC).
#[builder(default)]
pub mdc_sum: Option<String>,
/// User requested annotations for the request
#[builder(default)]
pub annotations: Option<Vec<String>>,
}
impl CompletionRequest {
pub fn builder() -> CompletionRequestBuilder {
CompletionRequestBuilder::default()
}
}
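// A minimal sketch of constructing a request with the generated builder; the
// prompt and option values are illustrative. `mdc_sum` and `annotations` fall
// back to their builder defaults.
#[allow(dead_code)]
fn example_completion_request() -> Result<CompletionRequest> {
    let mut builder = CompletionRequest::builder();
    builder
        .prompt(PromptType::Raw("Hello".to_string()))
        .stop_conditions(StopConditions {
            max_tokens: Some(64),
            ..Default::default()
        })
        .sampling_options(SamplingOptions::default());
    builder.build().map_err(Into::into)
}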
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
/// Defines the prompt template and system prompt for a completion request.
/// If the model does not support prompt templates, the system_prompt will be ignored.
pub struct CompletionContext {
/// Prompt sent by the user
pub prompt: String,
/// Optional system_prompt for models that support prompt templates with system_prompts.
pub system_prompt: Option<String>,
}
/// ChatTurn is a struct that contains the user and assistant messages in a chat.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ChatTurn {
/// The user message
pub user: String,
/// The assistant response
pub assistant: String,
}
/// ChatContext is a struct that contains the role and context of a chat message
/// along with a flattened CompletionContext.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ChatContext {
/// CompletionContext for this chat turn
#[serde(flatten)]
pub completion: CompletionContext,
/// The history/context of the user and assistant messages in the chat context
pub context: Vec<ChatTurn>,
}
/// TensorRT LLM server-side stop conditions. These options allow for the server to evaluate
/// the generated sequence and stop generation if the sequence meets a stop condition.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct StopConditions {
/// The maximum number of tokens to generate
pub max_tokens: Option<u32>,
/// List of strings that stop the generation when they are generated.
/// The returned output will not contain the stop strings.
pub stop: Option<Vec<String>>,
/// List of tokens that stop the generation when they are
/// generated. The returned output will NOT contain the stop tokens.
pub stop_token_ids_hidden: Option<Vec<TokenIdType>>,
/// The minimum number of tokens to generate
/// To ignore_eos, set min_tokens to max_tokens
pub min_tokens: Option<u32>,
/// Whether to ignore the EOS token and continue generating
/// tokens after the EOS token is generated.
    // TODO(ignore_eos) - improve this by masking the EOS token with logit bias
pub ignore_eos: Option<bool>,
}
impl StopConditions {
pub fn apply_ignore_eos(&mut self) {
if self.ignore_eos.unwrap_or(false) {
self.min_tokens = self.max_tokens;
self.stop = None;
self.stop_token_ids_hidden = None;
}
}
}
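// A small sketch of the rewrite performed by `apply_ignore_eos` (values are
// illustrative): with `ignore_eos` set, generation runs out to `max_tokens`
// because the EOS token and stop lists no longer end the sequence.
#[allow(dead_code)]
fn example_ignore_eos() -> StopConditions {
    let mut conditions = StopConditions {
        max_tokens: Some(128),
        ignore_eos: Some(true),
        ..Default::default()
    };
    conditions.apply_ignore_eos();
    // `min_tokens` is now Some(128) and `stop` / `stop_token_ids_hidden` are cleared.
    conditions
}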
/// Temperature range for sampling.
pub const TEMPERATURE_RANGE: (f32, f32) = (0.0, 1.0);
/// Top P range for sampling.
pub const TOP_P_RANGE: (f32, f32) = (0.0, 1.0);
/// Frequency Penalty range for sampling.
pub const FREQUENCY_PENALTY_RANGE: (f32, f32) = (-1.0, 1.0);
/// Collection of options that control the sampling behavior of the inference engine.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct SamplingOptions {
/// Number of output sequences to return for the given prompt
pub n: Option<i32>,
/// Number of output sequences that are generated from the prompt.
/// From these `best_of` sequences, the top `n` sequences are returned.
/// `best_of` must be greater than or equal to `n`. This is treated as
    /// the beam width when `use_beam_search` is true. By default, `best_of`
/// is set to `n`.
pub best_of: Option<i32>,
/// Float that penalizes new tokens based on whether they
/// appear in the generated text so far. Values > 0 encourage the model
/// to use new tokens, while values < 0 encourage the model to repeat
/// tokens.
pub presence_penalty: Option<f32>,
/// Float that penalizes new tokens based on their
/// frequency in the generated text so far. Values > 0 encourage the
/// model to use new tokens, while values < 0 encourage the model to
/// repeat tokens.
pub frequency_penalty: Option<f32>,
/// Float that penalizes new tokens based on whether
/// they appear in the prompt and the generated text so far. Values > 1
/// encourage the model to use new tokens, while values < 1 encourage
/// the model to repeat tokens.
pub repetition_penalty: Option<f32>,
/// Float that controls the randomness of the sampling. Lower
/// values make the model more deterministic, while higher values make
/// the model more random. Zero means greedy sampling.
pub temperature: Option<f32>,
/// Float that controls the cumulative probability of the top tokens
/// to consider. Must be in (0, 1]. Set to 1 to consider all tokens.
pub top_p: Option<f32>,
/// Integer that controls the number of top tokens to consider. Set
/// to -1 to consider all tokens.
pub top_k: Option<i32>,
/// Float that represents the minimum probability for a token to be
/// considered, relative to the probability of the most likely token.
/// Must be in [0, 1]. Set to 0 to disable this.
pub min_p: Option<f32>,
/// Whether to use beam search instead of sampling.
pub use_beam_search: Option<bool>,
/// Float that penalizes sequences based on their length.
/// Used in beam search.
pub length_penalty: Option<f32>,
/// The seed to use when sampling
pub seed: Option<i64>,
}
impl SamplingOptions {
pub fn force_greedy(&mut self) {
self.presence_penalty = None;
self.frequency_penalty = None;
self.repetition_penalty = None;
self.temperature = None;
self.top_p = None;
self.top_k = None;
self.min_p = None;
}
}
/// Collection of options that control what information the inference engine returns in the response.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct OutputOptions {
/// Number of log probabilities to return per output token.
/// Note that the implementation follows the OpenAI API: The return
/// result includes the log probabilities on the `logprobs` most likely
    /// tokens, as well as the chosen tokens. The API will always return the
/// log probability of the sampled token, so there may be up to
/// `logprobs+1` elements in the response
pub logprobs: Option<u32>,
/// Number of log probabilities to return per prompt token.
pub prompt_logprobs: Option<u32>,
    /// Whether to skip special tokens in the output.
pub skip_special_tokens: Option<bool>,
    /// If true, the Context object will contain the prompt that was passed to
/// the tokenizer. This is useful for inspecting the behavior of prompt
/// templates that are applied during the backend preprocessing.
pub formatted_prompt: Option<bool>,
}
// Struct for log probability information
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatCompletionLogprobs {
/// A list of message content tokens with log probability information.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<Vec<ChatCompletionTokenLogprob>>,
/// A list of message refusal tokens with log probability information.
#[serde(skip_serializing_if = "Option::is_none")]
pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatCompletionTokenLogprob {
/// The token.
pub token: String,
/// The log probability of this token, if it is within the top 20 most likely tokens.
/// Otherwise, the value `-9999.0` signifies that the token is very unlikely.
pub logprob: f64,
/// A list of integers representing the UTF-8 bytes representation of the token.
/// Useful in instances where characters are represented by multiple tokens and their
/// byte representations must be combined to generate the correct text representation.
/// Can be `None` if there is no bytes representation for the token.
pub bytes: Option<Vec<u8>>,
/// List of the most likely tokens and their log probability, at this token position.
/// In rare cases, there may be fewer than the requested number of `top_logprobs` returned.
pub top_logprobs: Vec<TopLogprob>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TopLogprob {
/// The token.
pub token: String,
/// The log probability of this token.
pub logprob: f64,
/// A list of integers representing the UTF-8 bytes representation of the token.
/// Can be `None` if there is no bytes representation for the token.
pub bytes: Option<Vec<u8>>,
}
// /// UserData is a struct that contains user-defined data that can be passed to the inference engine.
// /// This information will be use to annotate the distributed traces for improved observability.
// #[derive(Serialize, Deserialize, Debug, Clone, Default)]
// pub struct UserData {
// /// Apply server-side prompt template to the request
// pub request_uuid: Option<uuid::Uuid>,
// }
/// StreamingResponse is the primary response object for the LLM Engine. The response stream
/// can emit three different types of messages. The Initialize and Finalize messages are optional
/// and primarily used over disaggregated transports to move state from the server to the client.
#[derive(Serialize, Deserialize, Debug)]
pub enum StreamingResponse {
    /// Initialize transports a Prologue object which communicates the LLM Engine Context
Initialize(Option<Prologue>),
/// Step is the primary data in the response stream. It contains the StreamingCompletionResponse
Step(Box<StreamingCompletionResponse>),
/// Finalize is an optional final message in the response stream. It contains the Epilogue object which
/// is used to communicate extra information about the completion and the engine statistics.
Finalize(Option<Epilogue>),
}
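// Illustrative sketch (an assumption about intended use, not part of the
// public API): a client-side consumer is expected to handle all three
// variants of the stream.
#[allow(dead_code)]
fn example_consume_streaming_response(msg: StreamingResponse) {
    match msg {
        // Optional first message: capture preprocessing artifacts.
        StreamingResponse::Initialize(Some(prologue)) => {
            let _ = (prologue.formatted_prompt, prologue.input_token_ids);
        }
        StreamingResponse::Initialize(None) => {}
        // Primary data path: each Step carries a StreamingCompletionResponse.
        StreamingResponse::Step(step) => {
            let _ = &step.delta;
        }
        // Optional final message: the stream finished without error.
        StreamingResponse::Finalize(_epilogue) => {}
    }
}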
// TODO(ryan) - this should be part of the internal api as it is not deserializable
// the public API should drop the Option<Arc<Stats>> in favor of Option<Stats>
// the two variants both serialize to the same json; however, the internal version
// cannot be deserialized directly.
// we use the internal one on the server side to avoid the cost of cloning the Stats
// object; however, client side, we should always fully materialize the Stats object.
//
// TODO(ryan) - update this object to use an enum where we have the current definition be the
// StepResponse arm; then we will add the following arms:
// - Initialize(Prologue)
// - Step()
// - Finalize(Epilogue)
/// This is the first message that will be emitted by an Engine Response Stream
/// It indicates that the request has been preprocessed and queued for execution on the backend.
#[derive(Serialize, Deserialize, Debug)]
pub struct Prologue {
/// If the request was preprocessed with a prompt template, this will contain the formatted prompt
pub formatted_prompt: Option<String>,
/// If the request did not contain TokenIds, this will contain the token_ids that were generated
/// from tokenizing the prompt.
pub input_token_ids: Option<Vec<TokenIdType>>,
}
/// This is the final message that will be emitted by an Engine Response Stream when it
/// finishes without error. In some cases, the engine may emit an error which will indicate
/// the end of the stream. Another case in which a Finalize(Epilogue) will not be emitted is
/// if the response handler has stalled and too many responses are pending.
#[derive(Serialize, Deserialize, Debug)]
pub struct Epilogue {}
#[derive(Debug)]
pub struct StreamingCompletionResponse {
pub delta: Delta,
pub logprobs: Option<ChatCompletionLogprobs>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum StreamState {
Active,
Finished(FinishReason),
}
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum Logits {
All(Vec<f32>),
Sparse(Vec<(u32, f32)>),
}
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum LogProbs {
Normalized(Logits),
Raw(Logits),
}
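// Illustrative sketch (not part of the crate's tests): with
// `rename_all = "snake_case"`, these enums serialize as externally tagged
// JSON objects keyed by the snake_case variant name.
#[cfg(test)]
mod logits_wire_format_example {
    use super::*;

    #[test]
    fn sparse_logits_serialize_with_snake_case_tag() {
        let sparse = Logits::Sparse(vec![(42, -0.5)]);
        let json = serde_json::to_string(&sparse).expect("Failed to serialize");
        assert_eq!(json, r#"{"sparse":[[42,-0.5]]}"#);
    }
}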
/// At each SequencePosition we hold position-specific data
pub struct SequencePositionData {
pub token_id: TokenIdType,
/// The log probability of the token
pub logprobs: Option<LogProbs>,
}
// todo(ryan) - we need to create a DeltaBuilder which is a mutable object that can be passed
// around from the low-level compute engine to the high-level api. The DeltaBuilder will allow
// us to construct the Delta object at multiple layers in the streaming response path.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Delta {
pub is_complete: bool,
pub finish_reason: Option<FinishReason>,
// new token_ids
pub token_ids: Option<Vec<u32>>,
// tokens
pub tokens: Option<Vec<String>>,
// decoded text
pub text: Option<String>,
// current sequence length
    // when streaming, we expect this to increase by 1 on each response
pub sequence_length: Option<usize>,
// if the number of slots for a given request is greater than 1
// this indicates the index of the slot for the response
pub index: Option<usize>,
/// cumulative log probabilities
pub cum_log_probs: Option<f64>,
/// error message from engine
/// if this is set, is_complete should also be true
pub err_msg: Option<String>,
/// usage info
pub usage: Option<Usage>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Usage {
pub input_tokens_count: usize,
pub output_tokens_count: usize,
}
// todo(ryan) - we need to update this object to make it more generic
// we need to define a set of generic stats traits that allow those stats to be None
// then back them by a concrete implementation like a TrtllmStats object
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Stats {
    /// Time since the last Epoch/Forward Pass in microseconds (us).
    /// This is measured and recorded by the Response Router rather than the
    /// Inference Engine. Note, when evaluating the responses, if this
    /// value is greater than the stream's measured value, then there was a gap
    /// between forward passes. In normal operation, the value of this field should
    /// be less than the recorded value on the response stream.
pub time_since_last_forward_pass_us: Option<u64>,
pub request_active_count: u32,
pub request_context_count: u32,
pub request_generation_count: u32,
pub request_scheduled_count: u32,
pub request_max_count: u32,
pub kv_free_cache_blocks: u64,
pub kv_max_cache_blocks: u64,
pub kv_used_cache_blocks: u64,
pub kv_tokens_per_cache_block: u64,
pub runtime_cpu_memory_usage: u64,
pub runtime_gpu_memory_usage: u64,
pub runtime_pinned_memory_usage: u64,
pub iteration_counter: u64,
pub microbatch_id: u64,
pub total_context_tokens: u32,
pub timestamp: String,
}
impl Serialize for StreamingCompletionResponse {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
        // `logprobs` is intentionally not serialized (matching the tests below),
        // so the struct is declared with a single field.
        let mut state = serializer.serialize_struct("StreamingCompletionResponse", 1)?;
        state.serialize_field("delta", &self.delta)?;
state.end()
}
}
impl<'de> Deserialize<'de> for StreamingCompletionResponse {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
// Create a temporary struct for deserialization
#[derive(Deserialize)]
struct TempResponse {
delta: Delta,
logprobs: Option<ChatCompletionLogprobs>,
}
let TempResponse { delta, logprobs } = TempResponse::deserialize(deserializer)?;
Ok(StreamingCompletionResponse { delta, logprobs })
}
}
#[derive(Serialize, Deserialize, Debug)]
pub struct ScatterData<T> {
pub x: Vec<T>,
pub y: Vec<T>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Trace {
pub time_to_first_token: u64,
pub token_to_token: Vec<u64>,
pub start: SystemTime,
pub complete: SystemTime,
pub initial_tokens: u32,
pub max_tokens: u32,
pub t2ft_iteration_count: u64,
pub t2t_iteration_count: Vec<u64>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct PerformanceModel {
// linear regression parameters fitting t2ft vs. initial tokens
pub t2ft_intercept: f64,
pub t2ft_slope: f64,
// linear regression parameters fitting t2tl vs. initial tokens
pub t2tl_intercept: f64,
pub t2tl_slope: f64,
// r2 values from the regression
pub t2ft_fit_r2: f64,
pub t2tl_fit_r2: f64,
}
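// Illustrative sketch (an assumption about intended use, not part of the
// public API): the regression parameters form a simple linear model, so a
// predicted time-to-first-token for a prompt of `initial_tokens` is
// intercept + slope * tokens.
#[allow(dead_code)]
fn example_predict_t2ft(model: &PerformanceModel, initial_tokens: u32) -> f64 {
    model.t2ft_intercept + model.t2ft_slope * f64::from(initial_tokens)
}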
#[derive(Serialize, Deserialize, Debug)]
pub struct CalibrationResults {
pub effective_flops: f64,
pub effective_memory_bandwidth: f64,
pub max_q: u32,
pub performance_model: PerformanceModel,
pub traces: Vec<Trace>,
pub t2ft_scatter_data: ScatterData<f64>,
pub t2tl_scatter_data: ScatterData<f64>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct LoadgenResults {
pub stats_by_iteration: HashMap<u64, Stats>,
pub traces: Vec<Trace>,
}
impl CompletionContext {
/// Create a new CompletionContext
pub fn new(prompt: String, system_prompt: Option<String>) -> Self {
Self {
prompt,
system_prompt,
}
}
/// Create a new CompletionContext with only a prompt
pub fn from_prompt(prompt: String) -> Self {
Self {
prompt,
system_prompt: None,
}
}
/// Create a new CompletionContext with a prompt and system prompt
pub fn with_system_prompt(prompt: String, system_prompt: String) -> Self {
Self {
prompt,
system_prompt: Some(system_prompt),
}
}
}
// todo(ryan) - create a builder for chat context
impl From<CompletionContext> for PromptType {
fn from(context: CompletionContext) -> Self {
PromptType::Completion(context)
}
}
#[cfg(test)]
mod tests {
use serde_json;
use super::*;
#[test]
fn test_completion_context_new() {
let prompt = "Hello, world!".to_string();
let system_prompt = Some("This is a system prompt.".to_string());
let context = CompletionContext::new(prompt.clone(), system_prompt.clone());
assert_eq!(context.prompt, prompt);
assert_eq!(context.system_prompt, system_prompt);
}
#[test]
fn test_completion_context_from_prompt() {
let prompt = "Hello, world!".to_string();
let context = CompletionContext::from_prompt(prompt.clone());
assert_eq!(context.prompt, prompt);
assert_eq!(context.system_prompt, None);
}
#[test]
fn test_completion_context_with_system_prompt() {
let prompt = "Hello, world!".to_string();
let system_prompt = "This is a system prompt.".to_string();
let context = CompletionContext::with_system_prompt(prompt.clone(), system_prompt.clone());
assert_eq!(context.prompt, prompt);
assert_eq!(context.system_prompt, Some(system_prompt));
}
#[test]
fn test_completion_context_into_prompt_type() {
let prompt = "Hello, world!".to_string();
let system_prompt = "This is a system prompt.".to_string();
let context = CompletionContext::with_system_prompt(prompt.clone(), system_prompt.clone());
let prompt_type: PromptType = context.into();
if let PromptType::Completion(completion_context) = prompt_type {
assert_eq!(completion_context.prompt, prompt);
assert_eq!(completion_context.system_prompt, Some(system_prompt));
} else {
panic!("Expected a Completion variant");
}
}
// #[test]
// fn test_serialize_with_stats() {
// let response = StreamingCompletionResponse {
// delta: Delta {
// is_complete: true,
// finish_reason: Some(FinishReason::Length),
// token_ids: Some(vec![101, 102, 103]),
// tokens: Some(vec!["token1".to_string(), "token2".to_string()]),
// text: Some("example text".to_string()),
// sequence_length: Some(3),
// index: Some(0),
// cum_log_probs: Some(-0.5),
// err_msg: None,
// usage: None,
// },
// logprobs: None,
// };
// // Serialize the response
// let serialized = serde_json::to_string(&response).expect("Failed to serialize");
// // Expected JSON string (simplified)
// let expected = r#"{
// "delta": {
// "is_complete": true,
// "finish_reason": "length",
// "token_ids": [101, 102, 103],
// "tokens": ["token1", "token2"],
// "text": "example text",
// "sequence_length": 3,
// "index": 0,
// "cum_log_probs": -0.5,
// "err_msg": null,
// "usage": null
// },
// "stats": {
// "time_since_last_forward_pass_us": 1000,
// "request_active_count": 2,
// "request_context_count": 1,
// "request_generation_count": 3,
// "request_scheduled_count": 1,
// "request_max_count": 10,
// "kv_free_cache_blocks": 500,
// "kv_max_cache_blocks": 1000,
// "kv_used_cache_blocks": 500,
// "kv_tokens_per_cache_block": 10,
// "runtime_cpu_memory_usage": 5000,
// "runtime_gpu_memory_usage": 2000,
// "runtime_pinned_memory_usage": 1000,
// "iteration_counter": 5,
// "microbatch_id": 12345,
// "total_context_tokens": 256,
// "timestamp": "2024-01-01T00:00:00Z"
// }
// }"#;
// assert_eq!(
// serde_json::from_str::<serde_json::Value>(&serialized).unwrap(),
// serde_json::from_str::<serde_json::Value>(expected).unwrap()
// );
// }
#[test]
fn test_serialize_without_stats() {
let response = StreamingCompletionResponse {
delta: Delta {
is_complete: false,
finish_reason: None,
token_ids: None,
tokens: None,
text: None,
sequence_length: None,
index: None,
cum_log_probs: None,
err_msg: None,
usage: None,
},
logprobs: None,
};
// Serialize the response
let serialized = serde_json::to_string(&response).expect("Failed to serialize");
// Expected JSON string
let expected = r#"{
"delta": {
"is_complete": false,
"finish_reason": null,
"token_ids": null,
"tokens": null,
"text": null,
"sequence_length": null,
"index": null,
"cum_log_probs": null,
"err_msg": null,
"usage": null
}
}"#;
assert_eq!(
serde_json::from_str::<serde_json::Value>(&serialized).unwrap(),
serde_json::from_str::<serde_json::Value>(expected).unwrap()
);
}
// #[test]
// fn test_deserialize_with_stats() {
// let json_data = r#"{
// "delta": {
// "is_complete": true,
// "finish_reason": "length",
// "token_ids": [101, 102, 103],
// "tokens": ["token1", "token2"],
// "text": "example text",
// "sequence_length": 3,
// "index": 0,
// "cum_log_probs": -0.5,
// "err_msg": null,
// "usage": null
// },
// "stats": {
// "time_since_last_forward_pass_us": 1000,
// "request_active_count": 2,
// "request_context_count": 1,
// "request_generation_count": 3,
// "request_scheduled_count": 1,
// "request_max_count": 10,
// "kv_free_cache_blocks": 500,
// "kv_max_cache_blocks": 1000,
// "kv_used_cache_blocks": 500,
// "kv_tokens_per_cache_block": 10,
// "runtime_cpu_memory_usage": 5000,
// "runtime_gpu_memory_usage": 2000,
// "runtime_pinned_memory_usage": 1000,
// "iteration_counter": 5,
// "microbatch_id": 12345,
// "total_context_tokens": 256,
// "timestamp": "2024-01-01T00:00:00Z"
// }
// }"#;
// // Deserialize the JSON string
// let deserialized: StreamingCompletionResponse =
// serde_json::from_str(json_data).expect("Failed to deserialize");
// // Expected response object
// let expected = StreamingCompletionResponse {
// delta: Delta {
// is_complete: true,
// finish_reason: Some(FinishReason::Length),
// token_ids: Some(vec![101, 102, 103]),
// tokens: Some(vec!["token1".to_string(), "token2".to_string()]),
// text: Some("example text".to_string()),
// sequence_length: Some(3),
// index: Some(0),
// cum_log_probs: Some(-0.5),
// err_msg: None,
// usage: None,
// },
// logprobs: None,
// };
    // // This is unwieldy, but we can no longer do assert_eq!(deserialized, expected);
    // // because the struct no longer has the PartialEq trait
// assert_eq!(deserialized.delta.is_complete, expected.delta.is_complete);
// assert_eq!(
// deserialized.delta.finish_reason,
// expected.delta.finish_reason
// );
// assert_eq!(deserialized.delta.token_ids, expected.delta.token_ids);
// assert_eq!(deserialized.delta.tokens, expected.delta.tokens);
// assert_eq!(deserialized.delta.text, expected.delta.text);
// assert_eq!(
// deserialized.delta.sequence_length,
// expected.delta.sequence_length
// );
// assert_eq!(deserialized.delta.index, expected.delta.index);
// assert_eq!(
// deserialized.delta.cum_log_probs,
// expected.delta.cum_log_probs
// );
// assert_eq!(deserialized.delta.err_msg, expected.delta.err_msg);
// assert_eq!(deserialized.delta.usage, expected.delta.usage);
// assert_eq!(
// deserialized_stats.time_since_last_forward_pass_us,
// expected_stats.time_since_last_forward_pass_us
// );
// assert_eq!(
// deserialized_stats.request_active_count,
// expected_stats.request_active_count
// );
// assert_eq!(
// deserialized_stats.request_context_count,
// expected_stats.request_context_count
// );
// assert_eq!(
// deserialized_stats.request_generation_count,
// expected_stats.request_generation_count
// );
// assert_eq!(
// deserialized_stats.request_scheduled_count,
// expected_stats.request_scheduled_count
// );
// assert_eq!(
// deserialized_stats.request_max_count,
// expected_stats.request_max_count
// );
// assert_eq!(
// deserialized_stats.kv_free_cache_blocks,
// expected_stats.kv_free_cache_blocks
// );
// assert_eq!(
// deserialized_stats.kv_max_cache_blocks,
// expected_stats.kv_max_cache_blocks
// );
// assert_eq!(
// deserialized_stats.kv_used_cache_blocks,
// expected_stats.kv_used_cache_blocks
// );
// assert_eq!(
// deserialized_stats.kv_tokens_per_cache_block,
// expected_stats.kv_tokens_per_cache_block
// );
// assert_eq!(
// deserialized_stats.runtime_cpu_memory_usage,
// expected_stats.runtime_cpu_memory_usage
// );
// assert_eq!(
// deserialized_stats.runtime_gpu_memory_usage,
// expected_stats.runtime_gpu_memory_usage
// );
// assert_eq!(
// deserialized_stats.runtime_pinned_memory_usage,
// expected_stats.runtime_pinned_memory_usage
// );
// assert_eq!(
// deserialized_stats.iteration_counter,
// expected_stats.iteration_counter
// );
// assert_eq!(
// deserialized_stats.microbatch_id,
// expected_stats.microbatch_id
// );
// assert_eq!(
// deserialized_stats.total_context_tokens,
// expected_stats.total_context_tokens
// );
// assert_eq!(deserialized_stats.timestamp, expected_stats.timestamp);
// }
#[test]
fn test_deserialize_without_stats() {
let json_data = r#"{
"delta": {
"is_complete": false,
"finish_reason": null,
"token_ids": null,
"tokens": null,
"text": null,
"sequence_length": null,
"index": null,
"cum_log_probs": null,
"err_msg": null,
"usage": null
}
}"#;
// Deserialize the JSON string
let deserialized: StreamingCompletionResponse =
serde_json::from_str(json_data).expect("Failed to deserialize");
// Expected response object
let expected = StreamingCompletionResponse {
delta: Delta {
is_complete: false,
finish_reason: None,
token_ids: None,
tokens: None,
text: None,
sequence_length: None,
index: None,
cum_log_probs: None,
err_msg: None,
usage: None,
},
logprobs: None,
};
        // This is unwieldy, but we can no longer do assert_eq!(deserialized, expected);
        // because the struct no longer has the PartialEq trait
assert_eq!(deserialized.delta.is_complete, expected.delta.is_complete);
assert_eq!(
deserialized.delta.finish_reason,
expected.delta.finish_reason
);
assert_eq!(deserialized.delta.token_ids, expected.delta.token_ids);
assert_eq!(deserialized.delta.tokens, expected.delta.tokens);
assert_eq!(deserialized.delta.text, expected.delta.text);
assert_eq!(
deserialized.delta.sequence_length,
expected.delta.sequence_length
);
assert_eq!(deserialized.delta.index, expected.delta.index);
assert_eq!(
deserialized.delta.cum_log_probs,
expected.delta.cum_log_probs
);
assert_eq!(deserialized.delta.err_msg, expected.delta.err_msg);
assert_eq!(deserialized.delta.usage, expected.delta.usage);
}
#[test]
fn test_serialize_delta_and_none_stats() {
let response = StreamingCompletionResponse {
delta: Delta {
is_complete: true,
finish_reason: Some(FinishReason::Length),
token_ids: Some(vec![101, 102, 103]),
tokens: Some(vec!["token1".to_string(), "token2".to_string()]),
text: Some("example text".to_string()),
sequence_length: Some(3),
index: Some(0),
cum_log_probs: Some(-0.5),
err_msg: None,
usage: None,
},
logprobs: None,
};
// Serialize the response
let serialized = serde_json::to_string(&response).expect("Failed to serialize");
// Expected JSON string where stats is null
let expected_json = r#"{
"delta": {
"is_complete": true,
"finish_reason": "length",
"token_ids": [101, 102, 103],
"tokens": ["token1", "token2"],
"text": "example text",
"sequence_length": 3,
"index": 0,
"cum_log_probs": -0.5,
"err_msg": null,
"usage": null
}
}"#;
// Parse both the serialized response and the expected JSON as serde_json::Value for easy comparison
assert_eq!(
serde_json::from_str::<serde_json::Value>(&serialized).unwrap(),
serde_json::from_str::<serde_json::Value>(expected_json).unwrap()
);
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ForwardPassMetrics {
pub request_active_slots: u64,
pub request_total_slots: u64,
pub kv_active_blocks: u64,
pub kv_total_blocks: u64,
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use crate::protocols::TokenIdType;
pub type TokenType = Option<String>;
pub type LogProbs = Vec<f64>;
pub use super::preprocessor::PreprocessedRequest as BackendInput;
pub use super::FinishReason;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct BackendOutput {
/// New token_ids generated from the LLM Engine
pub token_ids: Vec<TokenIdType>,
/// Unlike [`LLMEngineOutput::tokens`], this is a vector of tokens, not an optional.
/// The size of this vector should be the same as the size of `token_ids`.
pub tokens: Vec<TokenType>,
    /// Decoded text from the list of tokens.
pub text: Option<String>,
/// Optional cumulative log probabilities
pub cum_log_probs: Option<f64>,
/// Optional log probabilities
pub log_probs: Option<LogProbs>,
    // TODO: Enrich this with more information as we can apply our first-level postprocessing
    // logic and return more detailed information
pub finish_reason: Option<FinishReason>,
/// Model Deployment Card checksum
pub mdcsum: String,
}
/// The LLM engine and backend will manage their own state, specifically translating how a
/// given request/slot is managed on that particular backend.
///
/// For nvLLM's purposes, it has a single traceable request_id as part of its context that
/// has propagated through the service pipeline to the backend.
///
/// This is the minimal raw output from the LLM engine. The Backend may then apply multiple
/// levels of post-processing before the BackendOutput is returned.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LLMEngineOutput {
// new token_ids
pub token_ids: Vec<TokenIdType>,
/// If the LLM Engine performs the detokenization, then this will have a Some of the detokenized
/// text/tokens. If this value is None, then the Backend is responsible for detokenization.
pub tokens: Option<Vec<TokenType>>,
    // decoded text
pub text: Option<String>,
/// cumulative log probabilities
pub cum_log_probs: Option<f64>,
/// Optional log probabilities
pub log_probs: Option<LogProbs>,
    // TODO: Enrich this with more information as we can apply our first-level postprocessing
    // logic and return more detailed information
pub finish_reason: Option<FinishReason>,
}
impl LLMEngineOutput {
pub fn cancelled() -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Cancelled),
}
}
pub fn stop() -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Stop),
}
}
pub fn length() -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Length),
}
}
pub fn error(err_msg: String) -> Self {
LLMEngineOutput {
token_ids: vec![],
tokens: None,
text: None,
cum_log_probs: None,
log_probs: None,
finish_reason: Some(FinishReason::Error(err_msg)),
}
}
}
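// Illustrative sketch (not part of the crate's tests): the terminal
// constructors above all produce empty outputs whose only payload is the
// finish_reason.
#[cfg(test)]
mod llm_engine_output_example {
    use super::*;

    #[test]
    fn terminal_constructors_set_finish_reason() {
        assert_eq!(
            LLMEngineOutput::stop().finish_reason,
            Some(FinishReason::Stop)
        );
        assert!(LLMEngineOutput::cancelled().token_ids.is_empty());
    }
}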
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use super::FinishReason;
use crate::protocols::TokenIdType;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct PostprocessedResponse {
/// Model Deployment Card checksum
pub mdcsum: String,
// if the number of slots for a given request is greater than 1
// this indicates the index of the slot for the response
pub index: Option<usize>,
pub finish_reason: Option<FinishReason>,
// new token_ids
pub token_ids: Vec<TokenIdType>,
// tokens
pub tokens: Option<Vec<Option<String>>>,
// decoded text
pub text: Option<String>,
/// cumulative log probabilities
pub cum_log_probs: Option<f64>,
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use derive_builder::Builder;
use serde::{Deserialize, Serialize};
use super::{SamplingOptions, StopConditions};
use crate::protocols::TokenIdType;
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`triton-llm-preprocessor`]
/// crate is responsible for converting requests from the public APIs to this internal representation.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
/// Type of prompt
pub token_ids: Vec<TokenIdType>,
/// StopConditions are conditions that the inference engine will use to stop generation.
pub stop_conditions: StopConditions,
    /// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// More documentation on how, and the order in which, sampling options are applied
    /// is needed.
pub sampling_options: SamplingOptions,
/// The EOS token ID(s) for the Model
/// Not every backend needs this, but those that do can find it here.
/// TODO - refactor this to a better location
#[builder(default)]
pub eos_token_ids: Vec<TokenIdType>,
/// The computed checksum of the Model Deployment Card (MDC).
#[builder(default)]
pub mdc_sum: Option<String>,
/// User requested annotations for the request
#[builder(default)]
pub annotations: Vec<String>,
}
impl PreprocessedRequest {
    pub fn has_annotation(&self, annotation: &str) -> bool {
        self.annotations.iter().any(|a| a.as_str() == annotation)
    }

    pub fn builder() -> PreprocessedRequestBuilder {
        PreprocessedRequestBuilder::default()
    }
}
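// Illustrative sketch (an assumption, not part of the public API): only
// `token_ids`, `stop_conditions` and `sampling_options` must be supplied;
// the remaining fields fall back to their `#[builder(default)]` values.
#[allow(dead_code)]
fn example_build_request(
    token_ids: Vec<TokenIdType>,
    stop_conditions: StopConditions,
    sampling_options: SamplingOptions,
) -> PreprocessedRequest {
    PreprocessedRequest::builder()
        .token_ids(token_ids)
        .stop_conditions(stop_conditions)
        .sampling_options(sampling_options)
        .build()
        .expect("all required fields were provided")
}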
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// Forward openai_api_rs::v1 to triton_llm::protocols::openai::v1
pub mod chat_completions;
pub mod completions;
pub mod models;
pub mod nvext;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
fmt::Display,
ops::{Add, Div, Mul, Sub},
};
use validator::ValidationError;
use super::{
common::{self, SamplingOptionsProvider, StopConditionsProvider},
ContentProvider,
};
/// Minimum allowed value for OpenAI's `temperature` sampling option
pub const MIN_TEMPERATURE: f32 = 0.0;
/// Maximum allowed value for OpenAI's `temperature` sampling option
pub const MAX_TEMPERATURE: f32 = 2.0;
/// Allowed range of values for OpenAI's `temperature` sampling option
pub const TEMPERATURE_RANGE: (f32, f32) = (MIN_TEMPERATURE, MAX_TEMPERATURE);
/// Minimum allowed value for OpenAI's `top_p` sampling option
pub const MIN_TOP_P: f32 = 0.0;
/// Maximum allowed value for OpenAI's `top_p` sampling option
pub const MAX_TOP_P: f32 = 1.0;
/// Allowed range of values for OpenAI's `top_p` sampling option
pub const TOP_P_RANGE: (f32, f32) = (MIN_TOP_P, MAX_TOP_P);
/// Minimum allowed value for OpenAI's `frequency_penalty` sampling option
pub const MIN_FREQUENCY_PENALTY: f32 = -2.0;
/// Maximum allowed value for OpenAI's `frequency_penalty` sampling option
pub const MAX_FREQUENCY_PENALTY: f32 = 2.0;
/// Allowed range of values for OpenAI's `frequency_penalty` sampling option
pub const FREQUENCY_PENALTY_RANGE: (f32, f32) = (MIN_FREQUENCY_PENALTY, MAX_FREQUENCY_PENALTY);
/// Minimum allowed value for OpenAI's `presence_penalty` sampling option
pub const MIN_PRESENCE_PENALTY: f32 = -2.0;
/// Maximum allowed value for OpenAI's `presence_penalty` sampling option
pub const MAX_PRESENCE_PENALTY: f32 = 2.0;
/// Allowed range of values for OpenAI's `presence_penalty` sampling option
pub const PRESENCE_PENALTY_RANGE: (f32, f32) = (MIN_PRESENCE_PENALTY, MAX_PRESENCE_PENALTY);
/// Usage statistics for the completion request
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct CompletionUsage {
/// Number of tokens in the generated completion.
pub completion_tokens: i32,
/// Number of tokens in the prompt.
pub prompt_tokens: i32,
/// Total number of tokens used in the request (prompt + completion).
pub total_tokens: i32,
/// Breakdown of tokens used in a completion, optional.
#[serde(skip_serializing_if = "Option::is_none")]
pub completion_tokens_details: Option<CompletionTokensDetails>,
/// Breakdown of tokens used in the prompt, optional.
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_tokens_details: Option<PromptTokensDetails>,
}
// Struct for details on completion tokens
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CompletionTokensDetails {
/// Audio input tokens generated by the model.
pub audio_tokens: Option<i32>,
/// Tokens generated by the model for reasoning.
pub reasoning_tokens: Option<i32>,
}
// Struct for details on prompt tokens
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PromptTokensDetails {
/// Audio input tokens present in the prompt.
pub audio_tokens: Option<i32>,
/// Cached tokens present in the prompt.
pub cached_tokens: Option<i32>,
}
/// Represents a streaming response from the OpenAI API
/// The object is generic over `R`, the type of the response.
/// For SSE streaming responses, the expected `data: ` field is always a JSON
/// object corresponding to `R`; however, SSE comment lines (prefixed with `: `)
/// may carry other types of information, such as performance metrics,
/// as represented by the other arm of this enum.
///
/// This is part of the common API as both the client and service need to agree
/// on the format of the streaming responses.
#[derive(Serialize, Deserialize, Debug)]
pub enum StreamingDelta<R> {
/// Represents a response delta from the API
Delta(R),
Comment(String),
}
#[derive(Serialize, Deserialize, Debug)]
pub struct AnnotatedDelta<R> {
pub delta: R,
pub id: Option<String>,
pub event: Option<String>,
pub comment: Option<String>,
}
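// Illustrative sketch (an assumption about the wire mapping, not the actual
// service implementation): a `StreamingDelta<R>` could be rendered onto SSE
// lines as follows.
#[allow(dead_code)]
fn example_to_sse_line<R: Serialize>(msg: &StreamingDelta<R>) -> Result<String> {
    Ok(match msg {
        // Response deltas travel on `data:` lines as JSON.
        StreamingDelta::Delta(delta) => format!("data: {}", serde_json::to_string(delta)?),
        // Comments travel on `:`-prefixed lines; plain SSE clients ignore them.
        StreamingDelta::Comment(comment) => format!(": {}", comment),
    })
}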
trait OpenAISamplingOptionsProvider {
fn get_temperature(&self) -> Option<f32>;
fn get_top_p(&self) -> Option<f32>;
fn get_frequency_penalty(&self) -> Option<f32>;
fn get_presence_penalty(&self) -> Option<f32>;
fn nvext(&self) -> Option<&nvext::NvExt>;
}
trait OpenAIStopConditionsProvider {
fn get_max_tokens(&self) -> Option<i32>;
fn get_min_tokens(&self) -> Option<i32>;
fn get_stop(&self) -> Option<Vec<String>>;
fn nvext(&self) -> Option<&nvext::NvExt>;
}
impl<T: OpenAISamplingOptionsProvider> SamplingOptionsProvider for T {
fn extract_sampling_options(&self) -> Result<common::SamplingOptions> {
// let result = self.validate();
// if let Err(e) = result {
// return Err(format!("Error validating sampling options: {}", e));
// }
let mut temperature = validate_range(self.get_temperature(), &TEMPERATURE_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating temperature: {}", e))?;
let mut top_p = validate_range(self.get_top_p(), &TOP_P_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating top_p: {}", e))?;
let frequency_penalty =
validate_range(self.get_frequency_penalty(), &FREQUENCY_PENALTY_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating frequency_penalty: {}", e))?;
let presence_penalty = validate_range(self.get_presence_penalty(), &PRESENCE_PENALTY_RANGE)
.map_err(|e| anyhow::anyhow!("Error validating presence_penalty: {}", e))?;
if let Some(nvext) = self.nvext() {
let greedy = nvext.greed_sampling.unwrap_or(false);
if greedy {
top_p = None;
temperature = None;
}
}
Ok(common::SamplingOptions {
n: None,
best_of: None,
frequency_penalty,
presence_penalty,
repetition_penalty: None,
temperature,
top_p,
top_k: None,
min_p: None,
seed: None,
use_beam_search: None,
length_penalty: None,
})
}
}
impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
fn extract_stop_conditions(&self) -> Result<common::StopConditions> {
let max_tokens = self.get_max_tokens().map(|x| x as u32);
let min_tokens = self.get_min_tokens();
let stop = self.get_stop();
if let Some(stop) = &stop {
if stop.len() > 4 {
anyhow::bail!("stop conditions must be less than 4")
}
}
let mut ignore_eos = None;
if let Some(nvext) = self.nvext() {
ignore_eos = nvext.ignore_eos;
}
Ok(common::StopConditions {
max_tokens,
min_tokens: min_tokens.map(|v| v as u32),
stop,
stop_token_ids_hidden: None,
ignore_eos,
})
}
}
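// Illustrative sketch (not part of the crate's tests): any type implementing
// the private OpenAIStopConditionsProvider trait picks up
// `extract_stop_conditions` from the blanket impl above.
#[cfg(test)]
mod stop_conditions_blanket_impl_example {
    use super::*;

    struct FixedRequest;

    impl OpenAIStopConditionsProvider for FixedRequest {
        fn get_max_tokens(&self) -> Option<i32> {
            Some(64)
        }
        fn get_min_tokens(&self) -> Option<i32> {
            None
        }
        fn get_stop(&self) -> Option<Vec<String>> {
            Some(vec!["\n\n".to_string()])
        }
        fn nvext(&self) -> Option<&nvext::NvExt> {
            None
        }
    }

    #[test]
    fn blanket_impl_provides_extraction() {
        let conditions = FixedRequest.extract_stop_conditions().unwrap();
        assert_eq!(conditions.max_tokens, Some(64));
    }
}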
/// Common structure for chat completion responses; the only delta is the type of choices which differs
/// between streaming and non-streaming requests.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GenericCompletionResponse<C>
// where
// C: Serialize + Clone,
{
/// A unique identifier for the chat completion.
pub id: String,
/// A list of chat completion choices. Can be more than one if n is greater than 1.
pub choices: Vec<C>,
/// The Unix timestamp (in seconds) of when the chat completion was created.
pub created: u64,
/// The model used for the chat completion.
pub model: String,
/// The object type, which is `chat.completion` if the type of `Choice` is `ChatCompletionChoice`,
/// or is `chat.completion.chunk` if the type of `Choice` is `ChatCompletionChoiceDelta`.
pub object: String,
pub usage: Option<CompletionUsage>,
/// This fingerprint represents the backend configuration that the model runs with.
///
/// Can be used in conjunction with the seed request parameter to understand when backend changes
/// have been made that might impact determinism.
///
/// NIM Compatibility:
    /// This field is not supported by the NIM; however, it will be added in the future.
/// The optional nature of this field will be relaxed when it is supported.
pub system_fingerprint: Option<String>,
// TODO() - add NvResponseExtention
}
fn validate_logit_bias(logit_bias: &HashMap<String, i32>) -> Result<(), ValidationError> {
for key in logit_bias.keys() {
if key.parse::<i32>().is_err() {
return Err(
ValidationError::new("logit_bias").with_message("Keys must be integers".into())
);
}
}
Ok(())
}
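// Illustrative sketch (not part of the crate's tests): keys must parse as
// integers even though the JSON map uses strings.
#[cfg(test)]
mod logit_bias_validation_example {
    use super::*;

    #[test]
    fn rejects_non_integer_keys() {
        let mut bias = HashMap::new();
        bias.insert("42".to_string(), -100);
        assert!(validate_logit_bias(&bias).is_ok());
        bias.insert("not an int".to_string(), 100);
        assert!(validate_logit_bias(&bias).is_err());
    }
}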
// todo - move to common location
fn validate_range<T>(value: Option<T>, range: &(T, T)) -> Result<Option<T>>
where
T: PartialOrd + Display,
{
if value.is_none() {
return Ok(None);
}
let value = value.unwrap();
if value < range.0 || value > range.1 {
anyhow::bail!("Value {} is out of range [{}, {}]", value, range.0, range.1);
}
Ok(Some(value))
}
// todo - move to common location
/// scale value in `src` range to `dst` range
pub fn scale_value<T>(value: &T, src: &(T, T), dst: &(T, T)) -> Result<T>
where
T: Copy
+ PartialOrd
+ Add<Output = T>
+ Sub<Output = T>
+ Mul<Output = T>
+ Div<Output = T>
+ From<f32>,
{
let dst_range = dst.1 - dst.0;
let src_range = src.1 - src.0;
if dst_range == T::from(0.0) {
anyhow::bail!("dst range is 0");
}
if src_range == T::from(0.0) {
anyhow::bail!("src range is 0");
}
let value_scaled = (*value - src.0) / src_range;
Ok(dst.0 + (value_scaled * dst_range))
}
pub trait DeltaGeneratorExt<ResponseType: Send + Sync + 'static + std::fmt::Debug>:
Send + Sync + 'static
{
fn choice_from_postprocessor(
&mut self,
response: common::llm_backend::BackendOutput,
) -> Result<ResponseType>;
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_validate_range() {
assert_eq!(validate_range(Some(0.5), &(0.0, 1.0)).unwrap(), Some(0.5));
assert_eq!(validate_range(Some(0.0), &(0.0, 1.0)).unwrap(), Some(0.0));
assert_eq!(validate_range(Some(1.0), &(1.0, 1.0)).unwrap(), Some(1.0));
assert_eq!(validate_range(Some(1_i32), &(1, 1)).unwrap(), Some(1));
assert_eq!(
validate_range(Some(1.1), &(0.0, 1.0))
.unwrap_err()
.to_string(),
"Value 1.1 is out of range [0, 1]"
);
assert_eq!(
validate_range(Some(-0.1), &(0.0, 1.0))
.unwrap_err()
.to_string(),
"Value -0.1 is out of range [0, 1]"
);
}
#[test]
fn test_scaled_value() {
assert_eq!(scale_value(&0.5, &(0.0, 1.0), &(0.0, 2.0)).unwrap(), 1.0);
assert_eq!(scale_value(&0.0, &(0.0, 1.0), &(0.0, 2.0)).unwrap(), 0.0);
assert_eq!(scale_value(&-1.0, &(-2.0, 2.0), &(1.0, 2.0)).unwrap(), 1.25);
assert!(scale_value(&1.0, &(1.0, 1.0), &(0.0, 2.0)).is_err());
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::VecDeque;
use std::fmt;
use std::fmt::Display;
use derive_builder::Builder;
use serde::de::{self, SeqAccess, Visitor};
use serde::ser::SerializeMap;
use serde::{Deserialize, Serialize};
use serde::{Deserializer, Serializer};
use serde_json::Value;
use validator::Validate;
mod aggregator;
mod delta;
use super::nvext::NvExtProvider;
pub use super::{CompletionTokensDetails, CompletionUsage, PromptTokensDetails};
// pub use aggregator::DeltaAggregator;
pub use delta::DeltaGenerator;
use super::{
common::{self, ChatCompletionLogprobs, SamplingOptionsProvider, StopConditionsProvider},
nvext::NvExt,
validate_logit_bias, ContentProvider, OpenAISamplingOptionsProvider,
OpenAIStopConditionsProvider,
};
// use crate::AnnotationsProvider;
/// Request object which is used to generate chat completions.
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[builder(build_fn(private, name = "build_internal", validate = "Self::validate"))]
pub struct ChatCompletionRequest {
/// Multi-turn chat messages.
///
/// NIM Compatibility:
    /// Multi-turn chat models vary: some work with the OpenAI ChatGPT format, while others
    /// will require `NvExt`.
pub messages: Vec<ChatCompletionMessage>,
/// Name of the model
#[builder(setter(into))]
pub model: String,
/// The maximum number of tokens that can be generated in the completion.
/// The token count of your prompt plus max_tokens cannot exceed the model's context length.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
#[validate(range(min = 1))]
pub max_tokens: Option<i32>,
/// The minimum number of tokens to generate. We ignore stop tokens until we see this many
/// tokens. Leave this None unless you are working on the pre-processor.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub min_tokens: Option<i32>,
    /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only
    /// server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.
///
/// NIM Compatibility:
    /// The NIM SDK can send extra metadata in the SSE stream using the `:` comment, `event:`,
/// or `id:` fields. See the `enable_sse_metadata` field in the NvExt object.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(strip_option))]
pub stream: Option<bool>,
/// How many chat completion choices to generate for each input message.
///
/// NIM Compatibility:
/// Values greater than 1 are not currently supported by NIM.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub n: Option<i32>,
/// What sampling `temperature` to use, between 0 and 2. Higher values like 0.8 will make the
/// output more random, while lower values like 0.2 will make it more focused and deterministic.
/// OpenAI defaults to 1.0; however, in this crate, the default is None, and model-specific defaults
/// can be applied later as part of associating the request with a given model.
///
    /// OpenAI generally recommends altering this or `top_p` but not both.
///
    /// TODO(): Add a model-specific validation which could enforce that only a single type of sampling can be used.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(min = "super::MIN_TEMPERATURE", max = "super::MAX_TEMPERATURE"))]
#[builder(default, setter(into, strip_option))]
pub temperature: Option<f32>,
/// An alternative to sampling with `temperature`, called nucleus sampling, where the model
/// considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens
/// comprising the top 10% probability mass are considered.
///
/// We generally recommend altering this or `temperature` but not both.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(min = "super::MIN_TOP_P", max = "super::MAX_TOP_P"))]
#[builder(default, setter(into, strip_option))]
pub top_p: Option<f32>,
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency
/// in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(
min = "super::MIN_FREQUENCY_PENALTY",
max = "super::MAX_FREQUENCY_PENALTY"
))]
#[builder(default, setter(into, strip_option))]
pub frequency_penalty: Option<f32>,
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in
/// the text so far, increasing the model's likelihood to talk about new topics.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(
min = "super::MIN_PRESENCE_PENALTY",
max = "super::MAX_PRESENCE_PENALTY"
))]
#[builder(default, setter(into, strip_option))]
pub presence_penalty: Option<f32>,
/// OpenAI specific API fields:
/// See: <https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format>
///
/// NIM Compatibility:
/// This option is not currently supported by NIM LLM. An error will be returned if this field is set.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default)]
pub response_format: Option<Value>,
/// Up to 4 sequences where the API will stop generating further tokens.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(length(max = 4))]
#[builder(default, setter(into, strip_option))]
pub stop: Option<Vec<String>>,
/// Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities
/// of each output token returned in the content of message.
///
/// Not all models support logprobs. If logprobs is set to true for a model that does not support it,
/// the request will be processed as if logprobs is set to false.
///
/// NIM Compatibility:
    /// TODO - Add a NvExt `strict` object which will disable relaxing of model-specific limitations; meaning,
    /// if the user requests `logprobs` and the model does not support them, the request will fail with an error.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(strip_option))]
pub logprobs: Option<bool>,
/// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position,
/// each with an associated log probability. logprobs must be set to true if this parameter is used.
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(range(min = 0, max = 20))]
#[builder(default, setter(into, strip_option))]
pub top_logprobs: Option<i32>,
/// Modify the likelihood of specified tokens appearing in the completion.
///
/// Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an
/// associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs.
/// Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact
/// effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of
/// selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
///
    /// As specified in the OpenAI examples, this is a map of token_ids as strings to a bias value that
    /// is an integer.
    ///
    /// However, OpenAI's SDK examples show that it can also be specified more accurately as a
    /// map of token_ids as ints to a bias value that is also an int.
///
/// NIM Compatibility:
/// In the conversion of the OpenAI request to the internal NIM format, the keys of this map will be
/// validated to ensure they are integers. Since different models may have different tokenizers, the
/// range and values will again be validated on the compute backend to ensure they map to valid tokens
/// in the vocabulary of the model.
///
/// ```
/// use triton_llm::protocols::openai::completions::CompletionRequest;
///
/// let request = CompletionRequest::builder()
/// .prompt("What is the meaning of life?")
/// .model("meta/llama-3.1-8b-instruct")
/// .add_logit_bias(1337, -100) // using an int as a key is ok
/// .add_logit_bias("42", 100) // using a string as a key is also ok
/// .build()
/// .expect("Should not fail");
///
/// assert!(CompletionRequest::builder()
/// .prompt("What is the meaning of life?")
/// .model("meta/llama-3.1-8b-instruct")
/// .add_logit_bias("some non int", -100)
/// .build()
/// .is_err());
/// ```
#[serde(skip_serializing_if = "Option::is_none")]
#[validate(custom(function = "validate_logit_bias"))]
#[builder(default, setter(into, strip_option))]
pub logit_bias: Option<HashMap<String, i32>>,
/// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
///
/// NIM Compatibility:
/// If provided, then the value of this field will be included in the trace metadata and the accounting
/// data (if enabled).
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub user: Option<String>,
/// If specified, our system will make a best effort to sample deterministically, such that repeated
/// requests with the same seed and parameters should return the same result. Determinism is not guaranteed,
/// and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))]
pub seed: Option<i64>,
/// A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
/// provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
///
/// NIM Compatibility:
/// This field is not currently supported by NIM LLM. An error will be returned if this field is set.
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default)]
pub tools: Option<Vec<Tool>>,
/// Controls which (if any) function is called by the model. none means the model will not call a function
/// and instead generates a message. auto means the model can pick between generating a message or calling
/// a function. Specifying a particular function via {"type": "function", "function": {"name": "my_function"}}
/// forces the model to call that function.
///
/// `none` is the default when no functions are present. `auto` is the default if functions are present.
///
/// NIM Compatibility:
/// This field is not currently supported by NIM LLM. An error will be returned if this field is set.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(serialize_with = "serialize_tool_choice")]
#[builder(default)]
pub tool_choice: Option<ToolChoiceType>,
/// Additional parameters supported by NIM backends
#[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(strip_option))]
pub nvext: Option<NvExt>,
}
impl ChatCompletionRequest {
pub fn builder() -> ChatCompletionRequestBuilder {
ChatCompletionRequestBuilder::default()
}
}
impl ChatCompletionRequestBuilder {
    // This is a pre-build validation function.
    // It is called before the generated build method (here, `build_internal`).
    // It has access to the internal state of the builder.
fn validate(&self) -> Result<(), String> {
Ok(())
}
/// Builds and validates the ChatCompletionRequest
///
/// ```rust
/// use triton_llm::protocols::openai::chat_completions::ChatCompletionRequest;
///
/// let request = ChatCompletionRequest::builder()
/// .model("mixtral-8x7b-instruct-v0.1")
/// .add_user_message("Hello")
/// .max_tokens(16)
/// .build()
/// .expect("Failed to build ChatCompletionRequest");
/// ```
pub fn build(&self) -> anyhow::Result<ChatCompletionRequest> {
        // Calls build_internal, validates the result, then performs additional
        // post-build validation to check mutually exclusive fields and ensure
        // there are no collisions between them.
let request = self
.build_internal()
.map_err(|e| anyhow::anyhow!("Failed to build ChatCompletionRequest: {}", e))?;
request
.validate()
.map_err(|e| anyhow::anyhow!("Failed to validate ChatCompletionRequest: {}", e))?;
// check mutually exclusive fields
        if request.top_logprobs.is_some() && request.logprobs != Some(true) {
            anyhow::bail!("top_logprobs requires logprobs to be set to true");
        }
Ok(request)
}
/// Add a message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
/// This will either create or append to the `Vec<ChatCompletionMessage>`
pub fn add_message(&mut self, message: ChatCompletionMessage) -> &mut Self {
        // Get the existing messages, or create a new Vec if none exist yet.
self.messages.get_or_insert_with(Vec::new).push(message);
self
}
/// Add a user message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
pub fn add_user_message(&mut self, content: impl Into<String>) -> &mut Self {
self.add_message(ChatCompletionMessage {
role: MessageRole::user,
content: Content::Text(content.into()),
name: None,
})
}
/// Add an assistant message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
pub fn add_assistant_message(&mut self, content: impl Into<String>) -> &mut Self {
self.add_message(ChatCompletionMessage {
role: MessageRole::assistant,
content: Content::Text(content.into()),
name: None,
})
}
/// Add a system message to the `Vec<ChatCompletionMessage>` in the ChatCompletionRequest
pub fn add_system_message(&mut self, content: impl Into<String>) -> &mut Self {
self.add_message(ChatCompletionMessage {
role: MessageRole::system,
content: Content::Text(content.into()),
name: None,
})
}
/// Add a stop condition to the `Vec<String>` in the ChatCompletionRequest
/// This will either create or append to the `Vec<String>`
pub fn add_stop(&mut self, stop: impl Into<String>) -> &mut Self {
self.stop
.get_or_insert_with(|| Some(vec![]))
.as_mut()
.expect("stop should always be Some(Vec)")
.push(stop.into());
self
}
/// Add a token and bias to the `HashMap<String, i32>` in the ChatCompletionRequest
/// This will either create or update the `HashMap<String, i32>`
/// See: [`ChatCompletionRequest::logit_bias`] for more details
pub fn add_logit_bias<T>(&mut self, token_id: T, bias: i32) -> &mut Self
where
T: std::fmt::Display,
{
self.logit_bias
.get_or_insert_with(|| Some(HashMap::new()))
.as_mut()
.expect("logit_bias should always be Some(HashMap)")
.insert(token_id.to_string(), bias);
self
}
}
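// Illustrative sketch (not part of the crate's tests): the post-build check
// in `build` above rejects `top_logprobs` unless `logprobs` is explicitly true.
#[cfg(test)]
mod builder_validation_example {
    use super::*;

    #[test]
    fn top_logprobs_requires_logprobs() {
        assert!(ChatCompletionRequest::builder()
            .model("meta/llama-3.1-8b-instruct")
            .add_user_message("Hello")
            .top_logprobs(5)
            .build()
            .is_err());

        assert!(ChatCompletionRequest::builder()
            .model("meta/llama-3.1-8b-instruct")
            .add_user_message("Hello")
            .logprobs(true)
            .top_logprobs(5)
            .build()
            .is_ok());
    }
}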
/// Each turn in a conversation is represented by a ChatCompletionMessage.
#[derive(Builder, Debug, Deserialize, Serialize, Clone)]
pub struct ChatCompletionMessage {
pub role: MessageRole,
#[serde(deserialize_with = "deserialize_content")]
pub content: Content,
#[serde(skip_serializing_if = "Option::is_none", default)]
#[builder(default)]
pub name: Option<String>,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum MessageRole {
user,
system,
assistant,
function,
}
impl Display for MessageRole {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
use MessageRole::*;
let s = match self {
user => "user",
system => "system",
assistant => "assistant",
function => "function",
};
write!(f, "{s}")
}
}
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub enum Content {
Text(String),
ImageUrl(Vec<ImageUrl>),
}
impl serde::Serialize for Content {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match *self {
Content::Text(ref text) => serializer.serialize_str(text),
Content::ImageUrl(ref image_url) => image_url.serialize(serializer),
}
}
}
fn deserialize_content<'de, D>(deserializer: D) -> Result<Content, D::Error>
where
D: Deserializer<'de>,
{
struct ContentVisitor;
impl<'de> Visitor<'de> for ContentVisitor {
type Value = Content;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a string or an array of content parts")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Content::Text(value.to_owned()))
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut parts = Vec::new();
while let Some(value) = seq.next_element::<String>()? {
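                // Heuristic: strings that look like http(s) URLs become
                // image_url parts; all other strings become text parts.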
if value.starts_with("http://") || value.starts_with("https://") {
parts.push(ImageUrl {
r#type: ContentType::image_url,
text: None,
image_url: Some(ImageUrlType { url: value }),
});
} else {
parts.push(ImageUrl {
r#type: ContentType::text,
text: Some(value),
image_url: None,
});
}
}
Ok(Content::ImageUrl(parts))
}
}
deserializer.deserialize_any(ContentVisitor)
}
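// Illustrative sketch (not part of the crate's tests): `content` accepts
// either a bare string or an array of strings, per the visitor above.
#[cfg(test)]
mod content_deserialization_example {
    use super::*;

    #[test]
    fn string_and_array_content() {
        let text: ChatCompletionMessage =
            serde_json::from_str(r#"{"role": "user", "content": "Hello"}"#).unwrap();
        assert_eq!(text.content, Content::Text("Hello".to_string()));

        let mixed: ChatCompletionMessage = serde_json::from_str(
            r#"{"role": "user", "content": ["describe this", "https://example.com/cat.png"]}"#,
        )
        .unwrap();
        match mixed.content {
            Content::ImageUrl(parts) => {
                assert_eq!(parts[0].r#type, ContentType::text);
                assert_eq!(parts[1].r#type, ContentType::image_url);
            }
            _ => panic!("expected an array of content parts"),
        }
    }
}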
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ContentType {
text,
image_url,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub struct ImageUrlType {
pub url: String,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub struct ImageUrl {
pub r#type: ContentType,
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub image_url: Option<ImageUrlType>,
}
/// Represents a chat completion response returned by model, based on the provided input.
pub type ChatCompletionResponse = ChatCompletionGeneric<ChatCompletionChoice>;
/// Represents a streamed chunk of a chat completion response returned by model, based on the provided input.
pub type ChatCompletionResponseDelta = ChatCompletionGeneric<ChatCompletionChoiceDelta>;
/// Common structure for chat completion responses; the only delta is the type of choices which differs
/// between streaming and non-streaming requests.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ChatCompletionGeneric<C>
where
C: Serialize + Clone + ContentProvider,
{
/// A unique identifier for the chat completion.
pub id: String,
/// A list of chat completion choices. Can be more than one if n is greater than 1.
pub choices: Vec<C>,
/// The Unix timestamp (in seconds) of when the chat completion was created.
pub created: u64,
/// The model used for the chat completion.
pub model: String,
/// The object type, which is `chat.completion` if the type of `Choice` is `ChatCompletionChoice`,
/// or is `chat.completion.chunk` if the type of `Choice` is `ChatCompletionChoiceDelta`.
pub object: String,
/// Usage information for the completion request.
pub usage: Option<CompletionUsage>,
/// The service tier used for processing the request, optional.
#[serde(skip_serializing_if = "Option::is_none")]
pub service_tier: Option<ServiceTier>,
/// This fingerprint represents the backend configuration that the model runs with.
///
/// Can be used in conjunction with the seed request parameter to understand when backend changes
/// have been made that might impact determinism.
///
/// NIM Compatibility:
    /// This field is not supported by the NIM; however, it will be added in the future.
/// The optional nature of this field will be relaxed when it is supported.
pub system_fingerprint: Option<String>,
// TODO() - add NvResponseExtention
}
// Service tier: `auto`, `scale`, or `default`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "snake_case")]
pub enum ServiceTier {
Auto,
Scale,
Default,
}
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct ChatCompletionChoice {
/// A chat completion message generated by the model.
pub message: ChatCompletionContent,
/// The index of the choice in the list of choices.
pub index: u64,
/// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural
/// stop point or a provided stop sequence, `length` if the maximum number of tokens specified
/// in the request was reached, `content_filter` if content was omitted due to a flag from our content
/// filters, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called
/// a function.
///
/// NIM Compatibility:
/// Only `stop` and `length` are currently supported by NIM.
/// NIM may also provide additional reasons in the future, such as `error`, `timeout`, or `cancellation`.
pub finish_reason: FinishReason,
/// Log probability information for the choice, optional field.
#[serde(skip_serializing_if = "Option::is_none")]
pub logprobs: Option<ChatCompletionLogprobs>,
}
impl ContentProvider for ChatCompletionChoice {
fn content(&self) -> String {
self.message.content()
}
}
/// Same as `ChatCompletionChoice`, but received during a response stream.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChatCompletionChoiceDelta {
/// The index of the choice in the list of choices.
pub index: u64,
/// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural
/// stop point or a provided stop sequence, `length` if the maximum number of tokens specified
/// in the request was reached, `content_filter` if content was omitted due to a flag from our content
/// filters, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called
/// a function.
///
/// NIM Compatibility:
/// Only `stop` and `length` are currently supported by NIM.
/// NIM may also provide additional reasons in the future, such as `error`, `timeout`, or `cancellation`.
pub finish_reason: Option<FinishReason>,
/// A chat completion delta generated by streamed model responses.
pub delta: ChatCompletionContent,
/// Log probability information for the choice, optional field.
#[serde(skip_serializing_if = "Option::is_none")]
pub logprobs: Option<ChatCompletionLogprobs>,
}
impl ContentProvider for ChatCompletionChoiceDelta {
fn content(&self) -> String {
self.delta.content()
}
}
/// A chat completion message generated by the model.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ChatCompletionContent {
/// The role of the author of this message.
#[serde(skip_serializing_if = "Option::is_none")]
pub role: Option<MessageRole>,
/// The contents of the message.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
/// Tool calls made by the model.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_calls: Option<Vec<ToolCall>>,
}
impl ContentProvider for ChatCompletionContent {
fn content(&self) -> String {
self.content.clone().unwrap_or_default()
}
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum ToolChoiceType {
None,
Auto,
ToolChoice { tool: Tool },
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
pub struct Function {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
pub parameters: FunctionParameters,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum JSONSchemaType {
Object,
Number,
String,
Array,
Null,
Boolean,
}
#[derive(Debug, Deserialize, Serialize, Clone, Default, PartialEq, Eq)]
pub struct JSONSchemaDefine {
#[serde(rename = "type")]
pub schema_type: Option<JSONSchemaType>,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub enum_values: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub properties: Option<HashMap<String, Box<JSONSchemaDefine>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub required: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub items: Option<Box<JSONSchemaDefine>>,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
pub struct FunctionParameters {
#[serde(rename = "type")]
pub schema_type: JSONSchemaType,
#[serde(skip_serializing_if = "Option::is_none")]
pub properties: Option<HashMap<String, Box<JSONSchemaDefine>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub required: Option<Vec<String>>,
}
#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum FinishReason {
stop,
length,
content_filter,
tool_calls,
cancelled,
null,
}
/// Implements `FromStr`, parsing a `FinishReason` from its wire-format string.
impl std::str::FromStr for FinishReason {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"stop" => Ok(FinishReason::stop),
"length" => Ok(FinishReason::length),
"content_filter" => Ok(FinishReason::content_filter),
"tool_calls" => Ok(FinishReason::tool_calls),
"null" => Ok(FinishReason::null),
_ => Err(format!("Unknown FinishReason: {}", s)),
}
}
}
impl std::fmt::Display for FinishReason {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
FinishReason::stop => write!(f, "stop"),
FinishReason::length => write!(f, "length"),
FinishReason::content_filter => write!(f, "content_filter"),
FinishReason::tool_calls => write!(f, "tool_calls"),
FinishReason::cancelled => write!(f, "cancelled"),
FinishReason::null => write!(f, "null"),
}
}
}
#[derive(Debug, Deserialize, Serialize)]
#[allow(non_camel_case_types)]
pub struct FinishDetails {
pub r#type: FinishReason,
pub stop: String,
}
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ToolCall {
pub id: String,
pub r#type: String,
pub function: ToolCallFunction,
}
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ToolCallFunction {
#[serde(skip_serializing_if = "Option::is_none")]
pub name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub arguments: Option<String>,
}
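/// Serializes an optional `ToolChoiceType` in the OpenAI wire format: the
/// strings `"none"` or `"auto"`, a `{ "type": ..., "function": ... }` map for
/// a specific tool, or `null` when unset.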
fn serialize_tool_choice<S>(
value: &Option<ToolChoiceType>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match value {
Some(ToolChoiceType::None) => serializer.serialize_str("none"),
Some(ToolChoiceType::Auto) => serializer.serialize_str("auto"),
Some(ToolChoiceType::ToolChoice { tool }) => {
let mut map = serializer.serialize_map(Some(2))?;
map.serialize_entry("type", &tool.r#type)?;
map.serialize_entry("function", &tool.function)?;
map.end()
}
None => serializer.serialize_none(),
}
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
pub struct Tool {
pub r#type: ToolType,
pub function: Function,
}
#[derive(Debug, Deserialize, Serialize, Copy, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
Function,
}
impl ChatCompletionRequest {}
impl NvExtProvider for ChatCompletionRequest {
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
}
fn raw_prompt(&self) -> Option<String> {
None
}
}
// impl AnnotationsProvider for ChatCompletionRequest {
// fn annotations(&self) -> Option<Vec<String>> {
// self.nvext
// .as_ref()
// .and_then(|nvext| nvext.annotations.clone())
// }
// fn has_annotation(&self, annotation: &str) -> bool {
// self.nvext
// .as_ref()
// .and_then(|nvext| nvext.annotations.as_ref())
// .map(|annotations| annotations.contains(&annotation.to_string()))
// .unwrap_or(false)
// }
// }
impl OpenAISamplingOptionsProvider for ChatCompletionRequest {
fn get_temperature(&self) -> Option<f32> {
self.temperature
}
fn get_top_p(&self) -> Option<f32> {
self.top_p
}
fn get_frequency_penalty(&self) -> Option<f32> {
self.frequency_penalty
}
fn get_presence_penalty(&self) -> Option<f32> {
self.presence_penalty
}
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
}
}
impl OpenAIStopConditionsProvider for ChatCompletionRequest {
fn get_max_tokens(&self) -> Option<i32> {
self.max_tokens
}
fn get_min_tokens(&self) -> Option<i32> {
self.min_tokens
}
fn get_stop(&self) -> Option<Vec<String>> {
self.stop.clone()
}
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
}
}
/// Implements `TryFrom` for converting an OpenAI `ChatCompletionRequest` into an engine `common::CompletionRequest`.
impl TryFrom<ChatCompletionRequest> for common::CompletionRequest {
type Error = anyhow::Error;
fn try_from(request: ChatCompletionRequest) -> Result<Self, Self::Error> {
// openai_api_rs::v1::chat_completion
// pub struct ChatCompletionRequest {
// NA pub model: String,
// L pub messages: Vec<ChatCompletionMessage, Global>,
// SO pub temperature: Option<f32>,
// SO pub top_p: Option<f32>,
// SO pub n: Option<i32>,
// ** pub response_format: Option<Value>,
// NA pub stream: Option<bool>, // See Issue #8
// SC pub stop: Option<Vec<String, Global>>,
// SC pub max_tokens: Option<i32>,
// SO pub presence_penalty: Option<f32>,
// SO pub frequency_penalty: Option<f32>,
// ** pub logit_bias: Option<HashMap<String, i32, RandomState>>,
// ** pub user: Option<String>,
// SO pub seed: Option<i64>,
// ** pub tools: Option<Vec<Tool, Global>>,
// ** pub tool_choice: Option<ToolChoiceType>,
// }
//
// ** not supported
// NA not applicable
// L local in this method
// SO extract_sampling_options
// SC extract_stop_conditions
// First we validate the OpenAI request. We cannot validate everything, as some
// fields require backend awareness, but we can validate against the public
// OpenAI limits.
request
.validate()
.map_err(|e| anyhow::anyhow!("Failed to validate ChatCompletionRequest: {}", e))?;
// todo(ryan) - open a ticket to support this
if request.logit_bias.is_some() {
anyhow::bail!("logit_bias is not supported");
}
// todo(ryan) - add support for user
if request.user.is_some() {
anyhow::bail!("user is not supported");
}
if request.response_format.is_some() {
anyhow::bail!("response_format is not supported");
}
if request.tools.is_some() {
anyhow::bail!("tools is not supported");
}
if request.tool_choice.is_some() {
anyhow::bail!("tool_choice is not supported");
}
// sampling options
let sampling_options = request
.extract_sampling_options()
.map_err(|e| anyhow::anyhow!("Failed to extract SamplingOptions: {}", e))?;
// stop conditions
let stop_conditions = request
.extract_stop_conditions()
.map_err(|e| anyhow::anyhow!("Failed to extract StopConditions: {}", e))?;
// first we need to process the messages
let prompt = common::PromptType::ChatCompletion(
validate_and_collect_chat_messages(request.messages)
.map_err(|e| anyhow::anyhow!("Failed to validate chat messages: {}", e))?,
);
// return the completion request
Ok(common::CompletionRequest {
prompt,
stop_conditions,
sampling_options,
mdc_sum: None,
annotations: None,
})
}
}
impl TryFrom<common::StreamingCompletionResponse> for ChatCompletionChoice {
type Error = anyhow::Error;
fn try_from(response: common::StreamingCompletionResponse) -> Result<Self, Self::Error> {
let choice = ChatCompletionChoice {
index: response.delta.index.unwrap_or(0) as u64,
message: ChatCompletionContent {
role: Some(MessageRole::assistant),
content: response.delta.text,
tool_calls: None,
},
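// Map engine finish reasons onto OpenAI wire values; note that
// `Cancelled` is surfaced as `null` rather than `cancelled` here.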
finish_reason: match &response.delta.finish_reason {
Some(common::FinishReason::EoS) => FinishReason::stop,
Some(common::FinishReason::Stop) => FinishReason::stop,
Some(common::FinishReason::Length) => FinishReason::length,
Some(common::FinishReason::Error(err_msg)) => {
return Err(anyhow::anyhow!("finish_reason::error = {}", err_msg));
}
Some(common::FinishReason::Cancelled) => FinishReason::null,
None => FinishReason::null,
},
logprobs: response.logprobs,
};
Ok(choice)
}
}
impl TryFrom<common::StreamingCompletionResponse> for ChatCompletionChoiceDelta {
type Error = anyhow::Error;
fn try_from(response: common::StreamingCompletionResponse) -> Result<Self, Self::Error> {
let choice = ChatCompletionChoiceDelta {
index: response.delta.index.unwrap_or(0) as u64,
delta: ChatCompletionContent {
role: Some(MessageRole::assistant),
content: response.delta.text,
tool_calls: None,
},
finish_reason: match &response.delta.finish_reason {
Some(common::FinishReason::EoS) => Some(FinishReason::stop),
Some(common::FinishReason::Stop) => Some(FinishReason::stop),
Some(common::FinishReason::Length) => Some(FinishReason::length),
Some(common::FinishReason::Error(err_msg)) => {
return Err(anyhow::anyhow!("finish_reason::error = {}", err_msg));
}
Some(common::FinishReason::Cancelled) => Some(FinishReason::null),
None => None,
},
logprobs: response.logprobs,
};
Ok(choice)
}
}
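/// Splits an OpenAI-style message list into an optional system prompt, a
/// series of completed (user, assistant) turns, and a final user prompt,
/// enforcing that user and assistant messages strictly alternate.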
fn validate_and_collect_chat_messages(
messages: Vec<ChatCompletionMessage>,
) -> Result<common::ChatContext, anyhow::Error> {
let mut system_prompt = None;
let mut turns = VecDeque::new();
let mut last_role = MessageRole::assistant;
for message in messages {
match message.role {
MessageRole::system => {
if system_prompt.is_some() {
return Err(anyhow::anyhow!("More than one system message found"));
}
system_prompt = Some(message.content);
}
MessageRole::user | MessageRole::assistant => {
if last_role == message.role {
if turns.is_empty() {
return Err(anyhow::anyhow!("First message must be a user message"));
}
return Err(anyhow::anyhow!(
"User and assistant messages must alternate"
));
}
last_role = message.role.clone();
turns.push_back(message);
}
MessageRole::function => {} // Ignoring function messages as per assumption.
}
}
if let Some(first) = turns.front() {
if let MessageRole::assistant = first.role {
return Err(anyhow::anyhow!("Sequence must start with a user message"));
}
}
if turns.len() % 2 == 0 {
return Err(anyhow::anyhow!("Sequence must end with a user message"));
}
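// Pair off completed (user, assistant) turns; the final unpaired user
// message becomes the prompt.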
let mut context = Vec::new();
while turns.len() >= 2 {
let user = turns.pop_front().unwrap();
let asst = turns.pop_front().unwrap();
let user = match user.content {
Content::Text(text) => text,
_ => return Err(anyhow::anyhow!("User message must be text")),
};
let asst = match asst.content {
Content::Text(text) => text,
_ => return Err(anyhow::anyhow!("Assistant message must be text")),
};
context.push(common::ChatTurn {
user,
assistant: asst,
});
}
let prompt = turns.pop_back().unwrap();
let prompt = match prompt.content {
Content::Text(text) => text,
_ => return Err(anyhow::anyhow!("Prompt message must be text")),
};
let system_prompt = match system_prompt {
Some(Content::Text(text)) => Some(text),
Some(_) => return Err(anyhow::anyhow!("System prompt must be text")),
None => None,
};
Ok(common::ChatContext {
completion: common::CompletionContext {
prompt,
system_prompt,
},
context,
})
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use serde_json::json;
use std::error::Error;
use super::*;
#[test]
fn test_chat_completions_valid_request_minimal() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.build();
assert!(
request.is_ok(),
"Request should succeed with minimal fields"
);
Ok(())
}
#[test]
fn test_chat_completions_valid_request_full() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.max_tokens(50)
.stream(true)
.n(1)
.temperature(1.0)
.top_p(0.9)
.frequency_penalty(0.5)
.presence_penalty(0.5)
.stop(vec!["The end.".to_string()])
.logprobs(true)
.top_logprobs(5)
.logit_bias(HashMap::new())
.user("test_user")
.seed(1234)
.build();
println!("{:?}", request);
assert!(
request.is_ok(),
"Request should succeed with all fields set"
);
Ok(())
}
#[test]
fn test_chat_completions_top_logprobs_requires_logprobs() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.top_logprobs(5) // logprobs is not set to true
.build();
assert!(
request.is_err(),
"Request should fail when top_logprobs is set without logprobs being true"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_max_tokens_out_of_range() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.max_tokens(4097) // assuming the model has a max context length of 4096
.build();
assert!(
request.is_err(),
"Request should fail when max_tokens exceeds model's context length"
);
Ok(())
}
#[test]
fn test_chat_completions_invalid_top_p() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.top_p(1.5) // Invalid, should be between 0 and 1
.build();
assert!(
request.is_err(),
"Request should fail with invalid top_p value"
);
Ok(())
}
#[test]
fn test_chat_completions_missing_messages() -> Result<(), Box<dyn Error>> {
// Missing messages field in the request
let request_result = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct") // Valid model
.build(); // This should fail because no messages are provided.
assert!(
request_result.is_err(),
"Expected request to fail without messages."
);
if let Err(e) = request_result {
println!("Expected error: {}", e); // Optionally print the error for debugging
}
Ok(())
}
#[test]
fn test_chat_completions_negative_max_tokens() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello, world!")
.max_tokens(-10)
.build();
assert!(
request.is_err(),
"Request should fail with negative max_tokens"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_unsupported_logit_bias() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello, world!")
.add_logit_bias("50256", -100)
.build();
assert!(request.is_err(), "Request should fail with logit_bias");
Ok(())
}
#[test]
fn test_chat_completions_invalid_temperature() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hello!")
.temperature(2.5) // Invalid, should be between 0 and 2
.build();
assert!(
request.is_err(),
"Request should fail with invalid temperature"
);
Ok(())
}
#[test]
fn test_chat_completions_max_stop_sequences() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Tell me a story.")
.stop(vec![
"The end.".to_string(),
"Once upon a time,".to_string(),
"And then,".to_string(),
"They lived happily ever after.".to_string(),
]) // 4 stop sequences, valid
.build();
assert!(
request.is_ok(),
"Request should succeed with 4 stop sequences"
);
Ok(())
}
#[test]
fn test_chat_completions_large_stop_sequences() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Tell me a story.")
.stop(vec![
"The end.".to_string(),
"And so,".to_string(),
"Once upon a time,".to_string(),
"They lived happily ever after.".to_string(),
"Unexpected stop.".to_string(),
])
.build();
assert!(
request.is_err(),
"Request should fail with too many stop sequences"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_invalid_stop_sequences() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Tell me a joke.")
.stop(vec!["".to_string()])
.build();
assert!(
request.is_err(),
"Request should fail with invalid stop sequences"
);
Ok(())
}
#[ignore]
#[test]
fn test_chat_completions_presence_penalty_out_of_range() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("What's up?")
.presence_penalty(3.0) // Out of valid range (-2.0 to 2.0)
.build();
assert!(
request.is_err(),
"Request should fail with invalid presence_penalty"
);
Ok(())
}
#[test]
fn test_chat_completions_invalid_presence_penalty() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("What's up?")
.presence_penalty(-2.5) // Invalid, should be between -2.0 and 2.0
.build();
assert!(
request.is_err(),
"Request should fail with invalid presence_penalty"
);
Ok(())
}
#[ignore]
#[tokio::test]
async fn test_chat_completions_with_user_field() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Hi there!")
.user("test_user")
.build()
.unwrap();
// assert!(request.is_err(), "Request should fail with 'user' field");
let result: Result<common::CompletionRequest> = request.try_into();
assert!(
result.is_err(),
"Conversion should fail with 'user' field set",
);
Ok(())
}
#[test]
fn test_chat_completions_valid_with_seed() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("meta/llama-3.1-8b-instruct")
.add_user_message("Repeatable result")
.seed(12345)
.build();
assert!(
request.is_ok(),
"Request should succeed with seed value for determinism"
);
Ok(())
}
#[test]
fn test_validate_chat_messages_multiple_system_messages() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_system_message("System message 1")
.add_system_message("System message 2")
.add_user_message("Hello!")
.build()?;
let result = validate_and_collect_chat_messages(request.messages.clone());
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(e.to_string(), "More than one system message found");
}
Ok(())
}
#[test]
fn test_validate_chat_messages_user_messages_do_not_alternate() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_user_message("Hello!")
.add_user_message("How are you?")
.build()?;
let result = validate_and_collect_chat_messages(request.messages.clone());
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(e.to_string(), "User and assistant messages must alternate");
}
Ok(())
}
#[ignore]
#[test]
fn test_validate_chat_messages_user_message_not_text() -> Result<(), Box<dyn Error>> {
let message = ChatCompletionMessage {
role: MessageRole::user,
content: Content::ImageUrl(vec![ImageUrl {
r#type: ContentType::image_url,
text: None,
image_url: Some(ImageUrlType {
url: "http://example.com/image.png".to_string(),
}),
}]),
name: None,
};
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_message(message)
.build()?;
let result = validate_and_collect_chat_messages(request.messages.clone());
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(e.to_string(), "Generic error: User message must be text");
}
Ok(())
}
#[test]
fn test_try_from_chat_completion_request_with_unsupported_fields() -> Result<(), Box<dyn Error>>
{
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_user_message("Hello!")
.response_format(Some(json!({"format": "unsupported"})))
.tools(Some(vec![Tool {
r#type: ToolType::Function,
function: Function {
name: "test_function".to_string(),
description: None,
parameters: FunctionParameters {
schema_type: JSONSchemaType::Object,
properties: None,
required: None,
},
},
}]))
.tool_choice(Some(ToolChoiceType::Auto))
.build()?;
let result: Result<common::CompletionRequest> = request.try_into();
assert!(
result.is_err(),
"Conversion should fail with unsupported fields"
);
Ok(())
}
#[test]
fn test_deserialize_content_with_image_urls() {
let json_data = r#"
{
"role": "assistant",
"content": [
"This is a text message.",
"https://example.com/image1.png",
"Another text message.",
"https://example.com/image2.png"
]
}
"#;
let message: ChatCompletionMessage =
serde_json::from_str(json_data).expect("Deserialization failed");
if let Content::ImageUrl(parts) = message.content {
assert_eq!(parts.len(), 4);
assert_eq!(parts[0].r#type, ContentType::text);
assert_eq!(parts[0].text.as_ref().unwrap(), "This is a text message.");
assert_eq!(parts[1].r#type, ContentType::image_url);
assert_eq!(
parts[1].image_url.as_ref().unwrap().url,
"https://example.com/image1.png"
);
} else {
panic!("Expected Content::ImageUrl");
}
}
#[test]
fn test_try_from_chat_completion_request_success() -> Result<(), Box<dyn Error>> {
let request = ChatCompletionRequest::builder()
.model("test-model")
.add_user_message("Hello!")
.add_assistant_message("Hi there!")
.add_user_message("How are you?")
.build()?;
let completion_request: common::CompletionRequest = request.try_into()?;
assert!(matches!(
completion_request.prompt,
common::PromptType::ChatCompletion(_)
));
Ok(())
}
#[test]
fn test_chat_completion_sampling_params_with_valid_nvext() {
let nvext = NvExt {
ignore_eos: Some(true),
repetition_penalty: Some(0.6),
top_k: Some(3),
use_raw_prompt: None,
greed_sampling: None,
annotations: None,
};
let request = ChatCompletionRequest::builder()
.nvext(nvext)
.model("foo")
.add_system_message("Hello!")
.build()
.expect("Failed to build request with valid nvext");
assert_eq!(request.nvext.as_ref().unwrap().ignore_eos, Some(true));
assert_eq!(
request.nvext.as_ref().unwrap().repetition_penalty,
Some(0.6)
);
assert_eq!(request.nvext.as_ref().unwrap().top_k, Some(3));
}
#[test]
fn test_completion_sampling_params_without_nvext() {
let request = ChatCompletionRequest::builder()
.model("foo")
.add_user_message("Test")
.build()
.unwrap();
assert_eq!(request.frequency_penalty, None);
assert_eq!(request.logprobs, None);
}
#[test]
fn test_completion_sampling_params_with_valid_nvext() {
let nvext = NvExt {
ignore_eos: Some(true),
repetition_penalty: Some(0.6),
top_k: Some(3),
..Default::default()
};
let request = ChatCompletionRequest::builder()
.nvext(nvext)
.model("foo")
.add_user_message("Test")
.build()
.expect("Failed to build request with valid nvext");
assert_eq!(request.nvext.as_ref().unwrap().ignore_eos, Some(true));
assert_eq!(
request.nvext.as_ref().unwrap().repetition_penalty,
Some(0.6)
);
assert_eq!(request.nvext.as_ref().unwrap().top_k, Some(3));
}
// #[test]
// fn test_normalize_unicode_characters() {
// let str = "Hello there how are you\u{E0020}?".to_string();
// let normalized = str.sanitize_text();
// assert_eq!(normalized, "Hello there how are you?");
// }
// #[tokio::test]
// async fn test_chat_completion_request_filtered() {
// // Define input messages with Unicode character to filter
// let messages = vec![
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text(
// "Hello there how are you\u{E0020}?"
// .to_string()
// .normalize_unicode_characters(),
// ),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::assistant,
// content: Content::Text("How may I help you?".to_string()),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text("Do something for me?".to_string()),
// name: None,
// },
// ];
// // Define expected filtered messages
// let expected = vec![
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text("Hello there how are you?".to_string()),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::assistant,
// content: Content::Text("How may I help you?".to_string()),
// name: None,
// },
// ChatCompletionMessage {
// role: MessageRole::user,
// content: Content::Text("Do something for me?".to_string()),
// name: None,
// },
// ];
// // Build ChatCompletionRequest with filtering applied
// let request = ChatCompletionRequest::builder()
// .model("foo")
// .messages(messages)
// .build()
// .expect("Failed to build ChatCompletionRequest");
// // Validate each message matches the expected filtered content
// for (i, message) in request.messages.iter().enumerate() {
// assert_eq!(message.role, expected[i].role);
// if let Content::Text(ref content) = message.content {
// if let Content::Text(ref expected_content) = expected[i].content {
// assert_eq!(content, expected_content);
// }
// }
// }
// }
}