Commit 86aff237 authored by Paul Hendricks's avatar Paul Hendricks Committed by GitHub
Browse files

refactor: using async_openai


Co-authored-by: default avatarGraham King <grahamk@nvidia.com>
parent d694ca6e
...@@ -151,6 +151,31 @@ version = "0.5.4" ...@@ -151,6 +151,31 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-openai"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66"
dependencies = [
"backoff",
"base64 0.22.1",
"bytes",
"derive_builder",
"eventsource-stream",
"futures",
"rand",
"reqwest",
"reqwest-eventsource",
"secrecy",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.6" version = "0.3.6"
...@@ -320,6 +345,20 @@ dependencies = [ ...@@ -320,6 +345,20 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "backoff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
dependencies = [
"futures-core",
"getrandom 0.2.15",
"instant",
"pin-project-lite",
"rand",
"tokio",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.74" version = "0.3.74"
...@@ -995,6 +1034,17 @@ dependencies = [ ...@@ -995,6 +1034,17 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "eventsource-stream"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
dependencies = [
"futures-core",
"nom",
"pin-project-lite",
]
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "2.3.0" version = "2.3.0"
...@@ -1157,6 +1207,12 @@ version = "0.3.31" ...@@ -1157,6 +1207,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.31" version = "0.3.31"
...@@ -1202,8 +1258,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1202,8 +1258,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
] ]
[[package]] [[package]]
...@@ -1351,6 +1409,24 @@ dependencies = [ ...@@ -1351,6 +1409,24 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper-rustls"
version = "0.27.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
dependencies = [
"futures-util",
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]] [[package]]
name = "hyper-timeout" name = "hyper-timeout"
version = "0.5.2" version = "0.5.2"
...@@ -1596,6 +1672,15 @@ version = "0.1.15" ...@@ -1596,6 +1672,15 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if 1.0.0",
]
[[package]] [[package]]
name = "iovec" name = "iovec"
version = "0.1.4" version = "0.1.4"
...@@ -1605,6 +1690,12 @@ dependencies = [ ...@@ -1605,6 +1690,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
...@@ -1824,6 +1915,16 @@ version = "0.3.17" ...@@ -1824,6 +1915,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.7.0" version = "2.7.0"
...@@ -2527,6 +2628,58 @@ dependencies = [ ...@@ -2527,6 +2628,58 @@ dependencies = [
"syn 2.0.98", "syn 2.0.98",
] ]
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.38" version = "1.0.38"
...@@ -2661,6 +2814,68 @@ version = "0.8.5" ...@@ -2661,6 +2814,68 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"mime_guess",
"once_cell",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pemfile",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"windows-registry",
]
[[package]]
name = "reqwest-eventsource"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
dependencies = [
"eventsource-stream",
"futures-core",
"futures-timer",
"mime",
"nom",
"pin-project-lite",
"reqwest",
"thiserror 1.0.69",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.11" version = "0.17.11"
...@@ -2681,6 +2896,12 @@ version = "0.1.24" ...@@ -2681,6 +2896,12 @@ version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"
...@@ -2757,6 +2978,9 @@ name = "rustls-pki-types" ...@@ -2757,6 +2978,9 @@ name = "rustls-pki-types"
version = "1.11.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
...@@ -2805,6 +3029,16 @@ version = "1.2.0" ...@@ -2805,6 +3029,16 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "secrecy"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
dependencies = [
"serde",
"zeroize",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
...@@ -3123,6 +3357,9 @@ name = "sync_wrapper" ...@@ -3123,6 +3357,9 @@ name = "sync_wrapper"
version = "1.0.2" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]] [[package]]
name = "synstructure" name = "synstructure"
...@@ -3261,6 +3498,21 @@ dependencies = [ ...@@ -3261,6 +3498,21 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tokenizers" name = "tokenizers"
version = "0.21.0" version = "0.21.0"
...@@ -3612,6 +3864,7 @@ name = "triton-distributed-llm" ...@@ -3612,6 +3864,7 @@ name = "triton-distributed-llm"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai",
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum 0.8.1", "axum 0.8.1",
...@@ -3740,6 +3993,12 @@ version = "0.2.2" ...@@ -3740,6 +3993,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c"
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.17" version = "1.0.17"
...@@ -3957,6 +4216,19 @@ dependencies = [ ...@@ -3957,6 +4216,19 @@ dependencies = [
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.100" version = "0.2.100"
...@@ -3989,6 +4261,29 @@ dependencies = [ ...@@ -3989,6 +4261,29 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "web-time" name = "web-time"
version = "1.1.0" version = "1.1.0"
...@@ -4060,6 +4355,36 @@ dependencies = [ ...@@ -4060,6 +4355,36 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
......
...@@ -151,6 +151,31 @@ version = "0.5.4" ...@@ -151,6 +151,31 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-openai"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66"
dependencies = [
"backoff",
"base64 0.22.1",
"bytes",
"derive_builder",
"eventsource-stream",
"futures",
"rand",
"reqwest",
"reqwest-eventsource",
"secrecy",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.6" version = "0.3.6"
...@@ -320,6 +345,20 @@ dependencies = [ ...@@ -320,6 +345,20 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "backoff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
dependencies = [
"futures-core",
"getrandom 0.2.15",
"instant",
"pin-project-lite",
"rand",
"tokio",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.74" version = "0.3.74"
...@@ -987,6 +1026,17 @@ dependencies = [ ...@@ -987,6 +1026,17 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "eventsource-stream"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
dependencies = [
"futures-core",
"nom",
"pin-project-lite",
]
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "2.3.0" version = "2.3.0"
...@@ -1149,6 +1199,12 @@ version = "0.3.31" ...@@ -1149,6 +1199,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.31" version = "0.3.31"
...@@ -1194,8 +1250,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1194,8 +1250,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
] ]
[[package]] [[package]]
...@@ -1362,6 +1420,24 @@ dependencies = [ ...@@ -1362,6 +1420,24 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper-rustls"
version = "0.27.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
dependencies = [
"futures-util",
"http 1.2.0",
"hyper",
"hyper-util",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]] [[package]]
name = "hyper-timeout" name = "hyper-timeout"
version = "0.5.2" version = "0.5.2"
...@@ -1607,6 +1683,15 @@ version = "0.1.15" ...@@ -1607,6 +1683,15 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if 1.0.0",
]
[[package]] [[package]]
name = "iovec" name = "iovec"
version = "0.1.4" version = "0.1.4"
...@@ -1616,6 +1701,12 @@ dependencies = [ ...@@ -1616,6 +1701,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
...@@ -1849,6 +1940,16 @@ version = "0.3.17" ...@@ -1849,6 +1940,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.7.0" version = "2.7.0"
...@@ -2563,6 +2664,58 @@ dependencies = [ ...@@ -2563,6 +2664,58 @@ dependencies = [
"syn 2.0.98", "syn 2.0.98",
] ]
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.38" version = "1.0.38"
...@@ -2697,6 +2850,68 @@ version = "0.8.5" ...@@ -2697,6 +2850,68 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"http 1.2.0",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"mime_guess",
"once_cell",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pemfile",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"windows-registry",
]
[[package]]
name = "reqwest-eventsource"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
dependencies = [
"eventsource-stream",
"futures-core",
"futures-timer",
"mime",
"nom",
"pin-project-lite",
"reqwest",
"thiserror 1.0.69",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.11" version = "0.17.11"
...@@ -2717,6 +2932,12 @@ version = "0.1.24" ...@@ -2717,6 +2932,12 @@ version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"
...@@ -2793,6 +3014,9 @@ name = "rustls-pki-types" ...@@ -2793,6 +3014,9 @@ name = "rustls-pki-types"
version = "1.11.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
...@@ -2841,6 +3065,16 @@ version = "1.2.0" ...@@ -2841,6 +3065,16 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "secrecy"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
dependencies = [
"serde",
"zeroize",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
...@@ -3170,6 +3404,9 @@ name = "sync_wrapper" ...@@ -3170,6 +3404,9 @@ name = "sync_wrapper"
version = "1.0.2" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]] [[package]]
name = "synstructure" name = "synstructure"
...@@ -3331,6 +3568,21 @@ dependencies = [ ...@@ -3331,6 +3568,21 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tokenizers" name = "tokenizers"
version = "0.21.0" version = "0.21.0"
...@@ -3682,6 +3934,7 @@ name = "triton-distributed-llm" ...@@ -3682,6 +3934,7 @@ name = "triton-distributed-llm"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai",
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum 0.8.1", "axum 0.8.1",
...@@ -3810,6 +4063,12 @@ version = "0.2.2" ...@@ -3810,6 +4063,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c"
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.17" version = "1.0.17"
...@@ -4027,6 +4286,19 @@ dependencies = [ ...@@ -4027,6 +4286,19 @@ dependencies = [
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.100" version = "0.2.100"
...@@ -4059,6 +4331,29 @@ dependencies = [ ...@@ -4059,6 +4331,29 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "web-time" name = "web-time"
version = "1.1.0" version = "1.1.0"
...@@ -4130,6 +4425,36 @@ dependencies = [ ...@@ -4130,6 +4425,36 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
......
...@@ -202,6 +202,31 @@ version = "0.5.4" ...@@ -202,6 +202,31 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-openai"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66"
dependencies = [
"backoff",
"base64 0.22.1",
"bytes",
"derive_builder",
"eventsource-stream",
"futures",
"rand",
"reqwest",
"reqwest-eventsource",
"secrecy",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.6" version = "0.3.6"
...@@ -371,6 +396,20 @@ dependencies = [ ...@@ -371,6 +396,20 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "backoff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
dependencies = [
"futures-core",
"getrandom 0.2.15",
"instant",
"pin-project-lite",
"rand",
"tokio",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.74" version = "0.3.74"
...@@ -1410,6 +1449,17 @@ dependencies = [ ...@@ -1410,6 +1449,17 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "eventsource-stream"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
dependencies = [
"futures-core",
"nom",
"pin-project-lite",
]
[[package]] [[package]]
name = "exr" name = "exr"
version = "1.73.0" version = "1.73.0"
...@@ -1647,6 +1697,12 @@ version = "0.3.31" ...@@ -1647,6 +1697,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.31" version = "0.3.31"
...@@ -1810,8 +1866,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1810,8 +1866,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
] ]
[[package]] [[package]]
...@@ -2022,6 +2080,7 @@ dependencies = [ ...@@ -2022,6 +2080,7 @@ dependencies = [
"hyper", "hyper",
"hyper-util", "hyper-util",
"rustls", "rustls",
"rustls-native-certs 0.8.1",
"rustls-pki-types", "rustls-pki-types",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls",
...@@ -2658,6 +2717,16 @@ version = "0.3.17" ...@@ -2658,6 +2717,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.7.0" version = "2.7.0"
...@@ -3731,6 +3800,58 @@ version = "2.0.1" ...@@ -3731,6 +3800,58 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.38" version = "1.0.38"
...@@ -3966,11 +4087,16 @@ dependencies = [ ...@@ -3966,11 +4087,16 @@ dependencies = [
"js-sys", "js-sys",
"log", "log",
"mime", "mime",
"mime_guess",
"native-tls", "native-tls",
"once_cell", "once_cell",
"percent-encoding", "percent-encoding",
"pin-project-lite", "pin-project-lite",
"quinn",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pemfile", "rustls-pemfile",
"rustls-pki-types",
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
...@@ -3978,15 +4104,34 @@ dependencies = [ ...@@ -3978,15 +4104,34 @@ dependencies = [
"system-configuration", "system-configuration",
"tokio", "tokio",
"tokio-native-tls", "tokio-native-tls",
"tokio-rustls",
"tokio-util",
"tower 0.5.2", "tower 0.5.2",
"tower-service", "tower-service",
"url", "url",
"wasm-bindgen", "wasm-bindgen",
"wasm-bindgen-futures", "wasm-bindgen-futures",
"wasm-streams",
"web-sys", "web-sys",
"windows-registry", "windows-registry",
] ]
[[package]]
name = "reqwest-eventsource"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
dependencies = [
"eventsource-stream",
"futures-core",
"futures-timer",
"mime",
"nom",
"pin-project-lite",
"reqwest",
"thiserror 1.0.69",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.11" version = "0.17.11"
...@@ -4107,6 +4252,9 @@ name = "rustls-pki-types" ...@@ -4107,6 +4252,9 @@ name = "rustls-pki-types"
version = "1.11.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
...@@ -4189,6 +4337,16 @@ version = "1.2.0" ...@@ -4189,6 +4337,16 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "secrecy"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
dependencies = [
"serde",
"zeroize",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
...@@ -4843,11 +5001,27 @@ dependencies = [ ...@@ -4843,11 +5001,27 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tio" name = "tio"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai",
"async-stream", "async-stream",
"async-trait", "async-trait",
"clap", "clap",
...@@ -5276,6 +5450,7 @@ name = "triton-distributed-llm" ...@@ -5276,6 +5450,7 @@ name = "triton-distributed-llm"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai",
"async-stream", "async-stream",
"async-trait", "async-trait",
"async_zmq", "async_zmq",
...@@ -5406,6 +5581,12 @@ version = "0.2.2" ...@@ -5406,6 +5581,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c"
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.17" version = "1.0.17"
...@@ -5709,6 +5890,19 @@ dependencies = [ ...@@ -5709,6 +5890,19 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]] [[package]]
name = "web-sys" name = "web-sys"
version = "0.3.77" version = "0.3.77"
......
...@@ -29,6 +29,7 @@ metal = ["triton-distributed-llm/metal"] ...@@ -29,6 +29,7 @@ metal = ["triton-distributed-llm/metal"]
[dependencies] [dependencies]
anyhow = "1" anyhow = "1"
async-openai = "0.27.2"
async-stream = { version = "0.3" } async-stream = { version = "0.3" }
async-trait = { version = "0.1" } async-trait = { version = "0.1" }
clap = { version = "4.5", features = ["derive", "env"] } clap = { version = "4.5", features = ["derive", "env"] }
......
...@@ -21,7 +21,6 @@ use std::{ ...@@ -21,7 +21,6 @@ use std::{
use triton_distributed_llm::{ use triton_distributed_llm::{
backend::Backend, backend::Backend,
preprocessor::OpenAIPreprocessor, preprocessor::OpenAIPreprocessor,
protocols::openai::chat_completions::MessageRole,
types::{ types::{
openai::chat_completions::{ openai::chat_completions::{
ChatCompletionRequest, ChatCompletionResponseDelta, ChatCompletionRequest, ChatCompletionResponseDelta,
...@@ -38,7 +37,7 @@ use triton_distributed_runtime::{ ...@@ -38,7 +37,7 @@ use triton_distributed_runtime::{
use crate::EngineConfig; use crate::EngineConfig;
/// Max response tokens for each single query. Must be less than model context size. /// Max response tokens for each single query. Must be less than model context size.
const MAX_TOKENS: i32 = 8192; const MAX_TOKENS: u32 = 8192;
/// Output of `isatty` if the fd is indeed a TTY /// Output of `isatty` if the fd is indeed a TTY
const IS_A_TTY: i32 = 1; const IS_A_TTY: i32 = 1;
...@@ -96,11 +95,12 @@ pub async fn run( ...@@ -96,11 +95,12 @@ pub async fn run(
main_loop(cancel_token, &service_name, engine, inspect_template).await main_loop(cancel_token, &service_name, engine, inspect_template).await
} }
#[allow(deprecated)]
async fn main_loop( async fn main_loop(
cancel_token: CancellationToken, cancel_token: CancellationToken,
service_name: &str, service_name: &str,
engine: OpenAIChatCompletionsStreamingEngine, engine: OpenAIChatCompletionsStreamingEngine,
inspect_template: bool, _inspect_template: bool,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
tracing::info!("Ctrl-c to exit"); tracing::info!("Ctrl-c to exit");
let theme = dialoguer::theme::ColorfulTheme::default(); let theme = dialoguer::theme::ColorfulTheme::default();
...@@ -141,30 +141,31 @@ async fn main_loop( ...@@ -141,30 +141,31 @@ async fn main_loop(
} }
} }
}; };
messages.push((MessageRole::user, prompt.clone()));
// Construct messages
let user_message = async_openai::types::ChatCompletionRequestMessage::User(
async_openai::types::ChatCompletionRequestUserMessage {
content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt),
name: None,
},
);
messages.push(user_message);
// Request // Request
let mut req_builder = ChatCompletionRequest::builder(); let inner = async_openai::types::CreateChatCompletionRequestArgs::default()
req_builder .messages(messages.clone())
.model(service_name) .model(service_name)
.stream(true) .stream(true)
.max_tokens(MAX_TOKENS); .max_tokens(MAX_TOKENS)
if inspect_template { .build()?;
// This makes the pre-processor ignore stop tokens
req_builder.min_tokens(8192); // TODO We cannot set min_tokens with async-openai
} // if inspect_template {
for (role, msg) in &messages { // // This makes the pre-processor ignore stop tokens
match role { // req_builder.min_tokens(8192);
MessageRole::user => { // }
req_builder.add_user_message(msg);
} let req = ChatCompletionRequest { inner, nvext: None };
MessageRole::assistant => {
req_builder.add_assistant_message(msg);
}
x => panic!("Only 'user' and 'assistant' messages are supported, not {x}"),
}
}
let req = req_builder.build()?;
// Call the model // Call the model
let mut stream = engine.generate(Context::new(req)).await?; let mut stream = engine.generate(Context::new(req)).await?;
...@@ -174,7 +175,7 @@ async fn main_loop( ...@@ -174,7 +175,7 @@ async fn main_loop(
let mut assistant_message = String::new(); let mut assistant_message = String::new();
while let Some(item) = stream.next().await { while let Some(item) = stream.next().await {
let data = item.data.as_ref().unwrap(); let data = item.data.as_ref().unwrap();
let entry = data.choices.first(); let entry = data.inner.choices.first();
let chat_comp = entry.as_ref().unwrap(); let chat_comp = entry.as_ref().unwrap();
if let Some(c) = &chat_comp.delta.content { if let Some(c) = &chat_comp.delta.content {
let _ = stdout.write(c.as_bytes()); let _ = stdout.write(c.as_bytes());
...@@ -188,7 +189,23 @@ async fn main_loop( ...@@ -188,7 +189,23 @@ async fn main_loop(
} }
println!(); println!();
messages.push((MessageRole::assistant, assistant_message)); let assistant_content =
async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(
assistant_message,
);
// ALLOW: function_call is deprecated
let assistant_message = async_openai::types::ChatCompletionRequestMessage::Assistant(
async_openai::types::ChatCompletionRequestAssistantMessage {
content: Some(assistant_content),
refusal: None,
name: None,
audio: None,
tool_calls: None,
function_call: None,
},
);
messages.push(assistant_message);
} }
println!(); println!();
Ok(()) Ok(())
......
...@@ -18,9 +18,8 @@ use std::{sync::Arc, time::Duration}; ...@@ -18,9 +18,8 @@ use std::{sync::Arc, time::Duration};
use async_stream::stream; use async_stream::stream;
use async_trait::async_trait; use async_trait::async_trait;
use triton_distributed_llm::protocols::openai::chat_completions::FinishReason;
use triton_distributed_llm::protocols::openai::chat_completions::{ use triton_distributed_llm::protocols::openai::chat_completions::{
ChatCompletionRequest, ChatCompletionResponseDelta, Content, ChatCompletionRequest, ChatCompletionResponseDelta,
}; };
use triton_distributed_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine; use triton_distributed_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
...@@ -53,25 +52,40 @@ impl ...@@ -53,25 +52,40 @@ impl
let (request, context) = incoming_request.transfer(()); let (request, context) = incoming_request.transfer(());
let deltas = request.response_generator(); let deltas = request.response_generator();
let ctx = context.context(); let ctx = context.context();
let req = request.messages.into_iter().last().unwrap(); let req = request.inner.messages.into_iter().last().unwrap();
let prompt = match req.content {
Content::Text(prompt) => prompt, let prompt = match req {
_ => { async_openai::types::ChatCompletionRequestMessage::User(user_msg) => {
anyhow::bail!("Invalid request content field, expected Content::Text"); match user_msg.content {
async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt) => {
prompt
}
_ => anyhow::bail!("Invalid request content field, expected Content::Text"),
} }
}
_ => anyhow::bail!("Invalid request type, expected User message"),
}; };
let output = stream! { let output = stream! {
let mut id = 1; let mut id = 1;
for c in prompt.chars() { for c in prompt.chars() {
// we are returning characters not tokens, so speed up some // we are returning characters not tokens, so speed up some
tokio::time::sleep(TOKEN_ECHO_DELAY/2).await; tokio::time::sleep(TOKEN_ECHO_DELAY/2).await;
let delta = deltas.create_choice(0, Some(c.to_string()), None, None); let inner = deltas.create_choice(0, Some(c.to_string()), None, None);
yield Annotated{ id: Some(id.to_string()), data: Some(delta), event: None, comment: None }; let response = ChatCompletionResponseDelta {
inner,
};
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
id += 1; id += 1;
} }
let stop_delta = deltas.create_choice(0, None, Some(FinishReason::stop), None);
yield Annotated { id: Some(id.to_string()), data: Some(stop_delta), event: None, comment: None }; let inner = deltas.create_choice(0, None, Some(async_openai::types::FinishReason::Stop), None);
let response = ChatCompletionResponseDelta {
inner,
}; };
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
};
Ok(ResponseStream::new(Box::pin(output), ctx)) Ok(ResponseStream::new(Box::pin(output), ctx))
} }
} }
...@@ -151,6 +151,31 @@ version = "0.5.4" ...@@ -151,6 +151,31 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-openai"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66"
dependencies = [
"backoff",
"base64 0.22.1",
"bytes",
"derive_builder",
"eventsource-stream",
"futures",
"rand",
"reqwest",
"reqwest-eventsource",
"secrecy",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.6" version = "0.3.6"
...@@ -320,6 +345,20 @@ dependencies = [ ...@@ -320,6 +345,20 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "backoff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
dependencies = [
"futures-core",
"getrandom 0.2.15",
"instant",
"pin-project-lite",
"rand",
"tokio",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.74" version = "0.3.74"
...@@ -986,6 +1025,17 @@ dependencies = [ ...@@ -986,6 +1025,17 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "eventsource-stream"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
dependencies = [
"futures-core",
"nom",
"pin-project-lite",
]
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "2.3.0" version = "2.3.0"
...@@ -1148,6 +1198,12 @@ version = "0.3.31" ...@@ -1148,6 +1198,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.31" version = "0.3.31"
...@@ -1193,8 +1249,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1193,8 +1249,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
] ]
[[package]] [[package]]
...@@ -1348,6 +1406,24 @@ dependencies = [ ...@@ -1348,6 +1406,24 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper-rustls"
version = "0.27.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
dependencies = [
"futures-util",
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]] [[package]]
name = "hyper-timeout" name = "hyper-timeout"
version = "0.5.2" version = "0.5.2"
...@@ -1593,6 +1669,15 @@ version = "0.1.15" ...@@ -1593,6 +1669,15 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if 1.0.0",
]
[[package]] [[package]]
name = "iovec" name = "iovec"
version = "0.1.4" version = "0.1.4"
...@@ -1602,6 +1687,12 @@ dependencies = [ ...@@ -1602,6 +1687,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
...@@ -1842,6 +1933,16 @@ version = "0.3.17" ...@@ -1842,6 +1933,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.7.0" version = "2.7.0"
...@@ -2545,6 +2646,58 @@ dependencies = [ ...@@ -2545,6 +2646,58 @@ dependencies = [
"syn 2.0.96", "syn 2.0.96",
] ]
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.38" version = "1.0.38"
...@@ -2679,6 +2832,68 @@ version = "0.8.5" ...@@ -2679,6 +2832,68 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"mime_guess",
"once_cell",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pemfile",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"windows-registry",
]
[[package]]
name = "reqwest-eventsource"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
dependencies = [
"eventsource-stream",
"futures-core",
"futures-timer",
"mime",
"nom",
"pin-project-lite",
"reqwest",
"thiserror 1.0.69",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.8" version = "0.17.8"
...@@ -2700,6 +2915,12 @@ version = "0.1.24" ...@@ -2700,6 +2915,12 @@ version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"
...@@ -2776,6 +2997,9 @@ name = "rustls-pki-types" ...@@ -2776,6 +2997,9 @@ name = "rustls-pki-types"
version = "1.11.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
...@@ -2824,6 +3048,16 @@ version = "1.2.0" ...@@ -2824,6 +3048,16 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "secrecy"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
dependencies = [
"serde",
"zeroize",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
...@@ -3148,6 +3382,9 @@ name = "sync_wrapper" ...@@ -3148,6 +3382,9 @@ name = "sync_wrapper"
version = "1.0.2" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]] [[package]]
name = "synstructure" name = "synstructure"
...@@ -3286,6 +3523,21 @@ dependencies = [ ...@@ -3286,6 +3523,21 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tokenizers" name = "tokenizers"
version = "0.21.0" version = "0.21.0"
...@@ -3637,6 +3889,7 @@ name = "triton-distributed-llm" ...@@ -3637,6 +3889,7 @@ name = "triton-distributed-llm"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai",
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum 0.8.1", "axum 0.8.1",
...@@ -3765,6 +4018,12 @@ version = "0.2.2" ...@@ -3765,6 +4018,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c"
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.14" version = "1.0.14"
...@@ -3982,6 +4241,19 @@ dependencies = [ ...@@ -3982,6 +4241,19 @@ dependencies = [
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.100" version = "0.2.100"
...@@ -4014,6 +4286,29 @@ dependencies = [ ...@@ -4014,6 +4286,29 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "web-time" name = "web-time"
version = "1.1.0" version = "1.1.0"
...@@ -4085,6 +4380,36 @@ dependencies = [ ...@@ -4085,6 +4380,36 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
......
...@@ -163,6 +163,31 @@ version = "0.5.4" ...@@ -163,6 +163,31 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a" checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
[[package]]
name = "async-openai"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66"
dependencies = [
"backoff",
"base64 0.22.1",
"bytes",
"derive_builder",
"eventsource-stream",
"futures",
"rand",
"reqwest",
"reqwest-eventsource",
"secrecy",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.6" version = "0.3.6"
...@@ -332,6 +357,20 @@ dependencies = [ ...@@ -332,6 +357,20 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "backoff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
dependencies = [
"futures-core",
"getrandom 0.2.15",
"instant",
"pin-project-lite",
"rand",
"tokio",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.74" version = "0.3.74"
...@@ -1009,6 +1048,17 @@ dependencies = [ ...@@ -1009,6 +1048,17 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "eventsource-stream"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
dependencies = [
"futures-core",
"nom",
"pin-project-lite",
]
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "2.3.0" version = "2.3.0"
...@@ -1171,6 +1221,12 @@ version = "0.3.31" ...@@ -1171,6 +1221,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]] [[package]]
name = "futures-util" name = "futures-util"
version = "0.3.31" version = "0.3.31"
...@@ -1216,8 +1272,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1216,8 +1272,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
] ]
[[package]] [[package]]
...@@ -1365,6 +1423,24 @@ dependencies = [ ...@@ -1365,6 +1423,24 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper-rustls"
version = "0.27.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2"
dependencies = [
"futures-util",
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]] [[package]]
name = "hyper-timeout" name = "hyper-timeout"
version = "0.5.2" version = "0.5.2"
...@@ -1610,6 +1686,15 @@ version = "0.1.15" ...@@ -1610,6 +1686,15 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if 1.0.0",
]
[[package]] [[package]]
name = "inventory" name = "inventory"
version = "0.3.19" version = "0.3.19"
...@@ -1628,6 +1713,12 @@ dependencies = [ ...@@ -1628,6 +1713,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
...@@ -1847,6 +1938,16 @@ version = "0.3.17" ...@@ -1847,6 +1938,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]] [[package]]
name = "minijinja" name = "minijinja"
version = "2.7.0" version = "2.7.0"
...@@ -2594,6 +2695,58 @@ dependencies = [ ...@@ -2594,6 +2695,58 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "quinn"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef"
dependencies = [
"bytes",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.11",
"tokio",
"tracing",
]
[[package]]
name = "quinn-proto"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
"getrandom 0.2.15",
"rand",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.11",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.38" version = "1.0.38"
...@@ -2728,6 +2881,68 @@ version = "0.8.5" ...@@ -2728,6 +2881,68 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"mime_guess",
"once_cell",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-native-certs 0.8.1",
"rustls-pemfile",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"windows-registry",
]
[[package]]
name = "reqwest-eventsource"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
dependencies = [
"eventsource-stream",
"futures-core",
"futures-timer",
"mime",
"nom",
"pin-project-lite",
"reqwest",
"thiserror 1.0.69",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.8" version = "0.17.8"
...@@ -2749,6 +2964,12 @@ version = "0.1.24" ...@@ -2749,6 +2964,12 @@ version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
version = "0.4.1" version = "0.4.1"
...@@ -2825,6 +3046,9 @@ name = "rustls-pki-types" ...@@ -2825,6 +3046,9 @@ name = "rustls-pki-types"
version = "1.11.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
...@@ -2873,6 +3097,16 @@ version = "1.2.0" ...@@ -2873,6 +3097,16 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "secrecy"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
dependencies = [
"serde",
"zeroize",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
...@@ -3197,6 +3431,9 @@ name = "sync_wrapper" ...@@ -3197,6 +3431,9 @@ name = "sync_wrapper"
version = "1.0.2" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]] [[package]]
name = "synstructure" name = "synstructure"
...@@ -3335,6 +3572,21 @@ dependencies = [ ...@@ -3335,6 +3572,21 @@ dependencies = [
"zerovec", "zerovec",
] ]
[[package]]
name = "tinyvec"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tokenizers" name = "tokenizers"
version = "0.21.0" version = "0.21.0"
...@@ -3686,6 +3938,7 @@ name = "triton-distributed-llm" ...@@ -3686,6 +3938,7 @@ name = "triton-distributed-llm"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai",
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum 0.8.1", "axum 0.8.1",
...@@ -3834,6 +4087,12 @@ version = "0.2.2" ...@@ -3834,6 +4087,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c"
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.16" version = "1.0.16"
...@@ -4051,6 +4310,19 @@ dependencies = [ ...@@ -4051,6 +4310,19 @@ dependencies = [
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.100" version = "0.2.100"
...@@ -4083,6 +4355,29 @@ dependencies = [ ...@@ -4083,6 +4355,29 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "web-time" name = "web-time"
version = "1.1.0" version = "1.1.0"
...@@ -4154,6 +4449,36 @@ dependencies = [ ...@@ -4154,6 +4449,36 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-registry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0"
dependencies = [
"windows-result",
"windows-strings",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result",
"windows-targets 0.52.6",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
......
This diff is collapsed.
...@@ -81,6 +81,7 @@ uuid = { workspace = true } ...@@ -81,6 +81,7 @@ uuid = { workspace = true }
xxhash-rust = { workspace = true } xxhash-rust = { workspace = true }
strum = { workspace = true } strum = { workspace = true }
async-openai = "0.27.2"
blake3 = "1" blake3 = "1"
regex = "1" regex = "1"
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
use std::{cmp::min, num::NonZero, path::Path, sync::Arc}; use std::{cmp::min, num::NonZero, path::Path, sync::Arc};
use async_openai::types::FinishReason;
use async_stream::stream; use async_stream::stream;
use async_trait::async_trait; use async_trait::async_trait;
use either::Either; use either::Either;
...@@ -33,8 +34,7 @@ use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn}; ...@@ -33,8 +34,7 @@ use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated; use triton_distributed_runtime::protocols::annotated::Annotated;
use crate::protocols::openai::chat_completions::{ use crate::protocols::openai::chat_completions::{
ChatCompletionChoiceDelta, ChatCompletionContent, ChatCompletionRequest, ChatCompletionRequest, ChatCompletionResponseDelta,
ChatCompletionResponseDelta, Content, FinishReason, MessageRole,
}; };
use crate::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine; use crate::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
...@@ -174,11 +174,14 @@ impl ...@@ -174,11 +174,14 @@ impl
let (tx, mut rx) = channel(10_000); let (tx, mut rx) = channel(10_000);
let maybe_tok = self.pipeline.lock().await.tokenizer(); let maybe_tok = self.pipeline.lock().await.tokenizer();
let mut prompt_tokens = 0; let mut prompt_tokens = 0i32;
let mut messages = vec![]; let mut messages = vec![];
for m in request.messages { for m in request.inner.messages {
let content = match m.content { let async_openai::types::ChatCompletionRequestMessage::User(inner_m) = m else {
Content::Text(prompt) => { continue;
};
let content = match inner_m.content {
async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt) => {
if let Some(tok) = maybe_tok.as_ref() { if let Some(tok) = maybe_tok.as_ref() {
prompt_tokens = tok prompt_tokens = tok
.encode(prompt.clone(), false) .encode(prompt.clone(), false)
...@@ -187,12 +190,12 @@ impl ...@@ -187,12 +190,12 @@ impl
} }
prompt prompt
} }
Content::ImageUrl(_) => { _ => {
anyhow::bail!("Content::ImageUrl type is not supported"); anyhow::bail!("Only Text type is supported");
} }
}; };
let r = IndexMap::from([ let r = IndexMap::from([
("role".to_string(), Either::Left(m.role.to_string())), ("role".to_string(), Either::Left("user".to_string())),
("content".to_string(), Either::Left(content)), ("content".to_string(), Either::Left(content)),
]); ]);
messages.push(r); messages.push(r);
...@@ -204,7 +207,11 @@ impl ...@@ -204,7 +207,11 @@ impl
// level. // level.
//tracing::info!(prompt_tokens, "Received prompt"); //tracing::info!(prompt_tokens, "Received prompt");
let limit = DEFAULT_MAX_TOKENS - prompt_tokens; let limit = DEFAULT_MAX_TOKENS - prompt_tokens;
let max_output_tokens = min(request.max_tokens.unwrap_or(limit), limit); #[allow(deprecated)]
let max_output_tokens = min(
request.inner.max_tokens.map(|x| x as i32).unwrap_or(limit),
limit,
);
let mistralrs_request = Request::Normal(NormalRequest { let mistralrs_request = Request::Normal(NormalRequest {
messages: RequestMessage::Chat(messages), messages: RequestMessage::Chat(messages),
...@@ -247,35 +254,39 @@ impl ...@@ -247,35 +254,39 @@ impl
.unwrap_or(0); .unwrap_or(0);
} }
let finish_reason = match &c.choices[0].finish_reason { let finish_reason = match &c.choices[0].finish_reason {
Some(fr) => Some(fr.parse::<FinishReason>().unwrap_or(FinishReason::null)), Some(_fr) => Some(FinishReason::Stop), //Some(fr.parse::<FinishReason>().unwrap_or(FinishReason::Stop)),
None if used_output_tokens >= max_output_tokens => { None if used_output_tokens >= max_output_tokens => {
tracing::debug!(used_output_tokens, max_output_tokens, "Met or exceed max_tokens. Stopping."); tracing::debug!(used_output_tokens, max_output_tokens, "Met or exceed max_tokens. Stopping.");
Some(FinishReason::length) Some(FinishReason::Length)
} }
None => None, None => None,
}; };
//tracing::trace!("from_assistant: {from_assistant}"); //tracing::trace!("from_assistant: {from_assistant}");
let delta = ChatCompletionResponseDelta{ #[allow(deprecated)]
let inner = async_openai::types::CreateChatCompletionStreamResponse{
id: c.id, id: c.id,
choices: vec![ChatCompletionChoiceDelta{ choices: vec![async_openai::types::ChatChoiceStream{
index: 0, index: 0,
delta: ChatCompletionContent{ delta: async_openai::types::ChatCompletionStreamResponseDelta{
//role: c.choices[0].delta.role, //role: c.choices[0].delta.role,
role: Some(MessageRole::assistant), role: Some(async_openai::types::Role::Assistant),
content: Some(from_assistant), content: Some(from_assistant),
tool_calls: None, tool_calls: None,
refusal: None,
function_call: None,
}, },
logprobs: None, logprobs: None,
finish_reason, finish_reason,
}], }],
model: c.model, model: c.model,
created: c.created as u64, created: c.created as u32,
object: c.object.clone(), object: c.object.clone(),
usage: None, usage: None,
system_fingerprint: Some(c.system_fingerprint), system_fingerprint: Some(c.system_fingerprint),
service_tier: None, service_tier: None,
}; };
let delta = ChatCompletionResponseDelta{inner};
let ann = Annotated{ let ann = Annotated{
id: None, id: None,
data: Some(delta), data: Some(delta),
......
...@@ -220,17 +220,21 @@ async fn chat_completions( ...@@ -220,17 +220,21 @@ async fn chat_completions(
let request_id = uuid::Uuid::new_v4().to_string(); let request_id = uuid::Uuid::new_v4().to_string();
// todo - decide on default // todo - decide on default
let streaming = request.stream.unwrap_or(false); let streaming = request.inner.stream.unwrap_or(false);
// update the request to always stream // update the request to always stream
let request = ChatCompletionRequest { let inner_request = async_openai::types::CreateChatCompletionRequest {
stream: Some(true), stream: Some(true),
..request ..request.inner
};
let request = ChatCompletionRequest {
inner: inner_request,
nvext: None,
}; };
// todo - make the protocols be optional for model name // todo - make the protocols be optional for model name
// todo - when optional, if none, apply a default // todo - when optional, if none, apply a default
let model = &request.model; let model = &request.inner.model;
// todo - determine the proper error code for when a request model is not present // todo - determine the proper error code for when a request model is not present
tracing::trace!("Getting chat completions engine for model: {}", model); tracing::trace!("Getting chat completions engine for model: {}", model);
......
...@@ -275,7 +275,7 @@ impl ...@@ -275,7 +275,7 @@ impl
let (common_request, annotations) = self.preprocess_request(&request)?; let (common_request, annotations) = self.preprocess_request(&request)?;
// update isl // update isl
response_generator.update_isl(common_request.token_ids.len() as i32); response_generator.update_isl(common_request.token_ids.len() as u32);
// repack the common completion request // repack the common completion request
let common_request = context.map(|_| common_request); let common_request = context.map(|_| common_request);
......
...@@ -18,35 +18,37 @@ use super::*; ...@@ -18,35 +18,37 @@ use super::*;
use minijinja::{context, value::Value}; use minijinja::{context, value::Value};
use crate::protocols::openai::{ use crate::protocols::openai::{
chat_completions::{ChatCompletionMessage, ChatCompletionRequest, Content, MessageRole}, chat_completions::ChatCompletionRequest, completions::CompletionRequest,
completions::CompletionRequest,
}; };
use tracing; use tracing;
impl OAIChatLikeRequest for ChatCompletionRequest { impl OAIChatLikeRequest for ChatCompletionRequest {
fn messages(&self) -> Value { fn messages(&self) -> Value {
Value::from_serialize(&self.messages) Value::from_serialize(&self.inner.messages)
} }
fn tools(&self) -> Option<Value> { fn tools(&self) -> Option<Value> {
if self.tools.is_none() { if self.inner.tools.is_none() {
None None
} else { } else {
Some(Value::from_serialize(&self.tools)) Some(Value::from_serialize(&self.inner.tools))
} }
} }
fn tool_choice(&self) -> Option<Value> { fn tool_choice(&self) -> Option<Value> {
if self.tool_choice.is_none() { if self.inner.tool_choice.is_none() {
None None
} else { } else {
Some(Value::from_serialize(&self.tool_choice)) Some(Value::from_serialize(&self.inner.tool_choice))
} }
} }
fn should_add_generation_prompt(&self) -> bool { fn should_add_generation_prompt(&self) -> bool {
if let Some(last) = self.messages.last() { if let Some(last) = self.inner.messages.last() {
last.role == MessageRole::user matches!(
last,
async_openai::types::ChatCompletionRequestMessage::User(_)
)
} else { } else {
true true
} }
...@@ -54,13 +56,22 @@ impl OAIChatLikeRequest for ChatCompletionRequest { ...@@ -54,13 +56,22 @@ impl OAIChatLikeRequest for ChatCompletionRequest {
} }
impl OAIChatLikeRequest for CompletionRequest { impl OAIChatLikeRequest for CompletionRequest {
fn messages(&self) -> Value { fn messages(&self) -> minijinja::value::Value {
let message = ChatCompletionMessage { let message = async_openai::types::ChatCompletionRequestMessage::User(
role: MessageRole::user, async_openai::types::ChatCompletionRequestUserMessage {
content: Content::Text(self.prompt.clone()), content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
self.prompt.clone(),
),
name: None, name: None,
}; },
Value::from_serialize(vec![message]) );
// Convert to a JSON string first
let json_string =
serde_json::to_string(&vec![message]).expect("Serialization to JSON string failed");
// Convert to MiniJinja Value
minijinja::value::Value::from_safe_string(json_string)
} }
fn should_add_generation_prompt(&self) -> bool { fn should_add_generation_prompt(&self) -> bool {
......
...@@ -66,6 +66,33 @@ pub enum FinishReason { ...@@ -66,6 +66,33 @@ pub enum FinishReason {
Cancelled, Cancelled,
} }
impl std::fmt::Display for FinishReason {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
FinishReason::EoS => write!(f, "eos"),
FinishReason::Length => write!(f, "length"),
FinishReason::Stop => write!(f, "stop"),
FinishReason::Error(msg) => write!(f, "error: {}", msg),
FinishReason::Cancelled => write!(f, "cancelled"),
}
}
}
impl std::str::FromStr for FinishReason {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"eos" => Ok(FinishReason::EoS),
"length" => Ok(FinishReason::Length),
"stop" => Ok(FinishReason::Stop),
"cancelled" => Ok(FinishReason::Cancelled),
s if s.starts_with("error: ") => Ok(FinishReason::Error(s[7..].to_string())),
_ => Err(anyhow::anyhow!("Invalid FinishReason variant: '{}'", s)),
}
}
}
/// LLM Inference Engines can accept a variety of input types. Not all Engines will support all /// LLM Inference Engines can accept a variety of input types. Not all Engines will support all
/// input types. For example, the trtllm::AsyncEngine only supports `PromptType::Tokens` as an /// input types. For example, the trtllm::AsyncEngine only supports `PromptType::Tokens` as an
/// input type. The higher-level `Backend` class is a general wrapper around Engines that will /// input type. The higher-level `Backend` class is a general wrapper around Engines that will
......
...@@ -147,9 +147,9 @@ trait OpenAISamplingOptionsProvider { ...@@ -147,9 +147,9 @@ trait OpenAISamplingOptionsProvider {
} }
trait OpenAIStopConditionsProvider { trait OpenAIStopConditionsProvider {
fn get_max_tokens(&self) -> Option<i32>; fn get_max_tokens(&self) -> Option<u32>;
fn get_min_tokens(&self) -> Option<i32>; fn get_min_tokens(&self) -> Option<u32>;
fn get_stop(&self) -> Option<Vec<String>>; fn get_stop(&self) -> Option<Vec<String>>;
...@@ -200,7 +200,7 @@ impl<T: OpenAISamplingOptionsProvider> SamplingOptionsProvider for T { ...@@ -200,7 +200,7 @@ impl<T: OpenAISamplingOptionsProvider> SamplingOptionsProvider for T {
impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T { impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
fn extract_stop_conditions(&self) -> Result<common::StopConditions> { fn extract_stop_conditions(&self) -> Result<common::StopConditions> {
let max_tokens = self.get_max_tokens().map(|x| x as u32); let max_tokens = self.get_max_tokens();
let min_tokens = self.get_min_tokens(); let min_tokens = self.get_min_tokens();
let stop = self.get_stop(); let stop = self.get_stop();
...@@ -218,7 +218,7 @@ impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T { ...@@ -218,7 +218,7 @@ impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
Ok(common::StopConditions { Ok(common::StopConditions {
max_tokens, max_tokens,
min_tokens: min_tokens.map(|v| v as u32), min_tokens,
stop, stop,
stop_token_ids_hidden: None, stop_token_ids_hidden: None,
ignore_eos, ignore_eos,
...@@ -321,6 +321,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + Sync + 'static + std::fmt::Debu ...@@ -321,6 +321,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + Sync + 'static + std::fmt::Debu
response: common::llm_backend::BackendOutput, response: common::llm_backend::BackendOutput,
) -> Result<ResponseType>; ) -> Result<ResponseType>;
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
......
...@@ -22,7 +22,7 @@ use validator::Validate; ...@@ -22,7 +22,7 @@ use validator::Validate;
mod aggregator; mod aggregator;
mod delta; mod delta;
pub use aggregator::DeltaAggregator; // pub use aggregator::DeltaAggregator;
use super::{ use super::{
common::{self, SamplingOptionsProvider, StopConditionsProvider}, common::{self, SamplingOptionsProvider, StopConditionsProvider},
...@@ -56,13 +56,13 @@ pub struct CompletionRequest { ...@@ -56,13 +56,13 @@ pub struct CompletionRequest {
/// The token count of your prompt plus max_tokens cannot exceed the model's context length. /// The token count of your prompt plus max_tokens cannot exceed the model's context length.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))] #[builder(default, setter(into, strip_option))]
pub max_tokens: Option<i32>, pub max_tokens: Option<u32>,
/// The minimum number of tokens to generate. We ignore stop tokens until we see this many /// The minimum number of tokens to generate. We ignore stop tokens until we see this many
/// tokens. Leave this None unless you are working on the pre-processor. /// tokens. Leave this None unless you are working on the pre-processor.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
#[builder(default, setter(into, strip_option))] #[builder(default, setter(into, strip_option))]
pub min_tokens: Option<i32>, pub min_tokens: Option<u32>,
/// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only
/// server-sent events as they become available, with the stream terminated by a data: \[DONE\] /// server-sent events as they become available, with the stream terminated by a data: \[DONE\]
...@@ -248,7 +248,7 @@ impl CompletionRequestBuilder { ...@@ -248,7 +248,7 @@ impl CompletionRequestBuilder {
/// let request = CompletionRequest::builder() /// let request = CompletionRequest::builder()
/// .model("mixtral-8x7b-instruct-v0.1") /// .model("mixtral-8x7b-instruct-v0.1")
/// .prompt("Hello") /// .prompt("Hello")
/// .max_tokens(16) /// .max_tokens(16_u32)
/// .build() /// .build()
/// .expect("Failed to build CompletionRequest"); /// .expect("Failed to build CompletionRequest");
/// ``` /// ```
...@@ -433,11 +433,11 @@ impl OpenAISamplingOptionsProvider for CompletionRequest { ...@@ -433,11 +433,11 @@ impl OpenAISamplingOptionsProvider for CompletionRequest {
} }
impl OpenAIStopConditionsProvider for CompletionRequest { impl OpenAIStopConditionsProvider for CompletionRequest {
fn get_max_tokens(&self) -> Option<i32> { fn get_max_tokens(&self) -> Option<u32> {
self.max_tokens self.max_tokens
} }
fn get_min_tokens(&self) -> Option<i32> { fn get_min_tokens(&self) -> Option<u32> {
self.min_tokens self.min_tokens
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment