"description":"A list of messages comprising the conversation so far."
"description":"A list of messages comprising the conversation so far.",
"example":"[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]"
},
"model":{
"type":"string",
"description":"[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
"example":"mistralai/Mistral-7B-Instruct-v0.2"
},
"n":{
...
@@ -806,6 +900,15 @@
"nullable":true,
"minimum":0
},
"stop":{
"type":"array",
"items":{
"type":"string"
},
"description":"Up to 4 sequences where the API will stop generating further tokens.",
"example":"null",
"nullable":true
},
"stream":{
"stream":{
"type":"boolean"
"type":"boolean"
},
},
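For orientation, the ChatRequest fields shown above combine into an OpenAI-style request body. A minimal sketch (the stop sequence below is an arbitrary placeholder, and the endpoint path is not part of this excerpt):

{
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
  "messages": [
    {"role": "user", "content": "What is Deep Learning?"}
  ],
  "stop": ["\n\nUser:"],
  "stream": false
}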
...
@@ -816,6 +919,29 @@
"example":1.0,
"nullable":true
},
"tool_choice":{
"allOf":[
{
"$ref":"#/components/schemas/ToolType"
}
],
"nullable":true
},
"tool_prompt":{
"type":"string",
"description":"A prompt to be appended before the tools",
"example":"\"Based on the conversation, please choose the most appropriate tool to use: \"",
"nullable":true
},
"tools":{
"type":"array",
"items":{
"$ref":"#/components/schemas/Tool"
},
"description":"A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.",
"example":"null",
"nullable":true
},
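The tool-related fields above fit into the same request body. A minimal sketch, assuming the Tool schema (not shown in this excerpt) follows the common function-tool shape; the get_weather function is purely illustrative, and tool_choice is left at its nullable default:

{
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
  "messages": [
    {"role": "user", "content": "What is the weather like in Paris?"}
  ],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a given city",
        "parameters": {
          "type": "object",
          "properties": {
            "city": {"type": "string"}
          },
          "required": ["city"]
        }
      }
    }
  ],
  "tool_prompt": "Based on the conversation, please choose the most appropriate tool to use: "
}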
"top_logprobs":{
"top_logprobs":{
"type":"integer",
"type":"integer",
"format":"int32",
"format":"int32",
...
@@ -852,6 +978,164 @@
...
@@ -852,6 +978,164 @@
}
}
},
"CompletionComplete":{
"type":"object",
"required":[
"index",
"text",
"finish_reason"
],
"properties":{
"finish_reason":{
"type":"string"
},
"index":{
"type":"integer",
"format":"int32",
"minimum":0
},
"logprobs":{
"type":"array",
"items":{
"type":"number",
"format":"float"
},
"nullable":true
},
"text":{
"type":"string"
}
}
},
"CompletionCompleteChunk":{
"type":"object",
"required":[
"id",
"object",
"created",
"choices",
"model",
"system_fingerprint"
],
"properties":{
"choices":{
"type":"array",
"items":{
"$ref":"#/components/schemas/CompletionComplete"
}
},
"created":{
"type":"integer",
"format":"int64",
"minimum":0
},
"id":{
"type":"string"
},
"model":{
"type":"string"
},
"object":{
"type":"string"
},
"system_fingerprint":{
"type":"string"
}
}
},
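For reference, a single streamed chunk conforming to CompletionCompleteChunk could look roughly like the following; every scalar value here is a placeholder rather than something mandated by the schema:

{
  "id": "",
  "object": "text_completion",
  "created": 1719000000,
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
  "system_fingerprint": "",
  "choices": [
    {
      "index": 0,
      "text": " Deep Learning is a subset of machine learning",
      "logprobs": null,
      "finish_reason": "length"
    }
  ]
}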
"CompletionRequest":{
"type":"object",
"required":[
"model",
"prompt"
],
"properties":{
"frequency_penalty":{
"type":"number",
"format":"float",
"description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
"example":"1.0",
"nullable":true
},
"max_tokens":{
"type":"integer",
"format":"int32",
"description":"The maximum number of tokens that can be generated in the chat completion.",
"default":"32",
"nullable":true,
"minimum":0
},
"model":{
"type":"string",
"description":"UNUSED\nID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
"example":"mistralai/Mistral-7B-Instruct-v0.2"
},
"prompt":{
"type":"string",
"description":"The prompt to generate completions for.",
"example":"What is Deep Learning?"
},
"repetition_penalty":{
"type":"number",
"format":"float",
"nullable":true
},
"seed":{
"type":"integer",
"format":"int64",
"example":42,
"nullable":true,
"minimum":0
},
"stream":{
"type":"boolean"
},
"suffix":{
"type":"string",
"description":"The text to append to the prompt. This is useful for completing sentences or generating a paragraph of text.\nplease see the completion_template field in the model's tokenizer_config.json file for completion template.",
"nullable":true
},
"temperature":{
"type":"number",
"format":"float",
"description":"What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.",
"example":1.0,
"nullable":true
},
"top_p":{
"type":"number",
"format":"float",
"description":"An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",