Unverified Commit 3036e60b authored by nachiketb-nvidia's avatar nachiketb-nvidia Committed by GitHub
Browse files

feat: add gpt oss reasoning parser through harmony (#2656)

- couple of refactors
- added a new dependency, openai-harmony
- implemented the gpt oss parser
parent b39382ba
......@@ -172,6 +172,17 @@ version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "arg_enum_proc_macro"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "arrayref"
version = "0.3.9"
......@@ -341,6 +352,29 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "av1-grain"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3efb2ca85bc610acfa917b5aaa36f3fcbebed5b3182d7f877b02531c4b80c8"
dependencies = [
"anyhow",
"arrayvec",
"log",
"nom 7.1.3",
"num-rational",
"v_frame",
]
[[package]]
name = "avif-serialize"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47c8fbc0f831f4519fe8b810b6a7a91410ec83031b8233f730a0480029f6a23f"
dependencies = [
"arrayvec",
]
[[package]]
name = "aws-lc-rs"
version = "1.13.3"
......@@ -693,6 +727,12 @@ version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "bitstream-io"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2"
[[package]]
name = "blake3"
version = "1.8.2"
......@@ -753,9 +793,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.9",
"serde",
]
[[package]]
name = "built"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56ed6191a7e78c36abdb16ab65341eefd73d64d303fffccdbb00d51e4205967b"
[[package]]
name = "bumpalo"
version = "3.19.0"
......@@ -1991,6 +2038,8 @@ version = "0.4.1"
dependencies = [
"anyhow",
"dynamo-async-openai",
"lazy_static",
"openai-harmony",
"regex",
"serde",
"serde_json",
......@@ -2353,6 +2402,17 @@ dependencies = [
"regex",
]
[[package]]
name = "fancy-regex"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
"bit-set 0.5.3",
"regex-automata 0.4.9",
"regex-syntax 0.8.5",
]
[[package]]
name = "fancy-regex"
version = "0.14.0"
......@@ -3130,6 +3190,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hf-hub"
version = "0.4.3"
......@@ -3548,6 +3614,9 @@ dependencies = [
"num-traits",
"png",
"qoi",
"ravif",
"rayon",
"rgb",
"tiff",
"zune-core",
"zune-jpeg",
......@@ -3563,6 +3632,12 @@ dependencies = [
"quick-error 2.0.1",
]
[[package]]
name = "imgref"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408"
[[package]]
name = "indexmap"
version = "1.9.3"
......@@ -3571,6 +3646,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
"serde",
]
[[package]]
......@@ -3630,6 +3706,17 @@ dependencies = [
"cfg-if 1.0.1",
]
[[package]]
name = "interpolate_name"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "interprocess"
version = "2.2.3"
......@@ -3703,6 +3790,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.14.0"
......@@ -3863,6 +3959,16 @@ dependencies = [
"uuid 1.17.0",
]
[[package]]
name = "libfuzzer-sys"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404"
dependencies = [
"arbitrary",
"cc",
]
[[package]]
name = "libloading"
version = "0.8.8"
......@@ -3976,6 +4082,15 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "loop9"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
dependencies = [
"imgref",
]
[[package]]
name = "lrtable"
version = "0.13.10"
......@@ -4093,6 +4208,16 @@ dependencies = [
"rawpointer",
]
[[package]]
name = "maybe-rayon"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [
"cfg-if 1.0.1",
"rayon",
]
[[package]]
name = "memchr"
version = "2.7.5"
......@@ -4383,7 +4508,7 @@ dependencies = [
"rustc-hash 2.1.1",
"rustfft",
"safetensors 0.6.1",
"schemars",
"schemars 0.8.22",
"scraper",
"serde",
"serde-big-array",
......@@ -4702,6 +4827,12 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51"
[[package]]
name = "noop_proc_macro"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
[[package]]
name = "ntapi"
version = "0.4.1"
......@@ -4776,6 +4907,17 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-derive"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "num-integer"
version = "0.1.46"
......@@ -4948,6 +5090,30 @@ version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
[[package]]
name = "openai-harmony"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b7fd6b7d01a317c58d85a22b7cc314e14a2fef0dfdb93b819738d09caece16"
dependencies = [
"anyhow",
"base64 0.22.1",
"bstr",
"clap 4.5.42",
"fancy-regex 0.13.0",
"futures",
"image",
"regex",
"reqwest 0.12.22",
"rustc-hash 1.1.0",
"serde",
"serde_json",
"serde_with",
"sha1",
"sha2",
"thiserror 2.0.12",
]
[[package]]
name = "openssl"
version = "0.10.73"
......@@ -5437,6 +5603,25 @@ dependencies = [
"yansi",
]
[[package]]
name = "profiling"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773"
dependencies = [
"profiling-procmacros",
]
[[package]]
name = "profiling-procmacros"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
dependencies = [
"quote",
"syn 2.0.104",
]
[[package]]
name = "prometheus"
version = "0.14.0"
......@@ -5777,6 +5962,56 @@ dependencies = [
"rand_core 0.9.3",
]
[[package]]
name = "rav1e"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9"
dependencies = [
"arbitrary",
"arg_enum_proc_macro",
"arrayvec",
"av1-grain",
"bitstream-io",
"built",
"cfg-if 1.0.1",
"interpolate_name",
"itertools 0.12.1",
"libc",
"libfuzzer-sys",
"log",
"maybe-rayon",
"new_debug_unreachable",
"noop_proc_macro",
"num-derive",
"num-traits",
"once_cell",
"paste",
"profiling",
"rand 0.8.5",
"rand_chacha 0.3.1",
"simd_helpers",
"system-deps",
"thiserror 1.0.69",
"v_frame",
"wasm-bindgen",
]
[[package]]
name = "ravif"
version = "0.11.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5825c26fddd16ab9f515930d49028a630efec172e903483c94796cfe31893e6b"
dependencies = [
"avif-serialize",
"imgref",
"loop9",
"quick-error 2.0.1",
"rav1e",
"rayon",
"rgb",
]
[[package]]
name = "raw-cpuid"
version = "10.7.0"
......@@ -5873,6 +6108,26 @@ dependencies = [
"thiserror 2.0.12",
]
[[package]]
name = "ref-cast"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf"
dependencies = [
"ref-cast-impl",
]
[[package]]
name = "ref-cast-impl"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "regex"
version = "1.11.1"
......@@ -6025,6 +6280,12 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "rgb"
version = "0.8.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce"
[[package]]
name = "ring"
version = "0.17.14"
......@@ -6418,6 +6679,30 @@ dependencies = [
"serde_json",
]
[[package]]
name = "schemars"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
dependencies = [
"dyn-clone",
"ref-cast",
"serde",
"serde_json",
]
[[package]]
name = "schemars"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0"
dependencies = [
"dyn-clone",
"ref-cast",
"serde",
"serde_json",
]
[[package]]
name = "schemars_derive"
version = "0.8.22"
......@@ -6663,6 +6948,38 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_with"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5"
dependencies = [
"base64 0.22.1",
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.10.0",
"schemars 0.9.0",
"schemars 1.0.4",
"serde",
"serde_derive",
"serde_json",
"serde_with_macros",
"time",
]
[[package]]
name = "serde_with_macros"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
dependencies = [
"darling 0.20.11",
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "serde_yaml"
version = "0.9.34+deprecated"
......@@ -6824,6 +7141,15 @@ version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
name = "simd_helpers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
dependencies = [
"quote",
]
[[package]]
name = "similar"
version = "2.7.0"
......@@ -8313,6 +8639,17 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "v_frame"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "666b7727c8875d6ab5db9533418d7c764233ac9c0cff1d469aec8fa127597be2"
dependencies = [
"aligned-vec",
"num-traits",
"wasm-bindgen",
]
[[package]]
name = "validator"
version = "0.20.0"
......
......@@ -32,7 +32,8 @@ allow = [
"BSL-1.0",
"MPL-2.0",
"CDLA-Permissive-2.0",
"Zlib"
"Zlib",
"NCSA"
]
# TODO exceptions
......
......@@ -46,6 +46,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1763692fc1416554cf051efc56a3de5595eca47299d731cc5c2b583adf8b4d2f"
[[package]]
name = "aligned-vec"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
dependencies = [
"equator",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
......@@ -132,6 +141,17 @@ version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "arg_enum_proc_macro"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "arrayref"
version = "0.3.9"
......@@ -290,6 +310,29 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "av1-grain"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3efb2ca85bc610acfa917b5aaa36f3fcbebed5b3182d7f877b02531c4b80c8"
dependencies = [
"anyhow",
"arrayvec",
"log",
"nom",
"num-rational",
"v_frame",
]
[[package]]
name = "avif-serialize"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47c8fbc0f831f4519fe8b810b6a7a91410ec83031b8233f730a0480029f6a23f"
dependencies = [
"arrayvec",
]
[[package]]
name = "aws-lc-rs"
version = "1.13.3"
......@@ -500,21 +543,42 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec 0.6.3",
]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
"bit-vec 0.8.0",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bit_field"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61"
[[package]]
name = "bitflags"
version = "1.3.2"
......@@ -527,6 +591,12 @@ version = "2.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29"
[[package]]
name = "bitstream-io"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2"
[[package]]
name = "blake3"
version = "1.8.2"
......@@ -560,6 +630,23 @@ dependencies = [
"num-traits",
]
[[package]]
name = "bstr"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.9",
"serde",
]
[[package]]
name = "built"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56ed6191a7e78c36abdb16ab65341eefd73d64d303fffccdbb00d51e4205967b"
[[package]]
name = "bumpalo"
version = "3.19.0"
......@@ -592,6 +679,12 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "byteorder-lite"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
[[package]]
name = "bytes"
version = "1.10.1"
......@@ -711,6 +804,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
......@@ -725,6 +819,18 @@ dependencies = [
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "clap_lex"
version = "0.7.5"
......@@ -740,6 +846,12 @@ dependencies = [
"cc",
]
[[package]]
name = "color_quant"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
[[package]]
name = "colorchoice"
version = "1.0.4"
......@@ -1188,6 +1300,12 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
[[package]]
name = "dyn-clone"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "dyn-stack"
version = "0.10.0"
......@@ -1310,9 +1428,12 @@ version = "0.4.1"
dependencies = [
"anyhow",
"dynamo-async-openai",
"lazy_static",
"openai-harmony",
"regex",
"serde",
"serde_json",
"tokio",
"tracing",
"uuid",
]
......@@ -1488,6 +1609,26 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "equator"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
dependencies = [
"equator-macro",
]
[[package]]
name = "equator-macro"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "equivalent"
version = "1.0.2"
......@@ -1571,13 +1712,39 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "exr"
version = "1.73.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83197f59927b46c04a183a619b7c29df34e63e63c7869320862268c0ef687e0"
dependencies = [
"bit_field",
"half",
"lebe",
"miniz_oxide",
"rayon-core",
"smallvec",
"zune-inflate",
]
[[package]]
name = "fancy-regex"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
"bit-set 0.5.3",
"regex-automata 0.4.9",
"regex-syntax 0.8.5",
]
[[package]]
name = "fancy-regex"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
"bit-set",
"bit-set 0.8.0",
"regex-automata 0.4.9",
"regex-syntax 0.8.5",
]
......@@ -1588,6 +1755,15 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fdeflate"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c"
dependencies = [
"simd-adler32",
]
[[package]]
name = "fiat-crypto"
version = "0.2.9"
......@@ -2070,13 +2246,23 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ac5654356c6f7f6116905aeaf92ab002c3d03414ada5dbe0bb2e32aa5fea173"
dependencies = [
"fancy-regex",
"fancy-regex 0.14.0",
"ggml-quants",
"indexmap",
"indexmap 2.10.0",
"log",
"num_enum",
]
[[package]]
name = "gif"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae047235e33e2829703574b54fdec96bfbad892062d97fed2f76022287de61b"
dependencies = [
"color_quant",
"weezl",
]
[[package]]
name = "gimli"
version = "0.31.1"
......@@ -2101,7 +2287,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http",
"indexmap",
"indexmap 2.10.0",
"slab",
"tokio",
"tokio-util",
......@@ -2122,6 +2308,12 @@ dependencies = [
"rand_distr",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.14.5"
......@@ -2146,6 +2338,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hf-hub"
version = "0.4.3"
......@@ -2447,6 +2645,56 @@ dependencies = [
"icu_properties",
]
[[package]]
name = "image"
version = "0.25.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db35664ce6b9810857a38a906215e75a9c879f0696556a39f59c62829710251a"
dependencies = [
"bytemuck",
"byteorder-lite",
"color_quant",
"exr",
"gif",
"image-webp",
"num-traits",
"png",
"qoi",
"ravif",
"rayon",
"rgb",
"tiff",
"zune-core",
"zune-jpeg",
]
[[package]]
name = "image-webp"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6970fe7a5300b4b42e62c52efa0187540a5bef546c60edaf554ef595d2e6f0b"
dependencies = [
"byteorder-lite",
"quick-error",
]
[[package]]
name = "imgref"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
"serde",
]
[[package]]
name = "indexmap"
version = "2.10.0"
......@@ -2455,6 +2703,7 @@ checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
dependencies = [
"equivalent",
"hashbrown 0.15.5",
"serde",
]
[[package]]
......@@ -2491,6 +2740,17 @@ dependencies = [
"cfg-if 1.0.1",
]
[[package]]
name = "interpolate_name"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "inventory"
version = "0.3.20"
......@@ -2585,6 +2845,12 @@ dependencies = [
"libc",
]
[[package]]
name = "jpeg-decoder"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07"
[[package]]
name = "js-sys"
version = "0.3.77"
......@@ -2627,12 +2893,28 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "lebe"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
[[package]]
name = "libc"
version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]]
name = "libfuzzer-sys"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404"
dependencies = [
"arbitrary",
"cc",
]
[[package]]
name = "libloading"
version = "0.8.8"
......@@ -2705,6 +2987,15 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "loop9"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
dependencies = [
"imgref",
]
[[package]]
name = "lru-slab"
version = "0.1.2"
......@@ -2752,6 +3043,16 @@ dependencies = [
"rawpointer",
]
[[package]]
name = "maybe-rayon"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
dependencies = [
"cfg-if 1.0.1",
"rayon",
]
[[package]]
name = "memchr"
version = "2.7.5"
......@@ -2842,6 +3143,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
......@@ -2964,6 +3266,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "nid"
version = "3.0.0"
......@@ -3047,6 +3355,12 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51"
[[package]]
name = "noop_proc_macro"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
......@@ -3106,6 +3420,17 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-derive"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "num-integer"
version = "0.1.46"
......@@ -3253,6 +3578,30 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "openai-harmony"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b7fd6b7d01a317c58d85a22b7cc314e14a2fef0dfdb93b819738d09caece16"
dependencies = [
"anyhow",
"base64 0.22.1",
"bstr",
"clap",
"fancy-regex 0.13.0",
"futures",
"image",
"regex",
"reqwest",
"rustc-hash 1.1.0",
"serde",
"serde_json",
"serde_with",
"sha1",
"sha2",
"thiserror 2.0.15",
]
[[package]]
name = "openssl-probe"
version = "0.1.6"
......@@ -3363,7 +3712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset",
"indexmap",
"indexmap 2.10.0",
]
[[package]]
......@@ -3421,12 +3770,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
dependencies = [
"base64 0.22.1",
"indexmap",
"indexmap 2.10.0",
"quick-xml",
"serde",
"time",
]
[[package]]
name = "png"
version = "0.17.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526"
dependencies = [
"bitflags 1.3.2",
"crc32fast",
"fdeflate",
"flate2",
"miniz_oxide",
]
[[package]]
name = "portable-atomic"
version = "1.11.1"
......@@ -3529,6 +3891,25 @@ dependencies = [
"yansi",
]
[[package]]
name = "profiling"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773"
dependencies = [
"profiling-procmacros",
]
[[package]]
name = "profiling-procmacros"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
dependencies = [
"quote",
"syn 2.0.106",
]
[[package]]
name = "prometheus"
version = "0.14.0"
......@@ -3743,6 +4124,21 @@ dependencies = [
"serde",
]
[[package]]
name = "qoi"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
dependencies = [
"bytemuck",
]
[[package]]
name = "quick-error"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "quick-xml"
version = "0.38.1"
......@@ -3891,6 +4287,56 @@ dependencies = [
"rand 0.9.2",
]
[[package]]
name = "rav1e"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9"
dependencies = [
"arbitrary",
"arg_enum_proc_macro",
"arrayvec",
"av1-grain",
"bitstream-io",
"built",
"cfg-if 1.0.1",
"interpolate_name",
"itertools 0.12.1",
"libc",
"libfuzzer-sys",
"log",
"maybe-rayon",
"new_debug_unreachable",
"noop_proc_macro",
"num-derive",
"num-traits",
"once_cell",
"paste",
"profiling",
"rand 0.8.5",
"rand_chacha 0.3.1",
"simd_helpers",
"system-deps",
"thiserror 1.0.69",
"v_frame",
"wasm-bindgen",
]
[[package]]
name = "ravif"
version = "0.11.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5825c26fddd16ab9f515930d49028a630efec172e903483c94796cfe31893e6b"
dependencies = [
"avif-serialize",
"imgref",
"loop9",
"quick-error",
"rav1e",
"rayon",
"rgb",
]
[[package]]
name = "raw-cpuid"
version = "10.7.0"
......@@ -3972,6 +4418,26 @@ dependencies = [
"thiserror 2.0.15",
]
[[package]]
name = "ref-cast"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf"
dependencies = [
"ref-cast-impl",
]
[[package]]
name = "ref-cast-impl"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "regex"
version = "1.11.1"
......@@ -4031,6 +4497,7 @@ dependencies = [
"base64 0.22.1",
"bytes",
"encoding_rs",
"futures-channel",
"futures-core",
"futures-util",
"h2",
......@@ -4084,6 +4551,12 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "rgb"
version = "0.8.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce"
[[package]]
name = "ring"
version = "0.17.14"
......@@ -4325,6 +4798,30 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "schemars"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
dependencies = [
"dyn-clone",
"ref-cast",
"serde",
"serde_json",
]
[[package]]
name = "schemars"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0"
dependencies = [
"dyn-clone",
"ref-cast",
"serde",
"serde_json",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
......@@ -4421,6 +4918,7 @@ version = "1.0.143"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
dependencies = [
"indexmap 2.10.0",
"itoa",
"memchr",
"ryu",
......@@ -4478,6 +4976,49 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_with"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5"
dependencies = [
"base64 0.22.1",
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.10.0",
"schemars 0.9.0",
"schemars 1.0.4",
"serde",
"serde_derive",
"serde_json",
"serde_with_macros",
"time",
]
[[package]]
name = "serde_with_macros"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "sha1"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if 1.0.1",
"cpufeatures",
"digest",
]
[[package]]
name = "sha2"
version = "0.10.9"
......@@ -4541,6 +5082,21 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "simd-adler32"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
name = "simd_helpers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
dependencies = [
"quote",
]
[[package]]
name = "slab"
version = "0.4.11"
......@@ -4823,6 +5379,17 @@ dependencies = [
"cfg-if 1.0.1",
]
[[package]]
name = "tiff"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e"
dependencies = [
"flate2",
"jpeg-decoder",
"weezl",
]
[[package]]
name = "time"
version = "0.3.41"
......@@ -4906,7 +5473,7 @@ dependencies = [
"dary_heap",
"derive_builder",
"esaxx-rs",
"fancy-regex",
"fancy-regex 0.14.0",
"getrandom 0.3.3",
"hf-hub",
"itertools 0.14.0",
......@@ -5071,7 +5638,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"indexmap 2.10.0",
"serde",
"serde_spanned",
"toml_datetime",
......@@ -5137,7 +5704,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"indexmap",
"indexmap 2.10.0",
"pin-project-lite",
"slab",
"sync_wrapper",
......@@ -5425,6 +5992,17 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "v_frame"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "666b7727c8875d6ab5db9533418d7c764233ac9c0cff1d469aec8fa127597be2"
dependencies = [
"aligned-vec",
"num-traits",
"wasm-bindgen",
]
[[package]]
name = "validator"
version = "0.20.0"
......@@ -5629,6 +6207,12 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "weezl"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3"
[[package]]
name = "which"
version = "4.4.2"
......@@ -6131,7 +6715,7 @@ dependencies = [
"crc32fast",
"crossbeam-utils",
"displaydoc",
"indexmap",
"indexmap 2.10.0",
"num_enum",
"thiserror 1.0.69",
]
......@@ -6157,3 +6741,27 @@ dependencies = [
"system-deps",
"zeromq-src",
]
[[package]]
name = "zune-core"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a"
[[package]]
name = "zune-inflate"
version = "0.2.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
dependencies = [
"simd-adler32",
]
[[package]]
name = "zune-jpeg"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc1f7e205ce79eb2da3cd71c5f55f3589785cb7c79f6a03d1c8d1491bda5d089"
dependencies = [
"zune-core",
]
......@@ -204,12 +204,12 @@ impl
for c in prompt.chars() {
// we are returning characters not tokens, so there will be some postprocessing overhead
tokio::time::sleep(*TOKEN_ECHO_DELAY).await;
let response = deltas.create_choice(0, Some(c.to_string()), None, None);
let response = deltas.create_choice(0, Some(c.to_string()), None, None, None);
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
id += 1;
}
let response = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
let response = deltas.create_choice(0, None, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
};
......
......@@ -91,6 +91,7 @@ impl DeltaGenerator {
// Reasoning parser type
// This is hardcoded for now, but can be made configurable later.
// TODO: Make parser type configurable once front-end integration is determined
// Change to GptOss to test GptOSS parser
let reasoning_parser_type = ReasoningParserType::Basic;
// Reasoning parser wrapper
......@@ -121,7 +122,7 @@ impl DeltaGenerator {
pub fn create_logprobs(
&self,
tokens: Vec<common::llm_backend::TokenType>,
token_ids: Vec<TokenIdType>,
token_ids: &[TokenIdType],
logprobs: Option<common::llm_backend::LogProbs>,
top_logprobs: Option<common::llm_backend::TopLogprobs>,
) -> Option<dynamo_async_openai::types::ChatChoiceLogprobs> {
......@@ -132,7 +133,7 @@ impl DeltaGenerator {
let toks = tokens
.into_iter()
.zip(token_ids)
.map(|(token, token_id)| (token.unwrap_or_default(), token_id))
.map(|(token, token_id)| (token.unwrap_or_default(), *token_id))
.collect::<Vec<(String, TokenIdType)>>();
let tok_lps = toks
.iter()
......@@ -183,11 +184,18 @@ impl DeltaGenerator {
})
}
fn create_reasoning_content(&mut self, text: Option<String>) -> Option<ParserResult> {
let text = text?;
fn create_reasoning_content(
&mut self,
text: &Option<String>,
token_ids: &[u32],
) -> Option<ParserResult> {
let text_ref = text.as_deref().unwrap_or("");
if text_ref.is_empty() && token_ids.is_empty() {
return None;
}
let parser_result = self
.reasoning_parser
.parse_reasoning_streaming_incremental(&text);
.parse_reasoning_streaming_incremental(text_ref, token_ids);
Some(parser_result)
}
......@@ -207,17 +215,12 @@ impl DeltaGenerator {
&mut self,
index: u32,
text: Option<String>,
reasoning_content: Option<String>,
finish_reason: Option<dynamo_async_openai::types::FinishReason>,
logprobs: Option<dynamo_async_openai::types::ChatChoiceLogprobs>,
) -> NvCreateChatCompletionStreamResponse {
let reasoning_parser_result = self.create_reasoning_content(text).unwrap_or_default();
let (normal_text, reasoning_content) = (
reasoning_parser_result.get_some_normal_text(),
reasoning_parser_result.get_some_reasoning(),
);
let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
content: normal_text,
content: text,
function_call: None,
tool_calls: None,
role: if self.msg_counter == 0 {
......@@ -292,7 +295,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
let logprobs = self.create_logprobs(
delta.tokens,
delta.token_ids,
&delta.token_ids,
delta.log_probs,
delta.top_logprobs,
);
......@@ -318,9 +321,24 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
None => None,
};
let reasoning_parser_result = self
.create_reasoning_content(&delta.text, &delta.token_ids)
.unwrap_or_default();
let (normal_text, reasoning_content) = (
reasoning_parser_result.get_some_normal_text(),
reasoning_parser_result.get_some_reasoning(),
);
// Create the streaming response.
let index = 0;
let stream_response = self.create_choice(index, delta.text, finish_reason, logprobs);
let stream_response = self.create_choice(
index,
normal_text,
reasoning_content,
finish_reason,
logprobs,
);
Ok(stream_response)
}
......
......@@ -100,7 +100,7 @@ impl
let stream = stream! {
tokio::time::sleep(std::time::Duration::from_millis(max_tokens)).await;
for i in 0..10 {
let output = generator.create_choice(i,Some(format!("choice {i}")), None, None);
let output = generator.create_choice(i,Some(format!("choice {i}")), None, None, None);
yield Annotated::from_data(output);
}
......
......@@ -32,4 +32,6 @@ serde_json = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
regex = "1"
\ No newline at end of file
regex = "1"
openai-harmony = "0.0.3"
lazy_static = "1.5.0"
......@@ -33,7 +33,7 @@ impl BasicReasoningParser {
}
impl ReasoningParser for BasicReasoningParser {
fn detect_and_parse_reasoning(&self, text: &str) -> ParserResult {
fn detect_and_parse_reasoning(&mut self, text: &str, _token_ids: &[u32]) -> ParserResult {
log::debug!("detect_and_parse_reasoning called with text: {:?}", text);
let in_reasoning = self._in_reasoning || text.contains(&self.think_start_token);
......@@ -82,7 +82,11 @@ impl ReasoningParser for BasicReasoningParser {
}
}
fn parse_reasoning_streaming_incremental(&mut self, text: &str) -> ParserResult {
fn parse_reasoning_streaming_incremental(
&mut self,
text: &str,
_token_ids: &[u32],
) -> ParserResult {
// Incrementally parse the streaming text
self._buffer.push_str(text);
let mut current_text = self._buffer.to_string();
......@@ -180,26 +184,26 @@ mod tests {
#[test]
fn test_detect_and_parse_reasoning_reasoning() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result =
parser.detect_and_parse_reasoning("<think>with reasoning</think> and more text.");
parser.detect_and_parse_reasoning("<think>with reasoning</think> and more text.", &[]);
assert_eq!(result.normal_text, "and more text.");
assert_eq!(result.reasoning_text, "with reasoning");
}
#[test]
fn test_detect_and_parse_reasoning_reasoning_no_reasoning() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning("This is a test without reasoning.");
let result = parser.detect_and_parse_reasoning("This is a test without reasoning.", &[]);
assert_eq!(result.normal_text, "This is a test without reasoning.");
assert_eq!(result.reasoning_text, "");
}
#[test]
fn test_detect_and_parse_reasoning_reasoning_truncated_reasoning() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning("<think>with truncated reasoning");
let result = parser.detect_and_parse_reasoning("<think>with truncated reasoning", &[]);
assert_eq!(result.normal_text, "");
assert_eq!(result.reasoning_text, "with truncated reasoning");
}
......@@ -208,7 +212,7 @@ mod tests {
fn test_parse_reasoning_streaming_incremental() {
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.parse_reasoning_streaming_incremental("<thi");
let result = parser.parse_reasoning_streaming_incremental("<thi", &[]);
assert_eq!(result.normal_text, "");
assert_eq!(result.reasoning_text, "");
}
......@@ -217,8 +221,10 @@ mod tests {
fn test_parse_reasoning_streaming_incremental_complete() {
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser
.parse_reasoning_streaming_incremental("<think>with reasoning</think> and more text.");
let result = parser.parse_reasoning_streaming_incremental(
"<think>with reasoning</think> and more text.",
&[],
);
assert_eq!(result.normal_text, " and more text.");
assert_eq!(result.reasoning_text, "with reasoning");
}
......@@ -227,17 +233,18 @@ mod tests {
fn test_parse_reasoning_streaming_incremental_no_end_token() {
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, true);
let result = parser.parse_reasoning_streaming_incremental("<think>with reasoning");
let result = parser.parse_reasoning_streaming_incremental("<think>with reasoning", &[]);
assert_eq!(result.normal_text, "");
assert_eq!(result.reasoning_text, "with reasoning");
}
#[test]
fn test_detect_and_parse_reasoning_multiple_reasoning_blocks() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning(
"<think>first reasoning</think> middle <think>second reasoning</think> end",
&[],
);
// The current implementation only handles the first occurrence properly
assert_eq!(result.normal_text, "middle second reasoning</think> end");
......@@ -248,14 +255,14 @@ mod tests {
fn test_streaming_multiple_reasoning_blocks() {
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
let result1 =
parser.parse_reasoning_streaming_incremental("<think>first reasoning</think> middle");
let result1 = parser
.parse_reasoning_streaming_incremental("<think>first reasoning</think> middle", &[]);
assert_eq!(result1.normal_text, " middle");
assert_eq!(result1.reasoning_text, "first reasoning");
// Basic parser assumes only one reasoning block at a time
let result2 =
parser.parse_reasoning_streaming_incremental(" <think>second reasoning</think> end");
let result2 = parser
.parse_reasoning_streaming_incremental(" <think>second reasoning</think> end", &[]);
assert_eq!(result2.normal_text, " <think>second reasoning</think> end");
assert_eq!(result2.reasoning_text, "");
}
......@@ -266,13 +273,15 @@ mod tests {
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
// Feed partial opening tag
let result1 = parser.parse_reasoning_streaming_incremental("<th");
let result1 = parser.parse_reasoning_streaming_incremental("<th", &[]);
assert_eq!(result1.normal_text, "");
assert_eq!(result1.reasoning_text, "");
// Complete the opening tag and add content
let result2 = parser
.parse_reasoning_streaming_incremental("ink>reasoning content</think> normal text");
let result2 = parser.parse_reasoning_streaming_incremental(
"ink>reasoning content</think> normal text",
&[],
);
assert_eq!(result2.normal_text, " normal text");
assert_eq!(result2.reasoning_text, "reasoning content");
}
......@@ -283,12 +292,13 @@ mod tests {
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
// Start with complete opening and partial content
let result1 = parser.parse_reasoning_streaming_incremental("<think>reasoning content</th");
let result1 =
parser.parse_reasoning_streaming_incremental("<think>reasoning content</th", &[]);
assert_eq!(result1.normal_text, "");
assert_eq!(result1.reasoning_text, "");
// Complete the closing tag
let result2 = parser.parse_reasoning_streaming_incremental("ink> normal text");
let result2 = parser.parse_reasoning_streaming_incremental("ink> normal text", &[]);
assert_eq!(result2.normal_text, " normal text");
assert_eq!(result2.reasoning_text, "reasoning content");
}
......@@ -299,22 +309,22 @@ mod tests {
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
// First call - partial opening tag
let result1 = parser.parse_reasoning_streaming_incremental("<th");
let result1 = parser.parse_reasoning_streaming_incremental("<th", &[]);
assert_eq!(result1.normal_text, "");
assert_eq!(result1.reasoning_text, "");
// Second call - complete opening tag, start reasoning
let result2 = parser.parse_reasoning_streaming_incremental("ink>part1 ");
let result2 = parser.parse_reasoning_streaming_incremental("ink>part1 ", &[]);
assert_eq!(result2.normal_text, "");
assert_eq!(result2.reasoning_text, "");
// Third call - more reasoning content
let result3 = parser.parse_reasoning_streaming_incremental("part2 ");
let result3 = parser.parse_reasoning_streaming_incremental("part2 ", &[]);
assert_eq!(result3.normal_text, "");
assert_eq!(result3.reasoning_text, "");
// Fourth call - end reasoning and normal text
let result4 = parser.parse_reasoning_streaming_incremental("part3</think> normal");
let result4 = parser.parse_reasoning_streaming_incremental("part3</think> normal", &[]);
assert_eq!(result4.normal_text, " normal");
assert_eq!(result4.reasoning_text, "part1 part2 part3");
}
......@@ -325,27 +335,28 @@ mod tests {
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
// Start reasoning block
let result1 = parser.parse_reasoning_streaming_incremental("<think>reasoning ");
let result1 = parser.parse_reasoning_streaming_incremental("<think>reasoning ", &[]);
assert_eq!(result1.normal_text, "");
assert_eq!(result1.reasoning_text, "reasoning ");
// Continue streaming reasoning
let result2 = parser.parse_reasoning_streaming_incremental("content ");
let result2 = parser.parse_reasoning_streaming_incremental("content ", &[]);
assert_eq!(result2.normal_text, "");
assert_eq!(result2.reasoning_text, "content ");
// End reasoning block
let result3 = parser.parse_reasoning_streaming_incremental("more</think> normal");
let result3 = parser.parse_reasoning_streaming_incremental("more</think> normal", &[]);
assert_eq!(result3.normal_text, " normal");
assert_eq!(result3.reasoning_text, "more");
}
#[test]
fn test_nested_reasoning_blocks() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning(
"<think>outer <think>inner</think> reasoning</think> normal",
&[],
);
// Current implementation should handle this by finding the first closing tag
assert_eq!(result.normal_text, "reasoning</think> normal");
......@@ -355,28 +366,28 @@ mod tests {
#[test]
fn test_malformed_missing_closing_tag() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning("<think>reasoning without closing tag");
let result = parser.detect_and_parse_reasoning("<think>reasoning without closing tag", &[]);
assert_eq!(result.normal_text, "");
assert_eq!(result.reasoning_text, "reasoning without closing tag");
}
#[test]
fn test_malformed_stray_closing_tag() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning("normal text</think> more normal");
let result = parser.detect_and_parse_reasoning("normal text</think> more normal", &[]);
assert_eq!(result.normal_text, "normal text</think> more normal");
assert_eq!(result.reasoning_text, "");
}
#[test]
fn test_malformed_multiple_opening_tags() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser
.detect_and_parse_reasoning("<think>first <think>second reasoning</think> normal");
.detect_and_parse_reasoning("<think>first <think>second reasoning</think> normal", &[]);
// Should handle by replacing all opening tags and using first closing tag
assert_eq!(result.normal_text, "normal");
assert_eq!(result.reasoning_text, "first second reasoning");
......@@ -384,27 +395,27 @@ mod tests {
#[test]
fn test_empty_reasoning_block() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning("<think></think> normal text");
let result = parser.detect_and_parse_reasoning("<think></think> normal text", &[]);
assert_eq!(result.normal_text, "normal text");
assert_eq!(result.reasoning_text, "");
}
#[test]
fn test_whitespace_only_reasoning_block() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
let result = parser.detect_and_parse_reasoning("<think> \n\t </think> normal text");
let result = parser.detect_and_parse_reasoning("<think> \n\t </think> normal text", &[]);
assert_eq!(result.normal_text, "normal text");
assert_eq!(result.reasoning_text, ""); // Should be empty after trim
}
#[test]
fn test_force_reasoning_mode() {
let parser =
let mut parser =
BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, true);
let result = parser.detect_and_parse_reasoning("no think tags here");
let result = parser.detect_and_parse_reasoning("no think tags here", &[]);
assert_eq!(result.normal_text, "");
assert_eq!(result.reasoning_text, "no think tags here");
}
......@@ -416,19 +427,19 @@ mod tests {
// Process complete reasoning block
let result1 =
parser.parse_reasoning_streaming_incremental("<think>reasoning</think> normal");
parser.parse_reasoning_streaming_incremental("<think>reasoning</think> normal", &[]);
assert_eq!(result1.normal_text, " normal");
assert_eq!(result1.reasoning_text, "reasoning");
// Process normal text - should not be affected by previous state
let result2 = parser.parse_reasoning_streaming_incremental(" more normal text");
let result2 = parser.parse_reasoning_streaming_incremental(" more normal text", &[]);
assert_eq!(result2.normal_text, " more normal text");
assert_eq!(result2.reasoning_text, "");
// Basic parser does not expect more than one reasoning block at a time
// So this should not affect the state
let result3 =
parser.parse_reasoning_streaming_incremental(" <think>new reasoning</think> final");
let result3 = parser
.parse_reasoning_streaming_incremental(" <think>new reasoning</think> final", &[]);
assert_eq!(result3.normal_text, " <think>new reasoning</think> final");
assert_eq!(result3.reasoning_text, "");
}
......
......@@ -24,11 +24,16 @@ impl DeepseekR1ReasoningParser {
}
impl ReasoningParser for DeepseekR1ReasoningParser {
fn parse_reasoning_streaming_incremental(&mut self, text: &str) -> ParserResult {
self.base.parse_reasoning_streaming_incremental(text)
fn parse_reasoning_streaming_incremental(
&mut self,
text: &str,
token_ids: &[u32],
) -> ParserResult {
self.base
.parse_reasoning_streaming_incremental(text, token_ids)
}
fn detect_and_parse_reasoning(&self, text: &str) -> ParserResult {
self.base.detect_and_parse_reasoning(text)
fn detect_and_parse_reasoning(&mut self, text: &str, token_ids: &[u32]) -> ParserResult {
self.base.detect_and_parse_reasoning(text, token_ids)
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::fmt::Debug;
use crate::ParserResult;
use crate::ReasoningParser;
use openai_harmony::StreamableParser;
use openai_harmony::chat::TextContent;
use openai_harmony::{HarmonyEncoding, HarmonyEncodingName, chat::Role, load_harmony_encoding};
///// Static initialization of harmony encoder to not affect performance every time a parser is created
/// This is because load_harmony_encoding downloads some tiktoken files into a directory and we don't want to do this every time we create a parser.
use std::sync::OnceLock;
static GLOBAL_HARMONY_GPTOSS_ENCODING: OnceLock<Result<HarmonyEncoding, anyhow::Error>> =
OnceLock::new();
fn get_harmony_encoding() -> &'static Result<HarmonyEncoding, anyhow::Error> {
GLOBAL_HARMONY_GPTOSS_ENCODING
.get_or_init(|| load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss))
}
pub struct GptOssReasoningParser {
parser: StreamableParser,
}
/// Implement Debug for GptOssReasoningParser separately because StreamableParser does not implement Debug
impl Debug for GptOssReasoningParser {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GptOssReasoningParser")
.field("parser", &self.parser.state_json())
.finish()
}
}
impl GptOssReasoningParser {
pub fn new() -> anyhow::Result<Self> {
let parser = match get_harmony_encoding().as_ref() {
Ok(enc) => match StreamableParser::new(enc.clone(), Some(Role::Assistant)) {
Ok(p) => p,
Err(e) => {
tracing::warn!("Harmony StreamableParser init failed for GPT OSS: {e}");
return Err(anyhow::anyhow!(
"Failed to load Harmony StreamableParser: {e}"
));
}
},
Err(e) => {
tracing::warn!("Failed to load Harmony encoding for GPT OSS: {e}");
return Err(anyhow::anyhow!("Failed to load Harmony encoding: {e}"));
}
};
Ok(Self { parser })
}
}
impl ReasoningParser for GptOssReasoningParser {
fn detect_and_parse_reasoning(&mut self, _text: &str, token_ids: &[u32]) -> ParserResult {
tracing::debug!(
"detect_and_parse_reasoning called with {} token_ids",
token_ids.len()
);
let parser = &mut self.parser;
for (i, token_id) in token_ids.iter().enumerate() {
tracing::debug!(
"Processing token {} of {}: {}",
i + 1,
token_ids.len(),
token_id
);
if let Err(e) = parser.process(*token_id) {
tracing::warn!("Harmony parse error for token_id {token_id}: {e}");
return ParserResult::default();
}
}
let output_msgs = parser.messages();
tracing::debug!("Parser has {} output messages", output_msgs.len());
match output_msgs.len() {
0 => {
tracing::debug!("No output messages, using current content");
let current = parser.current_content().unwrap_or_default();
tracing::debug!("Current content length: {}", current.len());
ParserResult {
normal_text: String::new(),
reasoning_text: current,
}
}
1 => {
tracing::debug!("Single output message detected");
let mut reasoning_text = String::new();
if let Some(openai_harmony::chat::Content::Text(TextContent { text })) =
output_msgs[0].content.first()
{
reasoning_text.push_str(text);
tracing::debug!("Extracted reasoning text length: {}", reasoning_text.len());
}
let current = parser.current_content().unwrap_or_default();
tracing::debug!("Current content length: {}", current.len());
ParserResult {
normal_text: current,
reasoning_text,
}
}
_ => {
tracing::debug!("Multiple output messages detected: {}", output_msgs.len());
let mut reasoning_text = String::new();
let mut normal_text = String::new();
// Loop until second last message
for (i, parse_msg) in output_msgs.iter().take(output_msgs.len() - 1).enumerate() {
tracing::debug!("Processing reasoning message {}", i + 1);
if let Some(openai_harmony::chat::Content::Text(TextContent { text })) =
parse_msg.content.first()
{
reasoning_text.push_str(text);
tracing::debug!("Added {} chars to reasoning text", text.len());
}
}
let last_msg = &output_msgs[output_msgs.len() - 1];
tracing::debug!("Processing final message");
// Handle the last message
if let Some(openai_harmony::chat::Content::Text(TextContent { text })) =
last_msg.content.first()
{
normal_text.push_str(text);
tracing::debug!("Added {} chars to normal text", text.len());
}
tracing::debug!(
"Final result - normal_text: {} chars, reasoning_text: {} chars",
normal_text.len(),
reasoning_text.len()
);
ParserResult {
normal_text,
reasoning_text,
}
}
}
}
fn parse_reasoning_streaming_incremental(
&mut self,
_text: &str,
token_ids: &[u32],
) -> ParserResult {
tracing::debug!(
"parse_reasoning_streaming_incremental called with {} token_ids",
token_ids.len()
);
let parser: &mut StreamableParser = &mut self.parser;
for (i, token_id) in token_ids.iter().enumerate() {
tracing::debug!(
"Processing streaming token {} of {}: {}",
i + 1,
token_ids.len(),
token_id
);
if let Err(e) = parser.process(*token_id) {
tracing::warn!("Harmony parse error for token_id {token_id}: {e}");
return ParserResult::default();
}
}
if let Some(channel) = self.parser.current_channel() {
tracing::debug!("Current channel: {}", channel);
if channel == "final" {
tracing::debug!("In final channel, processing normal text");
// If we're in the final channel, we should not parse reasoning
if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
tracing::debug!("Got normal text delta of {} chars", current.len());
return ParserResult {
normal_text: current,
reasoning_text: String::new(),
};
}
tracing::debug!("No content delta in final channel");
ParserResult::default()
} else {
tracing::debug!("In reasoning channel: {}", channel);
if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
tracing::debug!("Got reasoning text delta of {} chars", current.len());
return ParserResult {
normal_text: String::new(),
reasoning_text: current,
};
}
tracing::debug!("No content delta in reasoning channel");
ParserResult::default()
}
} else {
tracing::debug!("No current channel detected");
ParserResult::default()
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_gpt_oss_reasoning_parser() {
let mut parser = GptOssReasoningParser::new().expect("Failed to create parser");
let enc = get_harmony_encoding()
.as_ref()
.expect("Failed to get encoding");
let text = "<|channel|>analysis<|message|>The user asks a simple factual question: capital of Brazil. The answer is Brasília. No additional explanation needed.<|end|><|start|>assistant<|channel|>final<|message|>The capital of Brazil is Brasília.";
let token_ids = enc.tokenizer().encode_with_special_tokens(text); // Example token IDs
let result = parser.detect_and_parse_reasoning("Test text", &token_ids);
assert!(result.normal_text == "The capital of Brazil is Brasília.");
assert!(
result.reasoning_text
== "The user asks a simple factual question: capital of Brazil. The answer is Brasília. No additional explanation needed."
);
}
#[test]
fn test_gpt_oss_reasoning_parser_streaming() {
let mut parser = GptOssReasoningParser::new().expect("Failed to create parser");
let enc = get_harmony_encoding()
.as_ref()
.expect("Failed to get encoding");
let text = "<|channel|>analysis<|message|>The user asks a simple factual question: capital of Brazil. The answer is Brasília. No additional explanation needed.<|end|><|start|>assistant<|channel|>final<|message|>The capital of Brazil is Brasília.";
let token_ids = enc.tokenizer().encode_with_special_tokens(text); // Example token IDs
let mut reasoning_text_incr = String::new();
let mut normal_text_incr = String::new();
for token in token_ids.iter() {
let result = parser.parse_reasoning_streaming_incremental("Test text", &[(*token)]);
normal_text_incr.push_str(&result.normal_text);
reasoning_text_incr.push_str(&result.reasoning_text);
}
assert!(normal_text_incr == "The capital of Brazil is Brasília.");
assert!(
reasoning_text_incr
== "The user asks a simple factual question: capital of Brazil. The answer is Brasília. No additional explanation needed."
);
}
}
......@@ -3,10 +3,12 @@
mod base_parser;
mod deepseek_r1_parser;
mod gpt_oss_parser;
// Re-export main types and functions for convenience
pub use base_parser::BasicReasoningParser;
pub use deepseek_r1_parser::DeepseekR1ReasoningParser;
pub use gpt_oss_parser::GptOssReasoningParser;
#[derive(Debug, Clone, Default)]
pub struct ParserResult {
......@@ -39,12 +41,16 @@ pub trait ReasoningParser: Send + std::fmt::Debug {
/// Parses a standalone, non-streaming input chunk. Implementations may reset or ignore
/// internal streaming state and should return the split of normal vs reasoning text for
/// this complete input. Marker tokens must not be included in either output.
fn detect_and_parse_reasoning(&self, text: &str) -> ParserResult;
fn detect_and_parse_reasoning(&mut self, text: &str, token_ids: &[u32]) -> ParserResult;
/// Parses a streaming chunk and updates internal state. The return value should be the
/// delta: only the newly discovered normal and reasoning text attributable to this chunk
/// (not the cumulative totals). Marker tokens must not be included in either output.
fn parse_reasoning_streaming_incremental(&mut self, text: &str) -> ParserResult;
fn parse_reasoning_streaming_incremental(
&mut self,
text: &str,
token_ids: &[u32],
) -> ParserResult;
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
......@@ -52,6 +58,7 @@ pub trait ReasoningParser: Send + std::fmt::Debug {
pub enum ReasoningParserType {
DeepseekR1,
Basic,
GptOss,
}
#[derive(std::fmt::Debug)]
......@@ -60,12 +67,17 @@ pub struct ReasoningParserWrapper {
}
impl ReasoningParser for ReasoningParserWrapper {
fn detect_and_parse_reasoning(&self, text: &str) -> ParserResult {
self.parser.detect_and_parse_reasoning(text)
fn detect_and_parse_reasoning(&mut self, text: &str, token_ids: &[u32]) -> ParserResult {
self.parser.detect_and_parse_reasoning(text, token_ids)
}
fn parse_reasoning_streaming_incremental(&mut self, text: &str) -> ParserResult {
self.parser.parse_reasoning_streaming_incremental(text)
fn parse_reasoning_streaming_incremental(
&mut self,
text: &str,
token_ids: &[u32],
) -> ParserResult {
self.parser
.parse_reasoning_streaming_incremental(text, token_ids)
}
}
......@@ -83,6 +95,24 @@ impl ReasoningParserType {
true,
)),
},
ReasoningParserType::GptOss => match GptOssReasoningParser::new() {
Ok(parser) => ReasoningParserWrapper {
parser: Box::new(parser),
},
Err(e) => {
tracing::warn!(
"GptOssReasoningParser could not be initialized, falling back to Basic Reasoning Parser: {e}"
);
ReasoningParserWrapper {
parser: Box::new(BasicReasoningParser::new(
"<think>".into(),
"</think>".into(),
false,
true,
)),
}
}
},
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment