Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
da810a26
Unverified
Commit
da810a26
authored
Mar 15, 2026
by
Biswa Panda
Committed by
GitHub
Mar 15, 2026
Browse files
feat: integrate fastokens BPE tokenizer backend (#7387)
parent
cdf66b11
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
551 additions
and
12 deletions
+551
-12
.cargo/config.toml
.cargo/config.toml
+6
-0
Cargo.lock
Cargo.lock
+78
-4
Cargo.toml
Cargo.toml
+1
-0
components/src/dynamo/frontend/frontend_args.py
components/src/dynamo/frontend/frontend_args.py
+22
-0
components/src/dynamo/frontend/main.py
components/src/dynamo/frontend/main.py
+4
-0
lib/bindings/kvbm/Cargo.lock
lib/bindings/kvbm/Cargo.lock
+93
-4
lib/bindings/python/Cargo.lock
lib/bindings/python/Cargo.lock
+93
-4
lib/llm/Cargo.toml
lib/llm/Cargo.toml
+1
-0
lib/llm/src/model_card.rs
lib/llm/src/model_card.rs
+39
-0
lib/llm/src/tokenizers.rs
lib/llm/src/tokenizers.rs
+2
-0
lib/llm/src/tokenizers/fastokens.rs
lib/llm/src/tokenizers/fastokens.rs
+160
-0
lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json
lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json
+52
-0
No files found.
.cargo/config.toml
View file @
da810a26
...
@@ -13,3 +13,9 @@ rustflags = ["-C", "target-cpu=x86-64-v3", "--cfg", "tokio_unstable"]
...
@@ -13,3 +13,9 @@ rustflags = ["-C", "target-cpu=x86-64-v3", "--cfg", "tokio_unstable"]
[target.aarch64-unknown-linux-gnu]
[target.aarch64-unknown-linux-gnu]
rustflags
=
[
"-C"
,
"target-cpu=neoverse-n1"
,
"--cfg"
,
"tokio_unstable"
]
rustflags
=
[
"-C"
,
"target-cpu=neoverse-n1"
,
"--cfg"
,
"tokio_unstable"
]
# Static-link pcre2 C library (used by the fastokens tokenizer crate).
# Without this, pcre2-sys tries to find a system libpcre2 via pkg-config,
# which breaks Docker builds and bundles a .so into the Python wheel.
[env]
PCRE2_SYS_STATIC
=
"1"
Cargo.lock
View file @
da810a26
...
@@ -1063,7 +1063,7 @@ version = "3.1.1"
...
@@ -1063,7 +1063,7 @@ version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
dependencies = [
dependencies = [
"windows-sys 0.
61.2
",
"windows-sys 0.
48.0
",
]
]
[[package]]
[[package]]
...
@@ -1481,6 +1481,12 @@ dependencies = [
...
@@ -1481,6 +1481,12 @@ dependencies = [
"syn 2.0.117",
"syn 2.0.117",
]
]
[[package]]
name = "daachorse"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36"
[[package]]
[[package]]
name = "darling"
name = "darling"
version = "0.20.11"
version = "0.20.11"
...
@@ -1986,6 +1992,7 @@ dependencies = [
...
@@ -1986,6 +1992,7 @@ dependencies = [
"dynamo-runtime",
"dynamo-runtime",
"dynamo-tokens",
"dynamo-tokens",
"either",
"either",
"fastokens",
"ffmpeg-next",
"ffmpeg-next",
"flate2",
"flate2",
"futures",
"futures",
...
@@ -2418,6 +2425,36 @@ dependencies = [
...
@@ -2418,6 +2425,36 @@ dependencies = [
"regex-syntax",
"regex-syntax",
]
]
[[package]]
name = "fancy-regex"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
dependencies = [
"bit-set 0.8.0",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "fastokens"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aca43986686f3dff724cc465e0afcf883c361112474072b1d825058852b25f9c"
dependencies = [
"daachorse",
"fancy-regex 0.17.0",
"hf-hub",
"icu_normalizer",
"memchr",
"pcre2",
"rayon",
"serde",
"serde_json",
"strum",
"thiserror 2.0.18",
]
[[package]]
[[package]]
name = "fastrand"
name = "fastrand"
version = "2.3.0"
version = "2.3.0"
...
@@ -3275,6 +3312,9 @@ dependencies = [
...
@@ -3275,6 +3312,9 @@ dependencies = [
"icu_properties",
"icu_properties",
"icu_provider",
"icu_provider",
"smallvec",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
"zerovec",
]
]
...
@@ -5374,6 +5414,28 @@ version = "0.2.3"
...
@@ -5374,6 +5414,28 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "pcre2"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67"
dependencies = [
"libc",
"log",
"pcre2-sys",
]
[[package]]
name = "pcre2-sys"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
[[package]]
name = "pear"
name = "pear"
version = "0.2.9"
version = "0.2.9"
...
@@ -5800,7 +5862,7 @@ version = "0.13.5"
...
@@ -5800,7 +5862,7 @@ version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
dependencies = [
dependencies = [
"heck 0.
5.0
",
"heck 0.
4.1
",
"itertools 0.14.0",
"itertools 0.14.0",
"log",
"log",
"multimap",
"multimap",
...
@@ -5820,7 +5882,7 @@ version = "0.14.3"
...
@@ -5820,7 +5882,7 @@ version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
dependencies = [
dependencies = [
"heck 0.
5.0
",
"heck 0.
4.1
",
"itertools 0.14.0",
"itertools 0.14.0",
"log",
"log",
"multimap",
"multimap",
...
@@ -8559,6 +8621,12 @@ version = "0.7.6"
...
@@ -8559,6 +8621,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
[[package]]
name = "utf8_iter"
name = "utf8_iter"
version = "1.0.4"
version = "1.0.4"
...
@@ -9011,7 +9079,7 @@ version = "0.1.11"
...
@@ -9011,7 +9079,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
dependencies = [
"windows-sys 0.
61.2
",
"windows-sys 0.
48.0
",
]
]
[[package]]
[[package]]
...
@@ -9428,6 +9496,12 @@ dependencies = [
...
@@ -9428,6 +9496,12 @@ dependencies = [
"wasmparser",
"wasmparser",
]
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
[[package]]
name = "writeable"
name = "writeable"
version = "0.6.2"
version = "0.6.2"
...
...
Cargo.toml
View file @
da810a26
...
@@ -46,6 +46,7 @@ dynamo-mocker = { path = "lib/mocker", version = "1.0.0" }
...
@@ -46,6 +46,7 @@ dynamo-mocker = { path = "lib/mocker", version = "1.0.0" }
dynamo-kv-router
=
{
path
=
"lib/kv-router"
,
version
=
"1.0.0"
,
features
=
[
"metrics"
,
"runtime-protocols"
]
}
dynamo-kv-router
=
{
path
=
"lib/kv-router"
,
version
=
"1.0.0"
,
features
=
[
"metrics"
,
"runtime-protocols"
]
}
dynamo-async-openai
=
{
path
=
"lib/async-openai"
,
version
=
"1.0.0"
,
features
=
["byot"]
}
dynamo-async-openai
=
{
path
=
"lib/async-openai"
,
version
=
"1.0.0"
,
features
=
["byot"]
}
dynamo-parsers
=
{
path
=
"lib/parsers"
,
version
=
"1.0.0"
}
dynamo-parsers
=
{
path
=
"lib/parsers"
,
version
=
"1.0.0"
}
fastokens
=
{
version
=
"0.1.0"
}
# kvbm
# kvbm
kvbm-common
=
{
path
=
"lib/kvbm-common"
,
version
=
"0.1.0"
}
kvbm-common
=
{
path
=
"lib/kvbm-common"
,
version
=
"0.1.0"
}
...
...
components/src/dynamo/frontend/frontend_args.py
View file @
da810a26
...
@@ -76,6 +76,9 @@ class FrontendConfig(KvRouterConfigBase):
...
@@ -76,6 +76,9 @@ class FrontendConfig(KvRouterConfigBase):
enable_streaming_tool_dispatch
:
bool
enable_streaming_tool_dispatch
:
bool
enable_streaming_reasoning_dispatch
:
bool
enable_streaming_reasoning_dispatch
:
bool
preprocess_workers
:
int
preprocess_workers
:
int
tokenizer_backend
:
str
_VALID_TOKENIZER_BACKENDS
=
{
"default"
,
"fastokens"
}
def
validate
(
self
)
->
None
:
def
validate
(
self
)
->
None
:
if
bool
(
self
.
tls_cert_path
)
^
bool
(
self
.
tls_key_path
):
# ^ is XOR
if
bool
(
self
.
tls_cert_path
)
^
bool
(
self
.
tls_key_path
):
# ^ is XOR
...
@@ -88,6 +91,11 @@ class FrontendConfig(KvRouterConfigBase):
...
@@ -88,6 +91,11 @@ class FrontendConfig(KvRouterConfigBase):
)
)
if
self
.
router_enable_cache_control
and
self
.
router_mode
!=
"kv"
:
if
self
.
router_enable_cache_control
and
self
.
router_mode
!=
"kv"
:
raise
ValueError
(
"--enable-cache-control requires --router-mode=kv"
)
raise
ValueError
(
"--enable-cache-control requires --router-mode=kv"
)
if
self
.
tokenizer_backend
not
in
self
.
_VALID_TOKENIZER_BACKENDS
:
raise
ValueError
(
f
"--tokenizer: invalid value '
{
self
.
tokenizer_backend
}
' "
f
"(choose from
{
sorted
(
self
.
_VALID_TOKENIZER_BACKENDS
)
}
)"
)
@
register_encoder
(
FrontendConfig
)
@
register_encoder
(
FrontendConfig
)
...
@@ -424,3 +432,17 @@ class FrontendArgGroup(ArgGroup):
...
@@ -424,3 +432,17 @@ class FrontendArgGroup(ArgGroup):
),
),
arg_type
=
int
,
arg_type
=
int
,
)
)
add_argument
(
g
,
flag_name
=
"--tokenizer"
,
env_var
=
"DYN_TOKENIZER"
,
default
=
"default"
,
dest
=
"tokenizer_backend"
,
help
=
(
"Tokenizer backend for BPE models: 'default' (HuggingFace tokenizers library) "
"or 'fastokens' (fastokens crate for high-performance BPE encoding). "
"Decoding always uses HuggingFace. Has no effect on TikToken models."
),
choices
=
[
"default"
,
"fastokens"
],
)
components/src/dynamo/frontend/main.py
View file @
da810a26
...
@@ -165,6 +165,10 @@ async def async_main():
...
@@ -165,6 +165,10 @@ async def async_main():
config
,
vllm_flags
,
sglang_flags
=
parse_args
()
config
,
vllm_flags
,
sglang_flags
=
parse_args
()
dump_config
(
config
.
dump_config_to
,
config
)
dump_config
(
config
.
dump_config_to
,
config
)
os
.
environ
[
"DYN_EVENT_PLANE"
]
=
config
.
event_plane
os
.
environ
[
"DYN_EVENT_PLANE"
]
=
config
.
event_plane
if
config
.
tokenizer_backend
==
"fastokens"
:
os
.
environ
[
"DYN_TOKENIZER"
]
=
"fastokens"
else
:
os
.
environ
.
pop
(
"DYN_TOKENIZER"
,
None
)
logger
.
info
(
logger
.
info
(
f
"Request migration
{
'enabled'
if
config
.
migration_limit
>
0
else
'disabled'
}
"
f
"Request migration
{
'enabled'
if
config
.
migration_limit
>
0
else
'disabled'
}
"
f
"(limit:
{
config
.
migration_limit
}
)"
f
"(limit:
{
config
.
migration_limit
}
)"
...
...
lib/bindings/kvbm/Cargo.lock
View file @
da810a26
...
@@ -585,7 +585,16 @@ version = "0.5.3"
...
@@ -585,7 +585,16 @@ version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
dependencies = [
"bit-vec",
"bit-vec 0.6.3",
]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec 0.8.0",
]
]
[[package]]
[[package]]
...
@@ -594,6 +603,12 @@ version = "0.6.3"
...
@@ -594,6 +603,12 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
[[package]]
name = "bit_field"
name = "bit_field"
version = "0.10.3"
version = "0.10.3"
...
@@ -1139,6 +1154,12 @@ dependencies = [
...
@@ -1139,6 +1154,12 @@ dependencies = [
"syn",
"syn",
]
]
[[package]]
name = "daachorse"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36"
[[package]]
[[package]]
name = "darling"
name = "darling"
version = "0.20.11"
version = "0.20.11"
...
@@ -1608,6 +1629,7 @@ dependencies = [
...
@@ -1608,6 +1629,7 @@ dependencies = [
"dynamo-runtime",
"dynamo-runtime",
"dynamo-tokens",
"dynamo-tokens",
"either",
"either",
"fastokens",
"flate2",
"flate2",
"futures",
"futures",
"futures-util",
"futures-util",
...
@@ -2004,11 +2026,41 @@ version = "0.13.0"
...
@@ -2004,11 +2026,41 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
dependencies = [
"bit-set",
"bit-set
0.5.3
",
"regex-automata",
"regex-automata",
"regex-syntax",
"regex-syntax",
]
]
[[package]]
name = "fancy-regex"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
dependencies = [
"bit-set 0.8.0",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "fastokens"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aca43986686f3dff724cc465e0afcf883c361112474072b1d825058852b25f9c"
dependencies = [
"daachorse",
"fancy-regex 0.17.0",
"hf-hub",
"icu_normalizer",
"memchr",
"pcre2",
"rayon",
"serde",
"serde_json",
"strum",
"thiserror 2.0.18",
]
[[package]]
[[package]]
name = "fastrand"
name = "fastrand"
version = "2.3.0"
version = "2.3.0"
...
@@ -2705,6 +2757,9 @@ dependencies = [
...
@@ -2705,6 +2757,9 @@ dependencies = [
"icu_properties",
"icu_properties",
"icu_provider",
"icu_provider",
"smallvec",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
"zerovec",
]
]
...
@@ -4351,7 +4406,7 @@ dependencies = [
...
@@ -4351,7 +4406,7 @@ dependencies = [
"base64 0.22.1",
"base64 0.22.1",
"bstr",
"bstr",
"clap",
"clap",
"fancy-regex",
"fancy-regex
0.13.0
",
"futures",
"futures",
"image",
"image",
"regex",
"regex",
...
@@ -4562,6 +4617,28 @@ version = "0.2.3"
...
@@ -4562,6 +4617,28 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "pcre2"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67"
dependencies = [
"libc",
"log",
"pcre2-sys",
]
[[package]]
name = "pcre2-sys"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
[[package]]
name = "pear"
name = "pear"
version = "0.2.9"
version = "0.2.9"
...
@@ -6516,7 +6593,7 @@ dependencies = [
...
@@ -6516,7 +6593,7 @@ dependencies = [
"anyhow",
"anyhow",
"base64 0.22.1",
"base64 0.22.1",
"bstr",
"bstr",
"fancy-regex",
"fancy-regex
0.13.0
",
"lazy_static",
"lazy_static",
"regex",
"regex",
"rustc-hash 1.1.0",
"rustc-hash 1.1.0",
...
@@ -7312,6 +7389,12 @@ dependencies = [
...
@@ -7312,6 +7389,12 @@ dependencies = [
"serde_derive",
"serde_derive",
]
]
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
[[package]]
name = "utf8_iter"
name = "utf8_iter"
version = "1.0.4"
version = "1.0.4"
...
@@ -8079,6 +8162,12 @@ dependencies = [
...
@@ -8079,6 +8162,12 @@ dependencies = [
"wasmparser",
"wasmparser",
]
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
[[package]]
name = "writeable"
name = "writeable"
version = "0.6.2"
version = "0.6.2"
...
...
lib/bindings/python/Cargo.lock
View file @
da810a26
...
@@ -603,7 +603,16 @@ version = "0.5.3"
...
@@ -603,7 +603,16 @@ version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
dependencies = [
"bit-vec",
"bit-vec 0.6.3",
]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec 0.8.0",
]
]
[[package]]
[[package]]
...
@@ -612,6 +621,12 @@ version = "0.6.3"
...
@@ -612,6 +621,12 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
[[package]]
name = "bit_field"
name = "bit_field"
version = "0.10.3"
version = "0.10.3"
...
@@ -1157,6 +1172,12 @@ dependencies = [
...
@@ -1157,6 +1172,12 @@ dependencies = [
"syn",
"syn",
]
]
[[package]]
name = "daachorse"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36"
[[package]]
[[package]]
name = "darling"
name = "darling"
version = "0.20.11"
version = "0.20.11"
...
@@ -1621,6 +1642,7 @@ dependencies = [
...
@@ -1621,6 +1642,7 @@ dependencies = [
"dynamo-runtime",
"dynamo-runtime",
"dynamo-tokens",
"dynamo-tokens",
"either",
"either",
"fastokens",
"ffmpeg-next",
"ffmpeg-next",
"flate2",
"flate2",
"futures",
"futures",
...
@@ -2048,11 +2070,41 @@ version = "0.13.0"
...
@@ -2048,11 +2070,41 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
dependencies = [
"bit-set",
"bit-set
0.5.3
",
"regex-automata",
"regex-automata",
"regex-syntax",
"regex-syntax",
]
]
[[package]]
name = "fancy-regex"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
dependencies = [
"bit-set 0.8.0",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "fastokens"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aca43986686f3dff724cc465e0afcf883c361112474072b1d825058852b25f9c"
dependencies = [
"daachorse",
"fancy-regex 0.17.0",
"hf-hub",
"icu_normalizer",
"memchr",
"pcre2",
"rayon",
"serde",
"serde_json",
"strum",
"thiserror 2.0.18",
]
[[package]]
[[package]]
name = "fastrand"
name = "fastrand"
version = "2.3.0"
version = "2.3.0"
...
@@ -2774,6 +2826,9 @@ dependencies = [
...
@@ -2774,6 +2826,9 @@ dependencies = [
"icu_properties",
"icu_properties",
"icu_provider",
"icu_provider",
"smallvec",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
"zerovec",
]
]
...
@@ -4408,7 +4463,7 @@ dependencies = [
...
@@ -4408,7 +4463,7 @@ dependencies = [
"base64 0.22.1",
"base64 0.22.1",
"bstr",
"bstr",
"clap",
"clap",
"fancy-regex",
"fancy-regex
0.13.0
",
"futures",
"futures",
"image",
"image",
"regex",
"regex",
...
@@ -4619,6 +4674,28 @@ version = "0.2.3"
...
@@ -4619,6 +4674,28 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "pcre2"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67"
dependencies = [
"libc",
"log",
"pcre2-sys",
]
[[package]]
name = "pcre2-sys"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
[[package]]
name = "pear"
name = "pear"
version = "0.2.9"
version = "0.2.9"
...
@@ -6583,7 +6660,7 @@ dependencies = [
...
@@ -6583,7 +6660,7 @@ dependencies = [
"anyhow",
"anyhow",
"base64 0.22.1",
"base64 0.22.1",
"bstr",
"bstr",
"fancy-regex",
"fancy-regex
0.13.0
",
"lazy_static",
"lazy_static",
"regex",
"regex",
"rustc-hash 1.1.0",
"rustc-hash 1.1.0",
...
@@ -7379,6 +7456,12 @@ dependencies = [
...
@@ -7379,6 +7456,12 @@ dependencies = [
"serde_derive",
"serde_derive",
]
]
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
[[package]]
name = "utf8_iter"
name = "utf8_iter"
version = "1.0.4"
version = "1.0.4"
...
@@ -8163,6 +8246,12 @@ dependencies = [
...
@@ -8163,6 +8246,12 @@ dependencies = [
"wasmparser",
"wasmparser",
]
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
[[package]]
name = "writeable"
name = "writeable"
version = "0.6.2"
version = "0.6.2"
...
...
lib/llm/Cargo.toml
View file @
da810a26
...
@@ -143,6 +143,7 @@ tokenizers = { version = "0.21.4", default-features = false, features = [
...
@@ -143,6 +143,7 @@ tokenizers = { version = "0.21.4", default-features = false, features = [
]
}
]
}
tiktoken-rs
=
{
version
=
"0.9"
,
default-features
=
false
}
tiktoken-rs
=
{
version
=
"0.9"
,
default-features
=
false
}
rustc-hash
=
"1.1"
rustc-hash
=
"1.1"
fastokens
=
{
workspace
=
true
}
# backend
# backend
galil-seiferas
=
{
version
=
"0.1"
}
galil-seiferas
=
{
version
=
"0.1"
}
...
...
lib/llm/src/model_card.rs
View file @
da810a26
...
@@ -378,12 +378,51 @@ impl ModelDeploymentCard {
...
@@ -378,12 +378,51 @@ impl ModelDeploymentCard {
/// Load the tokenizer as a generic, backend-agnostic `Tokenizer` trait object.
/// Load the tokenizer as a generic, backend-agnostic `Tokenizer` trait object.
/// This supports both HuggingFace `tokenizer.json` and tiktoken `.model`/`.tiktoken` files.
/// This supports both HuggingFace `tokenizer.json` and tiktoken `.model`/`.tiktoken` files.
///
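/// When the `DYN_TOKENIZER` env var is set to `fastokens`, HuggingFace `tokenizer.json` models are
/// encoded with `fastokens`; if that backend cannot be loaded, the HuggingFace tokenizer is used instead.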
pub
fn
tokenizer
(
&
self
)
->
anyhow
::
Result
<
crate
::
tokenizers
::
Tokenizer
>
{
pub
fn
tokenizer
(
&
self
)
->
anyhow
::
Result
<
crate
::
tokenizers
::
Tokenizer
>
{
let
use_fast
=
match
std
::
env
::
var
(
"DYN_TOKENIZER"
)
{
Ok
(
v
)
if
v
==
"fastokens"
=>
true
,
Ok
(
v
)
if
v
==
"default"
||
v
.is_empty
()
=>
false
,
Ok
(
v
)
=>
{
tracing
::
warn!
(
value
=
%
v
,
"Unrecognized DYN_TOKENIZER value, expected 'fastokens' or 'default'; falling back to default"
);
false
}
Err
(
_
)
=>
false
,
};
match
&
self
.tokenizer
{
match
&
self
.tokenizer
{
Some
(
TokenizerKind
::
HfTokenizerJson
(
checked_file
))
=>
{
Some
(
TokenizerKind
::
HfTokenizerJson
(
checked_file
))
=>
{
let
p
=
checked_file
.path
()
.ok_or_else
(||
{
let
p
=
checked_file
.path
()
.ok_or_else
(||
{
anyhow
::
anyhow!
(
"Tokenizer is URL-backed ({:?})"
,
checked_file
.url
())
anyhow
::
anyhow!
(
"Tokenizer is URL-backed ({:?})"
,
checked_file
.url
())
})
?
;
})
?
;
// Try fastokens backend if requested
if
use_fast
{
if
let
Some
(
path_str
)
=
p
.to_str
()
{
match
crate
::
tokenizers
::
FastTokenizer
::
from_file
(
path_str
)
{
Ok
(
fast
)
=>
{
tracing
::
info!
(
"Using fastokens tokenizer backend"
);
return
Ok
(
crate
::
tokenizers
::
Tokenizer
::
from
(
Arc
::
new
(
fast
)));
}
Err
(
e
)
=>
{
tracing
::
warn!
(
%
e
,
"Failed to load fastokens, falling back to HuggingFace"
);
}
}
}
else
{
tracing
::
warn!
(
path
=
%
p
.display
(),
"Tokenizer path contains non-UTF-8 characters, skipping fastokens; falling back to HuggingFace"
);
}
}
let
hf
=
HfTokenizer
::
from_file
(
p
)
let
hf
=
HfTokenizer
::
from_file
(
p
)
.inspect_err
(|
err
|
{
.inspect_err
(|
err
|
{
if
let
Some
(
serde_err
)
=
err
.downcast_ref
::
<
serde_json
::
Error
>
()
if
let
Some
(
serde_err
)
=
err
.downcast_ref
::
<
serde_json
::
Error
>
()
...
...
lib/llm/src/tokenizers.rs
View file @
da810a26
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
pub
mod
fastokens
;
pub
mod
hf
;
pub
mod
hf
;
pub
mod
tiktoken
;
pub
mod
tiktoken
;
...
@@ -15,6 +16,7 @@ use std::{ops::Deref, path::Path};
...
@@ -15,6 +16,7 @@ use std::{ops::Deref, path::Path};
use
crate
::
protocols
::
TokenIdType
;
use
crate
::
protocols
::
TokenIdType
;
pub
use
anyhow
::{
Error
,
Result
};
pub
use
anyhow
::{
Error
,
Result
};
pub
use
fastokens
::
FastTokenizer
;
pub
use
hf
::
HuggingFaceTokenizer
;
pub
use
hf
::
HuggingFaceTokenizer
;
pub
use
tiktoken
::
TikTokenTokenizer
;
pub
use
tiktoken
::
TikTokenTokenizer
;
...
...
lib/llm/src/tokenizers/fastokens.rs
0 → 100644
View file @
da810a26
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Fastokens backend using the `fastokens` crate for high-performance BPE encoding.
//!
//! `fastokens` only supports encoding, so this module provides a hybrid tokenizer that
//! uses `fastokens` for encoding and falls back to `HuggingFaceTokenizer` for decoding.
//! Both are loaded from the same `tokenizer.json` file.
use
std
::
path
::
Path
;
use
rayon
::
prelude
::
*
;
use
super
::{
Encoding
,
Error
,
Result
,
TokenIdType
,
hf
::
HuggingFaceTokenizer
,
traits
::{
Decoder
,
Encoder
,
Tokenizer
},
};
/// Tokenizer that splits its work between two backends.
///
/// Text-to-token encoding is handled by the `fastokens` crate, while token-to-text
/// decoding is delegated to the HuggingFace tokenizer. [`FastTokenizer::from_file`]
/// builds both halves from a single `tokenizer.json` path.
pub struct FastTokenizer {
    /// Encoder backed by the `fastokens` crate.
    fast_encoder: fastokens::Tokenizer,
    /// HuggingFace tokenizer; this type only calls it for decoding.
    hf_decoder: HuggingFaceTokenizer,
}
impl
FastTokenizer
{
pub
fn
from_file
(
path
:
&
str
)
->
Result
<
Self
>
{
let
fast_encoder
=
fastokens
::
Tokenizer
::
from_file
(
Path
::
new
(
path
))
.map_err
(|
e
|
Error
::
msg
(
format!
(
"Error loading fastokens tokenizer: {e}"
)))
?
;
let
hf_decoder
=
HuggingFaceTokenizer
::
from_file
(
path
)
?
;
Ok
(
Self
{
fast_encoder
,
hf_decoder
,
})
}
}
impl
Encoder
for
FastTokenizer
{
fn
encode
(
&
self
,
input
:
&
str
)
->
Result
<
Encoding
>
{
let
ids
=
self
.fast_encoder
.encode
(
input
)
.map_err
(|
e
|
Error
::
msg
(
format!
(
"Fastokens encode error: {e}"
)))
?
;
Ok
(
Encoding
::
Sp
(
ids
))
}
fn
encode_batch
(
&
self
,
inputs
:
&
[
&
str
])
->
Result
<
Vec
<
Encoding
>>
{
inputs
.par_iter
()
.map
(|
input
|
self
.encode
(
input
))
.collect
()
}
}
impl Decoder for FastTokenizer {
    /// Decodes `token_ids` into text by forwarding the call, including the
    /// `skip_special_tokens` flag, to the HuggingFace tokenizer.
    ///
    /// `fastokens` is not involved in decoding; any error comes from the HuggingFace
    /// decoder and is returned as-is.
    fn decode(&self, token_ids: &[TokenIdType], skip_special_tokens: bool) -> Result<String> {
        let Self { hf_decoder, .. } = self;
        hf_decoder.decode(token_ids, skip_special_tokens)
    }
}
// Intentionally empty: no `Tokenizer` items are overridden for `FastTokenizer`.
impl Tokenizer for FastTokenizer {}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::HuggingFaceTokenizer;

    // Small synthetic BPE fixture without a normalizer or post-processor, which keeps it
    // compatible with fastokens. Its vocab covers H,T,a,d,e,h,i,l,o,r,s,t,w plus punctuation.
    const TOKENIZER_PATH: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/sample-models/minimal-bpe/tokenizer.json"
    );

    /// Returns `s` with every whitespace character removed.
    fn strip_whitespace(s: &str) -> String {
        s.chars().filter(|c| !c.is_whitespace()).collect()
    }

    #[test]
    fn test_fast_encode_decode_roundtrip() {
        let tokenizer = FastTokenizer::from_file(TOKENIZER_PATH).unwrap();

        // Exercise both directions. The fixture has a null decoder, so HF puts spaces
        // between tokens; exact equality with the input is therefore not expected and
        // only success plus non-empty output is checked first.
        let text = "Hello, world!";

        let encoding = tokenizer.encode(text).unwrap();
        assert!(!encoding.token_ids().is_empty());

        let decoded = tokenizer.decode(encoding.token_ids(), true).unwrap();
        assert!(!decoded.is_empty());

        // Ignoring whitespace, the round trip must give back the same characters.
        let enc_chars = strip_whitespace(text);
        let dec_chars = strip_whitespace(&decoded);
        assert_eq!(
            enc_chars, dec_chars,
            "non-space characters must be preserved"
        );
    }

    #[test]
    fn test_fast_matches_hf_encoding() {
        let fast = FastTokenizer::from_file(TOKENIZER_PATH).unwrap();
        let hf = HuggingFaceTokenizer::from_file(TOKENIZER_PATH).unwrap();

        let samples = ["Hello, world!", "Hello", " world", "He llo"];
        for text in samples.iter().copied() {
            let fast_ids = fast.encode(text).unwrap();
            let hf_ids = hf.encode(text).unwrap();
            assert_eq!(
                fast_ids.token_ids(),
                hf_ids.token_ids(),
                "fastokens and HuggingFace must produce identical token IDs for '{text}'"
            );
        }
    }

    #[test]
    fn test_fast_batch_encode() {
        let tokenizer = FastTokenizer::from_file(TOKENIZER_PATH).unwrap();

        let inputs = ["Hello", " world", "Hello, world!"];
        let encodings = tokenizer.encode_batch(&inputs).unwrap();

        assert_eq!(encodings.len(), inputs.len());
        for (input, enc) in inputs.iter().zip(encodings.iter()) {
            assert!(
                !enc.token_ids().is_empty(),
                "encoding for '{input}' must be non-empty"
            );
        }
    }

    #[test]
    fn test_fast_with_decode_stream() {
        use crate::tokenizers::Tokenizer as TokenizerWrapper;
        use std::sync::Arc;

        let wrapper = TokenizerWrapper::from(Arc::new(
            FastTokenizer::from_file(TOKENIZER_PATH).unwrap(),
        ));

        // Token ids for the prompt and for the text that follows it.
        let prompt_ids = wrapper.encode("Hello").unwrap().token_ids().to_vec();
        let continuation = ", world!";
        let cont_ids = wrapper.encode(continuation).unwrap().token_ids().to_vec();

        // Feed the continuation one token at a time and gather whatever text is emitted.
        let mut stream = wrapper.decode_stream(&prompt_ids, true);
        let mut accumulated = String::new();
        for id in cont_ids.iter().copied() {
            if let Some(chunk) = stream.step(id).unwrap() {
                accumulated.push_str(&chunk);
            }
        }

        // The stream is seeded with the prompt tokens as context, so the reference text
        // is decode(prompt + continuation) with decode(prompt) cut off the front, rather
        // than decode(continuation) on its own.
        let all_ids = [prompt_ids.as_slice(), cont_ids.as_slice()].concat();
        let full_text = wrapper.decode(&all_ids, true).unwrap();
        let prompt_text = wrapper.decode(&prompt_ids, true).unwrap();
        let expected = &full_text[prompt_text.len()..];

        assert_eq!(
            accumulated, expected,
            "streamed chunks must equal context-aware decoded continuation"
        );
    }
}
lib/llm/tests/data/sample-models/minimal-bpe/tokenizer.json
0 → 100644
View file @
da810a26
{
"version"
:
"1.0"
,
"truncation"
:
null
,
"padding"
:
null
,
"added_tokens"
:
[],
"normalizer"
:
null
,
"pre_tokenizer"
:
null
,
"post_processor"
:
null
,
"decoder"
:
null
,
"model"
:
{
"type"
:
"BPE"
,
"dropout"
:
null
,
"unk_token"
:
"<unk>"
,
"continuing_subword_prefix"
:
null
,
"end_of_word_suffix"
:
null
,
"fuse_unk"
:
false
,
"byte_fallback"
:
false
,
"ignore_merges"
:
false
,
"vocab"
:
{
"<unk>"
:
0
,
" "
:
1
,
"!"
:
2
,
","
:
3
,
"."
:
4
,
"H"
:
5
,
"T"
:
6
,
"a"
:
7
,
"d"
:
8
,
"e"
:
9
,
"h"
:
10
,
"i"
:
11
,
"l"
:
12
,
"o"
:
13
,
"r"
:
14
,
"s"
:
15
,
"t"
:
16
,
"w"
:
17
,
"He"
:
18
,
"ll"
:
19
,
"llo"
:
20
,
"or"
:
21
,
"ld"
:
22
},
"merges"
:
[
"H e"
,
"l l"
,
"ll o"
,
"o r"
,
"l d"
]
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment