Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
66b7d2c7
Unverified
Commit
66b7d2c7
authored
Jul 23, 2025
by
Paul Hendricks
Committed by
GitHub
Jul 23, 2025
Browse files
fix: updates versions and adds ahashmap to BPE (#2072)
parent
f9b1757f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
170 additions
and
86 deletions
+170
-86
Cargo.lock
Cargo.lock
+82
-49
Cargo.toml
Cargo.toml
+1
-1
lib/bindings/python/Cargo.lock
lib/bindings/python/Cargo.lock
+75
-28
lib/engines/mistralrs/Cargo.toml
lib/engines/mistralrs/Cargo.toml
+1
-1
lib/llm/Cargo.toml
lib/llm/Cargo.toml
+4
-3
lib/llm/src/gguf/gguf_tokenizer.rs
lib/llm/src/gguf/gguf_tokenizer.rs
+7
-4
No files found.
Cargo.lock
View file @
66b7d2c7
...
@@ -36,6 +36,7 @@ dependencies = [
...
@@ -36,6 +36,7 @@ dependencies = [
"cfg-if 1.0.0",
"cfg-if 1.0.0",
"getrandom 0.3.2",
"getrandom 0.3.2",
"once_cell",
"once_cell",
"serde",
"version_check",
"version_check",
"zerocopy",
"zerocopy",
]
]
...
@@ -897,6 +898,15 @@ version = "0.3.0"
...
@@ -897,6 +898,15 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "castaway"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
dependencies = [
"rustversion",
]
[[package]]
[[package]]
name = "cbindgen"
name = "cbindgen"
version = "0.27.0"
version = "0.27.0"
...
@@ -1077,6 +1087,21 @@ version = "1.0.3"
...
@@ -1077,6 +1087,21 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "compact_str"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
"castaway",
"cfg-if 1.0.0",
"itoa",
"rustversion",
"ryu",
"serde",
"static_assertions",
]
[[package]]
[[package]]
name = "console"
name = "console"
version = "0.15.11"
version = "0.15.11"
...
@@ -1488,6 +1513,15 @@ dependencies = [
...
@@ -1488,6 +1513,15 @@ dependencies = [
"syn 2.0.100",
"syn 2.0.100",
]
]
[[package]]
name = "dary_heap"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
dependencies = [
"serde",
]
[[package]]
[[package]]
name = "dashmap"
name = "dashmap"
version = "5.5.3"
version = "5.5.3"
...
@@ -1824,6 +1858,7 @@ dependencies = [
...
@@ -1824,6 +1858,7 @@ dependencies = [
name = "dynamo-llm"
name = "dynamo-llm"
version = "0.3.2"
version = "0.3.2"
dependencies = [
dependencies = [
"ahash",
"akin",
"akin",
"aligned-vec",
"aligned-vec",
"anyhow",
"anyhow",
...
@@ -1885,8 +1920,8 @@ dependencies = [
...
@@ -1885,8 +1920,8 @@ dependencies = [
"tokio",
"tokio",
"tokio-stream",
"tokio-stream",
"tokio-util",
"tokio-util",
"toktrie
0.6.31
",
"toktrie
1.1.0
",
"toktrie_hf_tokenizers
0.6.31
",
"toktrie_hf_tokenizers
1.1.0
",
"tracing",
"tracing",
"unicode-segmentation",
"unicode-segmentation",
"url",
"url",
...
@@ -3541,15 +3576,6 @@ dependencies = [
...
@@ -3541,15 +3576,6 @@ dependencies = [
"either",
"either",
]
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
[[package]]
name = "itertools"
name = "itertools"
version = "0.12.1"
version = "0.12.1"
...
@@ -3777,8 +3803,8 @@ dependencies = [
...
@@ -3777,8 +3803,8 @@ dependencies = [
[[package]]
[[package]]
name = "llguidance"
name = "llguidance"
version = "
0.7.29
"
version = "
1.0.0
"
source = "git+https://github.com/guidance-ai/llguidance.git?rev=
2ce5ab8#2ce5ab8196f16dd8beba5a3d874eb1ab74e0268c
"
source = "git+https://github.com/guidance-ai/llguidance.git?rev=
c432092#c432092d37b8ccd1afeeff3e7f9c9a29aae0a87e
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"derivre",
"derivre",
...
@@ -3786,7 +3812,7 @@ dependencies = [
...
@@ -3786,7 +3812,7 @@ dependencies = [
"regex-syntax 0.8.5",
"regex-syntax 0.8.5",
"serde",
"serde",
"serde_json",
"serde_json",
"toktrie
0.7.29
",
"toktrie
1.0.0
",
]
]
[[package]]
[[package]]
...
@@ -4104,7 +4130,7 @@ dependencies = [
...
@@ -4104,7 +4130,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs"
name = "mistralrs"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"candle-core 0.8.0",
"candle-core 0.8.0",
...
@@ -4126,7 +4152,7 @@ dependencies = [
...
@@ -4126,7 +4152,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-audio"
name = "mistralrs-audio"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"apodize",
"apodize",
...
@@ -4137,7 +4163,7 @@ dependencies = [
...
@@ -4137,7 +4163,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-core"
name = "mistralrs-core"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"ahash",
"ahash",
"akin",
"akin",
...
@@ -4217,7 +4243,7 @@ dependencies = [
...
@@ -4217,7 +4243,7 @@ dependencies = [
"tokio",
"tokio",
"tokio-rayon",
"tokio-rayon",
"tokio-tungstenite",
"tokio-tungstenite",
"toktrie_hf_tokenizers
0.7.29
",
"toktrie_hf_tokenizers
1.0.0
",
"toml",
"toml",
"tqdm",
"tqdm",
"tracing",
"tracing",
...
@@ -4231,7 +4257,7 @@ dependencies = [
...
@@ -4231,7 +4257,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-mcp"
name = "mistralrs-mcp"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"async-trait",
"async-trait",
...
@@ -4251,7 +4277,7 @@ dependencies = [
...
@@ -4251,7 +4277,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-paged-attn"
name = "mistralrs-paged-attn"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bindgen_cuda 0.1.6",
"bindgen_cuda 0.1.6",
...
@@ -4266,7 +4292,7 @@ dependencies = [
...
@@ -4266,7 +4292,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-quant"
name = "mistralrs-quant"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"bindgen_cuda 0.1.6",
"bindgen_cuda 0.1.6",
"byteorder",
"byteorder",
...
@@ -4294,7 +4320,7 @@ dependencies = [
...
@@ -4294,7 +4320,7 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-vision"
name = "mistralrs-vision"
version = "0.6.0"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
#d38a7e198469eb88e883e36d153437c1fb326315
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git
?rev=8a4faf3#8a4faf312069cf87b215c6c79e48a44e17b71062
"
dependencies = [
dependencies = [
"candle-core 0.8.0",
"candle-core 0.8.0",
"image",
"image",
...
@@ -4695,11 +4721,11 @@ checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea"
...
@@ -4695,11 +4721,11 @@ checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea"
[[package]]
[[package]]
name = "onig"
name = "onig"
version = "6.
4.0
"
version = "6.
5.1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f
"
checksum = "
336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0
"
dependencies = [
dependencies = [
"bitflags
1.3.2
",
"bitflags
2.9.0
",
"libc",
"libc",
"once_cell",
"once_cell",
"onig_sys",
"onig_sys",
...
@@ -4707,9 +4733,9 @@ dependencies = [
...
@@ -4707,9 +4733,9 @@ dependencies = [
[[package]]
[[package]]
name = "onig_sys"
name = "onig_sys"
version = "69.
8
.1"
version = "69.
9
.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c3
5b1f
086e7
"
checksum = "
c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d
5b
f
1f
5f8b7f6727dc
"
dependencies = [
dependencies = [
"cc",
"cc",
"pkg-config",
"pkg-config",
...
@@ -5519,12 +5545,12 @@ dependencies = [
...
@@ -5519,12 +5545,12 @@ dependencies = [
[[package]]
[[package]]
name = "rayon-cond"
name = "rayon-cond"
version = "0.
3
.0"
version = "0.
4
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9
"
checksum = "
2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f
"
dependencies = [
dependencies = [
"either",
"either",
"itertools 0.1
1
.0",
"itertools 0.1
4
.0",
"rayon",
"rayon",
]
]
...
@@ -6541,6 +6567,12 @@ version = "1.2.0"
...
@@ -6541,6 +6567,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
[[package]]
name = "stop-words"
name = "stop-words"
version = "0.8.1"
version = "0.8.1"
...
@@ -7071,25 +7103,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
...
@@ -7071,25 +7103,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
[[package]]
name = "tokenizers"
name = "tokenizers"
version = "0.21.
1
"
version = "0.21.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3169b3195f925496c895caee7978a335d49218488ef22375267fba5a46a40bd7
"
checksum = "
4c3846d8588abed0daba25a0e47edd58ea15e450a6088b2575f5116fdb0b27ca
"
dependencies = [
dependencies = [
"ahash",
"aho-corasick",
"aho-corasick",
"compact_str",
"dary_heap",
"derive_builder",
"derive_builder",
"esaxx-rs",
"esaxx-rs",
"fancy-regex",
"fancy-regex",
"getrandom 0.
2.16
",
"getrandom 0.
3.2
",
"hf-hub",
"hf-hub",
"indicatif",
"itertools 0.14.0",
"itertools 0.13.0",
"lazy_static",
"log",
"log",
"macro_rules_attribute",
"macro_rules_attribute",
"monostate",
"monostate",
"onig",
"onig",
"paste",
"paste",
"rand 0.
8.5
",
"rand 0.
9.1
",
"rayon",
"rayon",
"rayon-cond",
"rayon-cond",
"regex",
"regex",
...
@@ -7215,9 +7248,8 @@ dependencies = [
...
@@ -7215,9 +7248,8 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie"
name = "toktrie"
version = "0.6.31"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d37b8ccd1afeeff3e7f9c9a29aae0a87e"
checksum = "32dfaa37eed7e440ee93b236b736600ccbbfa47e4a0b15f0b74a4d3f4cf9b5a3"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bytemuck",
"bytemuck",
...
@@ -7228,8 +7260,9 @@ dependencies = [
...
@@ -7228,8 +7260,9 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie"
name = "toktrie"
version = "0.7.29"
version = "1.1.0"
source = "git+https://github.com/guidance-ai/llguidance.git?rev=2ce5ab8#2ce5ab8196f16dd8beba5a3d874eb1ab74e0268c"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "747b19d4f97f841cc720aaffb1fa3dbf08bc72abd9199dcf34b0fad7b1a3691c"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bytemuck",
"bytemuck",
...
@@ -7240,29 +7273,29 @@ dependencies = [
...
@@ -7240,29 +7273,29 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie_hf_tokenizers"
name = "toktrie_hf_tokenizers"
version = "0.6.31"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d37b8ccd1afeeff3e7f9c9a29aae0a87e"
checksum = "8a5e426e5cfb8237a7244bc53e6136022987a16d66d07050ff81c0a56c5a8a25"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"log",
"log",
"serde",
"serde",
"serde_json",
"serde_json",
"tokenizers",
"tokenizers",
"toktrie
0.6.31
",
"toktrie
1.0.0
",
]
]
[[package]]
[[package]]
name = "toktrie_hf_tokenizers"
name = "toktrie_hf_tokenizers"
version = "0.7.29"
version = "1.1.0"
source = "git+https://github.com/guidance-ai/llguidance.git?rev=2ce5ab8#2ce5ab8196f16dd8beba5a3d874eb1ab74e0268c"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f942aa9bcd67f39dfeec0d5b80a40ae32e5ae38c0c58777b7d47fa393177f"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"log",
"log",
"serde",
"serde",
"serde_json",
"serde_json",
"tokenizers",
"tokenizers",
"toktrie
0.7.29
",
"toktrie
1.1.0
",
]
]
[[package]]
[[package]]
...
...
Cargo.toml
View file @
66b7d2c7
...
@@ -45,7 +45,7 @@ derive-getters = { version = "0.5" }
...
@@ -45,7 +45,7 @@ derive-getters = { version = "0.5" }
either
=
{
version
=
"1.13"
,
features
=
["serde"]
}
either
=
{
version
=
"1.13"
,
features
=
["serde"]
}
etcd-client
=
{
version
=
"0.14"
,
features
=
["tls"]
}
etcd-client
=
{
version
=
"0.14"
,
features
=
["tls"]
}
futures
=
{
version
=
"0.3"
}
futures
=
{
version
=
"0.3"
}
hf-hub
=
{
version
=
"0.4.2"
,
default-features
=
false
,
features
=
[
"tokio"
,
"rustls-tls"
]
}
hf-hub
=
{
version
=
"0.4.2"
,
default-features
=
false
,
features
=
[
"tokio"
,
"rustls-tls"
,
"ureq"
]
}
humantime
=
{
version
=
"2.2.0"
}
humantime
=
{
version
=
"2.2.0"
}
libc
=
{
version
=
"0.2"
}
libc
=
{
version
=
"0.2"
}
oneshot
=
{
version
=
"0.1.11"
,
features
=
[
"std"
,
"async"
]
}
oneshot
=
{
version
=
"0.1.11"
,
features
=
[
"std"
,
"async"
]
}
...
...
lib/bindings/python/Cargo.lock
View file @
66b7d2c7
...
@@ -17,6 +17,20 @@ version = "2.0.0"
...
@@ -17,6 +17,20 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if 1.0.0",
"getrandom 0.3.2",
"once_cell",
"serde",
"version_check",
"zerocopy",
]
[[package]]
[[package]]
name = "aho-corasick"
name = "aho-corasick"
version = "1.1.3"
version = "1.1.3"
...
@@ -593,6 +607,15 @@ dependencies = [
...
@@ -593,6 +607,15 @@ dependencies = [
"zip",
"zip",
]
]
[[package]]
name = "castaway"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
dependencies = [
"rustversion",
]
[[package]]
[[package]]
name = "cc"
name = "cc"
version = "1.2.24"
version = "1.2.24"
...
@@ -698,6 +721,21 @@ version = "1.0.3"
...
@@ -698,6 +721,21 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "compact_str"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
"castaway",
"cfg-if 1.0.0",
"itoa",
"rustversion",
"ryu",
"serde",
"static_assertions",
]
[[package]]
[[package]]
name = "concurrent-queue"
name = "concurrent-queue"
version = "2.5.0"
version = "2.5.0"
...
@@ -918,6 +956,15 @@ dependencies = [
...
@@ -918,6 +956,15 @@ dependencies = [
"syn 2.0.100",
"syn 2.0.100",
]
]
[[package]]
name = "dary_heap"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
dependencies = [
"serde",
]
[[package]]
[[package]]
name = "dashmap"
name = "dashmap"
version = "5.5.3"
version = "5.5.3"
...
@@ -1124,6 +1171,7 @@ dependencies = [
...
@@ -1124,6 +1171,7 @@ dependencies = [
name = "dynamo-llm"
name = "dynamo-llm"
version = "0.3.2"
version = "0.3.2"
dependencies = [
dependencies = [
"ahash",
"akin",
"akin",
"anyhow",
"anyhow",
"async-nats",
"async-nats",
...
@@ -2425,15 +2473,6 @@ version = "1.70.1"
...
@@ -2425,15 +2473,6 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
[[package]]
name = "itertools"
name = "itertools"
version = "0.13.0"
version = "0.13.0"
...
@@ -3097,11 +3136,11 @@ checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea"
...
@@ -3097,11 +3136,11 @@ checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea"
[[package]]
[[package]]
name = "onig"
name = "onig"
version = "6.
4.0
"
version = "6.
5.1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f
"
checksum = "
336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0
"
dependencies = [
dependencies = [
"bitflags
1.3.2
",
"bitflags
2.9.0
",
"libc",
"libc",
"once_cell",
"once_cell",
"onig_sys",
"onig_sys",
...
@@ -3109,9 +3148,9 @@ dependencies = [
...
@@ -3109,9 +3148,9 @@ dependencies = [
[[package]]
[[package]]
name = "onig_sys"
name = "onig_sys"
version = "69.
8
.1"
version = "69.
9
.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c3
5b1f
086e7
"
checksum = "
c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d
5b
f
1f
5f8b7f6727dc
"
dependencies = [
dependencies = [
"cc",
"cc",
"pkg-config",
"pkg-config",
...
@@ -3758,12 +3797,12 @@ dependencies = [
...
@@ -3758,12 +3797,12 @@ dependencies = [
[[package]]
[[package]]
name = "rayon-cond"
name = "rayon-cond"
version = "0.
3
.0"
version = "0.
4
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9
"
checksum = "
2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f
"
dependencies = [
dependencies = [
"either",
"either",
"itertools 0.1
1
.0",
"itertools 0.1
4
.0",
"rayon",
"rayon",
]
]
...
@@ -4378,6 +4417,12 @@ version = "1.2.0"
...
@@ -4378,6 +4417,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
[[package]]
name = "strsim"
name = "strsim"
version = "0.11.1"
version = "0.11.1"
...
@@ -4645,24 +4690,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
...
@@ -4645,24 +4690,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
[[package]]
name = "tokenizers"
name = "tokenizers"
version = "0.21.
1
"
version = "0.21.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3169b3195f925496c895caee7978a335d49218488ef22375267fba5a46a40bd7
"
checksum = "
4c3846d8588abed0daba25a0e47edd58ea15e450a6088b2575f5116fdb0b27ca
"
dependencies = [
dependencies = [
"ahash",
"aho-corasick",
"aho-corasick",
"compact_str",
"dary_heap",
"derive_builder",
"derive_builder",
"esaxx-rs",
"esaxx-rs",
"getrandom 0.2.16",
"fancy-regex",
"getrandom 0.3.2",
"hf-hub",
"hf-hub",
"indicatif",
"itertools 0.14.0",
"itertools 0.13.0",
"lazy_static",
"log",
"log",
"macro_rules_attribute",
"macro_rules_attribute",
"monostate",
"monostate",
"onig",
"onig",
"paste",
"paste",
"rand 0.
8.5
",
"rand 0.
9.1
",
"rayon",
"rayon",
"rayon-cond",
"rayon-cond",
"regex",
"regex",
...
@@ -4765,9 +4812,9 @@ dependencies = [
...
@@ -4765,9 +4812,9 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie"
name = "toktrie"
version = "
0.6.31
"
version = "
1.1.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
32dfaa37eed7e440ee93b236b736600ccbbfa47e4a0b15f0b74a4d3f4cf9b5a3
"
checksum = "
747b19d4f97f841cc720aaffb1fa3dbf08bc72abd9199dcf34b0fad7b1a3691c
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bytemuck",
"bytemuck",
...
@@ -4778,9 +4825,9 @@ dependencies = [
...
@@ -4778,9 +4825,9 @@ dependencies = [
[[package]]
[[package]]
name = "toktrie_hf_tokenizers"
name = "toktrie_hf_tokenizers"
version = "
0.6.31
"
version = "
1.1.0
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
8a5e426e5cfb8237a7244bc53e6136022987a16d66d07050ff81c0a56c5a8a25
"
checksum = "
d77f942aa9bcd67f39dfeec0d5b80a40ae32e5ae38c0c58777b7d47fa393177f
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"log",
"log",
...
...
lib/engines/mistralrs/Cargo.toml
View file @
66b7d2c7
...
@@ -39,7 +39,7 @@ async-stream = { workspace = true }
...
@@ -39,7 +39,7 @@ async-stream = { workspace = true }
async-trait
=
{
workspace
=
true
}
async-trait
=
{
workspace
=
true
}
either
=
{
workspace
=
true
}
either
=
{
workspace
=
true
}
indexmap
=
{
version
=
"2.9.0"
,
features
=
["serde"]
}
indexmap
=
{
version
=
"2.9.0"
,
features
=
["serde"]
}
mistralrs
=
{
git
=
"https://github.com/EricLBuehler/mistral.rs.git"
,
version
=
"0.6.0"
}
mistralrs
=
{
git
=
"https://github.com/EricLBuehler/mistral.rs.git"
,
version
=
"0.6.0"
,
rev
=
"8a4faf3"
}
serde_json
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
tracing
=
{
workspace
=
true
}
tracing
=
{
workspace
=
true
}
lib/llm/Cargo.toml
View file @
66b7d2c7
...
@@ -101,7 +101,7 @@ unicode-segmentation = "1.12"
...
@@ -101,7 +101,7 @@ unicode-segmentation = "1.12"
axum
=
{
workspace
=
true
}
axum
=
{
workspace
=
true
}
# tokenizers
# tokenizers
tokenizers
=
{
version
=
"0.21.
1
"
,
default-features
=
false
,
features
=
[
tokenizers
=
{
version
=
"0.21.
2
"
,
default-features
=
false
,
features
=
[
"onig"
,
"onig"
,
"esaxx_fast"
,
"esaxx_fast"
,
"rustls-tls"
,
"rustls-tls"
,
...
@@ -110,8 +110,8 @@ sentencepiece = { version = "0.11.2", optional = true }
...
@@ -110,8 +110,8 @@ sentencepiece = { version = "0.11.2", optional = true }
# backend
# backend
galil-seiferas
=
{
version
=
"0.1"
}
galil-seiferas
=
{
version
=
"0.1"
}
toktrie
=
{
version
=
"
0.6.28
"
}
toktrie
=
{
version
=
"
1.1
"
}
toktrie_hf_tokenizers
=
{
version
=
"
0.6.28
"
}
toktrie_hf_tokenizers
=
{
version
=
"
1.1
"
}
# preprocessor
# preprocessor
bs62
=
{
version
=
"0.1"
}
bs62
=
{
version
=
"0.1"
}
...
@@ -127,6 +127,7 @@ memmap2 = "0.9.5"
...
@@ -127,6 +127,7 @@ memmap2 = "0.9.5"
# Publishers
# Publishers
zeromq
=
"0.4.1"
zeromq
=
"0.4.1"
rmp-serde
=
"1.3"
rmp-serde
=
"1.3"
ahash
=
"0.8.12"
[dev-dependencies]
[dev-dependencies]
approx
=
"0.5"
approx
=
"0.5"
...
...
lib/llm/src/gguf/gguf_tokenizer.rs
View file @
66b7d2c7
...
@@ -27,8 +27,7 @@
...
@@ -27,8 +27,7 @@
// https://github.com/huggingface/transformers/blob/8685b3c5d2dd2550527773d2a02499495a759e31/src/transformers/convert_slow_tokenizer.py
// https://github.com/huggingface/transformers/blob/8685b3c5d2dd2550527773d2a02499495a759e31/src/transformers/convert_slow_tokenizer.py
use
std
::
collections
::
HashMap
;
use
ahash
::
AHashMap
;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
itertools
::
Itertools
;
use
itertools
::
Itertools
;
use
tokenizers
::{
use
tokenizers
::{
...
@@ -236,7 +235,8 @@ fn bpe_tokenizer(p: &PropsGGUF) -> Result<(Tokenizer, TokenizerKind, AddedTokens
...
@@ -236,7 +235,8 @@ fn bpe_tokenizer(p: &PropsGGUF) -> Result<(Tokenizer, TokenizerKind, AddedTokens
})
})
.collect
::
<
Vec
<
_
>>
();
.collect
::
<
Vec
<
_
>>
();
let
mut
vocab
=
HashMap
::
new
();
// Use ahash::AHashMap so that we satisfy Into<AHashMap<_>> bounds
let
mut
vocab
:
AHashMap
<
String
,
u32
>
=
AHashMap
::
new
();
for
(
i
,
token
)
in
p
.tokens
.iter
()
.enumerate
()
{
for
(
i
,
token
)
in
p
.tokens
.iter
()
.enumerate
()
{
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_possible_truncation)]
vocab
.insert
(
token
.clone
(),
i
as
u32
);
vocab
.insert
(
token
.clone
(),
i
as
u32
);
...
@@ -266,7 +266,10 @@ fn bpe_tokenizer(p: &PropsGGUF) -> Result<(Tokenizer, TokenizerKind, AddedTokens
...
@@ -266,7 +266,10 @@ fn bpe_tokenizer(p: &PropsGGUF) -> Result<(Tokenizer, TokenizerKind, AddedTokens
false
,
true
,
true
,
false
,
true
,
true
,
)));
)));
if
add_bos_token
.is_some_and
(|
x
|
x
)
{
if
add_bos_token
.is_some_and
(|
x
|
x
)
{
let
mut
special_toks
=
HashMap
::
new
();
// Use ahash::AHashMap so that we satisfy Into<AHashMap<_>> bounds
let
mut
special_toks
:
AHashMap
<
String
,
processors
::
template
::
SpecialToken
>
=
AHashMap
::
new
();
special_toks
.insert
(
special_toks
.insert
(
p
.tokens
[
bos
as
usize
]
.clone
(),
p
.tokens
[
bos
as
usize
]
.clone
(),
template
::
SpecialToken
::
new
(
template
::
SpecialToken
::
new
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment