Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
da3b1dbd
Unverified
Commit
da3b1dbd
authored
Sep 24, 2025
by
Hyunjae Woo
Committed by
GitHub
Sep 24, 2025
Browse files
feat: modelexpress dynamo integration (#3191)
parent
5323b61f
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
427 additions
and
335 deletions
+427
-335
Cargo.lock
Cargo.lock
+291
-54
Cargo.toml
Cargo.toml
+35
-9
container/Dockerfile
container/Dockerfile
+19
-1
lib/llm/Cargo.toml
lib/llm/Cargo.toml
+22
-35
lib/llm/src/hub.rs
lib/llm/src/hub.rs
+60
-236
No files found.
Cargo.lock
View file @
da3b1dbd
...
@@ -141,6 +141,9 @@ name = "anyhow"
...
@@ -141,6 +141,9 @@ name = "anyhow"
version = "1.0.99"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
dependencies = [
"backtrace",
]
[[package]]
[[package]]
name = "apodize"
name = "apodize"
...
@@ -183,6 +186,12 @@ dependencies = [
...
@@ -183,6 +186,12 @@ dependencies = [
"syn 2.0.106",
"syn 2.0.106",
]
]
[[package]]
name = "arraydeque"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
[[package]]
[[package]]
name = "arrayref"
name = "arrayref"
version = "0.3.9"
version = "0.3.9"
...
@@ -798,7 +807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -798,7 +807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
dependencies = [
"memchr",
"memchr",
"regex-automata 0.4.10",
"regex-automata",
"serde",
"serde",
]
]
...
@@ -1011,7 +1020,7 @@ dependencies = [
...
@@ -1011,7 +1020,7 @@ dependencies = [
"serde_json",
"serde_json",
"syn 2.0.106",
"syn 2.0.106",
"tempfile",
"tempfile",
"toml",
"toml
0.8.23
",
]
]
[[package]]
[[package]]
...
@@ -1175,6 +1184,15 @@ version = "1.0.4"
...
@@ -1175,6 +1184,15 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "colored"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
[[package]]
name = "compact_str"
name = "compact_str"
version = "0.9.0"
version = "0.9.0"
...
@@ -1190,6 +1208,26 @@ dependencies = [
...
@@ -1190,6 +1208,26 @@ dependencies = [
"static_assertions",
"static_assertions",
]
]
[[package]]
name = "config"
version = "0.15.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0faa974509d38b33ff89282db9c3295707ccf031727c0de9772038ec526852ba"
dependencies = [
"async-trait",
"convert_case",
"json5",
"pathdiff",
"ron",
"rust-ini",
"serde",
"serde-untagged",
"serde_json",
"toml 0.9.5",
"winnow",
"yaml-rust2",
]
[[package]]
[[package]]
name = "console"
name = "console"
version = "0.15.11"
version = "0.15.11"
...
@@ -1248,12 +1286,41 @@ version = "0.9.6"
...
@@ -1248,12 +1286,41 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "const-random"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
dependencies = [
"const-random-macro",
]
[[package]]
name = "const-random-macro"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom 0.2.16",
"once_cell",
"tiny-keccak",
]
[[package]]
[[package]]
name = "constant_time_eq"
name = "constant_time_eq"
version = "0.3.1"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "convert_case"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca"
dependencies = [
"unicode-segmentation",
]
[[package]]
[[package]]
name = "core-foundation"
name = "core-foundation"
version = "0.9.4"
version = "0.9.4"
...
@@ -1782,7 +1849,7 @@ dependencies = [
...
@@ -1782,7 +1849,7 @@ dependencies = [
"bytemuck",
"bytemuck",
"bytemuck_derive",
"bytemuck_derive",
"hashbrown 0.15.5",
"hashbrown 0.15.5",
"regex-syntax
0.8.6
",
"regex-syntax",
"strum",
"strum",
]
]
...
@@ -1866,6 +1933,15 @@ dependencies = [
...
@@ -1866,6 +1933,15 @@ dependencies = [
"syn 2.0.106",
"syn 2.0.106",
]
]
[[package]]
name = "dlv-list"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f"
dependencies = [
"const-random",
]
[[package]]
[[package]]
name = "doctest-file"
name = "doctest-file"
version = "1.0.0"
version = "1.0.0"
...
@@ -2025,6 +2101,8 @@ dependencies = [
...
@@ -2025,6 +2101,8 @@ dependencies = [
"memmap2",
"memmap2",
"minijinja",
"minijinja",
"minijinja-contrib",
"minijinja-contrib",
"modelexpress-client",
"modelexpress-common",
"ndarray",
"ndarray",
"nix 0.26.4",
"nix 0.26.4",
"nixl-sys",
"nixl-sys",
...
@@ -2448,8 +2526,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -2448,8 +2526,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
dependencies = [
"bit-set 0.5.3",
"bit-set 0.5.3",
"regex-automata 0.4.10",
"regex-automata",
"regex-syntax 0.8.6",
"regex-syntax",
]
]
[[package]]
[[package]]
...
@@ -2459,8 +2537,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -2459,8 +2537,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
dependencies = [
"bit-set 0.8.0",
"bit-set 0.8.0",
"regex-automata 0.4.10",
"regex-automata",
"regex-syntax 0.8.6",
"regex-syntax",
]
]
[[package]]
[[package]]
...
@@ -2496,7 +2574,7 @@ dependencies = [
...
@@ -2496,7 +2574,7 @@ dependencies = [
"serde",
"serde",
"serde_json",
"serde_json",
"tempfile",
"tempfile",
"toml",
"toml
0.8.23
",
"uncased",
"uncased",
"version_check",
"version_check",
]
]
...
@@ -3104,8 +3182,8 @@ dependencies = [
...
@@ -3104,8 +3182,8 @@ dependencies = [
"aho-corasick",
"aho-corasick",
"bstr",
"bstr",
"log",
"log",
"regex-automata
0.4.10
",
"regex-automata",
"regex-syntax
0.8.6
",
"regex-syntax",
]
]
[[package]]
[[package]]
...
@@ -3192,6 +3270,15 @@ dependencies = [
...
@@ -3192,6 +3270,15 @@ dependencies = [
"foldhash",
"foldhash",
]
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown 0.15.5",
]
[[package]]
[[package]]
name = "hdrhistogram"
name = "hdrhistogram"
version = "7.5.4"
version = "7.5.4"
...
@@ -3895,10 +3982,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -3895,10 +3982,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
dependencies = [
dependencies = [
"jiff-static",
"jiff-static",
"jiff-tzdb-platform",
"log",
"log",
"portable-atomic",
"portable-atomic",
"portable-atomic-util",
"portable-atomic-util",
"serde",
"serde",
"windows-sys 0.59.0",
]
]
[[package]]
[[package]]
...
@@ -3912,6 +4001,21 @@ dependencies = [
...
@@ -3912,6 +4001,21 @@ dependencies = [
"syn 2.0.106",
"syn 2.0.106",
]
]
[[package]]
name = "jiff-tzdb"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524"
[[package]]
name = "jiff-tzdb-platform"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8"
dependencies = [
"jiff-tzdb",
]
[[package]]
[[package]]
name = "jobserver"
name = "jobserver"
version = "0.1.34"
version = "0.1.34"
...
@@ -3948,6 +4052,17 @@ dependencies = [
...
@@ -3948,6 +4052,17 @@ dependencies = [
"unicode-general-category",
"unicode-general-category",
]
]
[[package]]
name = "json5"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1"
dependencies = [
"pest",
"pest_derive",
"serde",
]
[[package]]
[[package]]
name = "jsonschema"
name = "jsonschema"
version = "0.17.1"
version = "0.17.1"
...
@@ -4138,7 +4253,7 @@ dependencies = [
...
@@ -4138,7 +4253,7 @@ dependencies = [
"anyhow",
"anyhow",
"derivre",
"derivre",
"indexmap 2.11.0",
"indexmap 2.11.0",
"regex-syntax
0.8.6
",
"regex-syntax",
"serde",
"serde",
"serde_json",
"serde_json",
"toktrie 1.0.0",
"toktrie 1.0.0",
...
@@ -4327,11 +4442,11 @@ dependencies = [
...
@@ -4327,11 +4442,11 @@ dependencies = [
[[package]]
[[package]]
name = "matchers"
name = "matchers"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
dependencies = [
dependencies = [
"regex-automata 0.1.10",
"regex-automata",
]
]
[[package]]
[[package]]
...
@@ -4649,7 +4764,7 @@ dependencies = [
...
@@ -4649,7 +4764,7 @@ dependencies = [
"rand_isaac",
"rand_isaac",
"rayon",
"rayon",
"regex",
"regex",
"regex-automata
0.4.10
",
"regex-automata",
"reqwest 0.12.23",
"reqwest 0.12.23",
"rubato",
"rubato",
"rust-mcp-schema",
"rust-mcp-schema",
...
@@ -4673,7 +4788,7 @@ dependencies = [
...
@@ -4673,7 +4788,7 @@ dependencies = [
"tokio-rayon",
"tokio-rayon",
"tokio-tungstenite 0.24.0",
"tokio-tungstenite 0.24.0",
"toktrie_hf_tokenizers 1.0.0",
"toktrie_hf_tokenizers 1.0.0",
"toml",
"toml
0.8.23
",
"tqdm",
"tqdm",
"tracing",
"tracing",
"tracing-subscriber",
"tracing-subscriber",
...
@@ -4756,6 +4871,50 @@ dependencies = [
...
@@ -4756,6 +4871,50 @@ dependencies = [
"rayon",
"rayon",
]
]
[[package]]
name = "modelexpress-client"
version = "0.1.0"
source = "git+https://github.com/ai-dynamo/modelexpress.git?rev=a232220bf268a475d293914d407f4ae186f443e3#a232220bf268a475d293914d407f4ae186f443e3"
dependencies = [
"anyhow",
"clap 4.5.46",
"colored",
"futures",
"modelexpress-common",
"prost",
"serde",
"serde_json",
"thiserror 2.0.16",
"tokio",
"tonic 0.13.1",
"tracing",
"tracing-subscriber",
"uuid 1.18.0",
]
[[package]]
name = "modelexpress-common"
version = "0.1.0"
source = "git+https://github.com/ai-dynamo/modelexpress.git?rev=a232220bf268a475d293914d407f4ae186f443e3#a232220bf268a475d293914d407f4ae186f443e3"
dependencies = [
"anyhow",
"async-trait",
"chrono",
"clap 4.5.46",
"config",
"hf-hub",
"jiff",
"prost",
"serde",
"serde_json",
"serde_yaml",
"thiserror 2.0.16",
"tokio",
"tonic 0.13.1",
"tonic-build",
"tracing",
]
[[package]]
[[package]]
name = "monostate"
name = "monostate"
version = "0.1.14"
version = "0.1.14"
...
@@ -4992,12 +5151,11 @@ dependencies = [
...
@@ -4992,12 +5151,11 @@ dependencies = [
[[package]]
[[package]]
name = "nu-ansi-term"
name = "nu-ansi-term"
version = "0.46.0"
version = "0.50.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
dependencies = [
dependencies = [
"overload",
"windows-sys 0.52.0",
"winapi 0.3.9",
]
]
[[package]]
[[package]]
...
@@ -5331,6 +5489,16 @@ dependencies = [
...
@@ -5331,6 +5489,16 @@ dependencies = [
"num-traits",
"num-traits",
]
]
[[package]]
name = "ordered-multimap"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79"
dependencies = [
"dlv-list",
"hashbrown 0.14.5",
]
[[package]]
[[package]]
name = "os_info"
name = "os_info"
version = "3.12.0"
version = "3.12.0"
...
@@ -5343,12 +5511,6 @@ dependencies = [
...
@@ -5343,12 +5511,6 @@ dependencies = [
"windows-sys 0.52.0",
"windows-sys 0.52.0",
]
]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
[[package]]
name = "packedvec"
name = "packedvec"
version = "1.2.5"
version = "1.2.5"
...
@@ -5388,6 +5550,12 @@ version = "1.0.15"
...
@@ -5388,6 +5550,12 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pathdiff"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
[[package]]
name = "pear"
name = "pear"
version = "0.2.9"
version = "0.2.9"
...
@@ -5799,7 +5967,7 @@ dependencies = [
...
@@ -5799,7 +5967,7 @@ dependencies = [
"rand 0.9.2",
"rand 0.9.2",
"rand_chacha 0.9.0",
"rand_chacha 0.9.0",
"rand_xorshift",
"rand_xorshift",
"regex-syntax
0.8.6
",
"regex-syntax",
"rusty-fork",
"rusty-fork",
"tempfile",
"tempfile",
"unarray",
"unarray",
...
@@ -6284,17 +6452,8 @@ checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
...
@@ -6284,17 +6452,8 @@ checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
dependencies = [
"aho-corasick",
"aho-corasick",
"memchr",
"memchr",
"regex-automata 0.4.10",
"regex-automata",
"regex-syntax 0.8.6",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
]
[[package]]
[[package]]
...
@@ -6305,15 +6464,9 @@ checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
...
@@ -6305,15 +6464,9 @@ checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [
dependencies = [
"aho-corasick",
"aho-corasick",
"memchr",
"memchr",
"regex-syntax
0.8.6
",
"regex-syntax",
]
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
[[package]]
name = "regex-syntax"
name = "regex-syntax"
version = "0.8.6"
version = "0.8.6"
...
@@ -6470,6 +6623,18 @@ dependencies = [
...
@@ -6470,6 +6623,18 @@ dependencies = [
"serde",
"serde",
]
]
[[package]]
name = "ron"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
dependencies = [
"base64 0.21.7",
"bitflags 2.9.3",
"serde",
"serde_derive",
]
[[package]]
[[package]]
name = "rstest"
name = "rstest"
version = "0.18.2"
version = "0.18.2"
...
@@ -6552,6 +6717,16 @@ dependencies = [
...
@@ -6552,6 +6717,16 @@ dependencies = [
"realfft",
"realfft",
]
]
[[package]]
name = "rust-ini"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7"
dependencies = [
"cfg-if 1.0.3",
"ordered-multimap",
]
[[package]]
[[package]]
name = "rust-mcp-schema"
name = "rust-mcp-schema"
version = "0.1.11"
version = "0.1.11"
...
@@ -7034,6 +7209,17 @@ dependencies = [
...
@@ -7034,6 +7209,17 @@ dependencies = [
"serde",
"serde",
]
]
[[package]]
name = "serde-untagged"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34836a629bcbc6f1afdf0907a744870039b1e14c0561cb26094fa683b158eff3"
dependencies = [
"erased-serde",
"serde",
"typeid",
]
[[package]]
[[package]]
name = "serde_cbor"
name = "serde_cbor"
version = "0.11.2"
version = "0.11.2"
...
@@ -7127,6 +7313,15 @@ dependencies = [
...
@@ -7127,6 +7313,15 @@ dependencies = [
"serde",
"serde",
]
]
[[package]]
name = "serde_spanned"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83"
dependencies = [
"serde",
]
[[package]]
[[package]]
name = "serde_urlencoded"
name = "serde_urlencoded"
version = "0.7.1"
version = "0.7.1"
...
@@ -7820,7 +8015,7 @@ dependencies = [
...
@@ -7820,7 +8015,7 @@ dependencies = [
"cfg-expr",
"cfg-expr",
"heck 0.5.0",
"heck 0.5.0",
"pkg-config",
"pkg-config",
"toml",
"toml
0.8.23
",
"version-compare",
"version-compare",
]
]
...
@@ -8058,7 +8253,7 @@ dependencies = [
...
@@ -8058,7 +8253,7 @@ dependencies = [
"rayon",
"rayon",
"rayon-cond",
"rayon-cond",
"regex",
"regex",
"regex-syntax
0.8.6
",
"regex-syntax",
"serde",
"serde",
"serde_json",
"serde_json",
"spm_precompiled",
"spm_precompiled",
...
@@ -8273,11 +8468,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
...
@@ -8273,11 +8468,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
dependencies = [
"serde",
"serde",
"serde_spanned",
"serde_spanned
0.6.9
",
"toml_datetime",
"toml_datetime
0.6.11
",
"toml_edit",
"toml_edit",
]
]
[[package]]
name = "toml"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8"
dependencies = [
"serde",
"serde_spanned 1.0.0",
"toml_datetime 0.7.0",
"toml_parser",
"winnow",
]
[[package]]
[[package]]
name = "toml_datetime"
name = "toml_datetime"
version = "0.6.11"
version = "0.6.11"
...
@@ -8287,6 +8495,15 @@ dependencies = [
...
@@ -8287,6 +8495,15 @@ dependencies = [
"serde",
"serde",
]
]
[[package]]
name = "toml_datetime"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3"
dependencies = [
"serde",
]
[[package]]
[[package]]
name = "toml_edit"
name = "toml_edit"
version = "0.22.27"
version = "0.22.27"
...
@@ -8295,12 +8512,21 @@ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
...
@@ -8295,12 +8512,21 @@ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
dependencies = [
"indexmap 2.11.0",
"indexmap 2.11.0",
"serde",
"serde",
"serde_spanned",
"serde_spanned
0.6.9
",
"toml_datetime",
"toml_datetime
0.6.11
",
"toml_write",
"toml_write",
"winnow",
"winnow",
]
]
[[package]]
name = "toml_parser"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627"
dependencies = [
"winnow",
]
[[package]]
[[package]]
name = "toml_write"
name = "toml_write"
version = "0.1.2"
version = "0.1.2"
...
@@ -8518,14 +8744,14 @@ dependencies = [
...
@@ -8518,14 +8744,14 @@ dependencies = [
[[package]]
[[package]]
name = "tracing-subscriber"
name = "tracing-subscriber"
version = "0.3.19"
version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
dependencies = [
dependencies = [
"matchers",
"matchers",
"nu-ansi-term",
"nu-ansi-term",
"once_cell",
"once_cell",
"regex",
"regex-automata",
"serde",
"serde",
"serde_json",
"serde_json",
"sharded-slab",
"sharded-slab",
...
@@ -9662,6 +9888,17 @@ version = "0.8.15"
...
@@ -9662,6 +9888,17 @@ version = "0.8.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
[[package]]
name = "yaml-rust2"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2462ea039c445496d8793d052e13787f2b90e750b833afee748e601c17621ed9"
dependencies = [
"arraydeque",
"encoding_rs",
"hashlink",
]
[[package]]
[[package]]
name = "yansi"
name = "yansi"
version = "1.0.1"
version = "1.0.1"
...
...
Cargo.toml
View file @
da3b1dbd
...
@@ -30,7 +30,10 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed"]
...
@@ -30,7 +30,10 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed"]
dynamo-runtime
=
{
path
=
"lib/runtime"
,
version
=
"0.5.0"
}
dynamo-runtime
=
{
path
=
"lib/runtime"
,
version
=
"0.5.0"
}
dynamo-llm
=
{
path
=
"lib/llm"
,
version
=
"0.5.0"
}
dynamo-llm
=
{
path
=
"lib/llm"
,
version
=
"0.5.0"
}
dynamo-tokens
=
{
path
=
"lib/tokens"
,
version
=
"0.5.0"
}
dynamo-tokens
=
{
path
=
"lib/tokens"
,
version
=
"0.5.0"
}
dynamo-async-openai
=
{
path
=
"lib/async-openai"
,
version
=
"0.5.0"
,
features
=
[
"byot"
,
"rustls"
]}
dynamo-async-openai
=
{
path
=
"lib/async-openai"
,
version
=
"0.5.0"
,
features
=
[
"byot"
,
"rustls"
,
]
}
dynamo-parsers
=
{
path
=
"lib/parsers"
,
version
=
"0.5.0"
}
dynamo-parsers
=
{
path
=
"lib/parsers"
,
version
=
"0.5.0"
}
# External dependencies
# External dependencies
...
@@ -41,19 +44,38 @@ async-trait = { version = "0.1" }
...
@@ -41,19 +44,38 @@ async-trait = { version = "0.1" }
async_zmq
=
{
version
=
"0.4.0"
}
async_zmq
=
{
version
=
"0.4.0"
}
blake3
=
{
version
=
"1"
}
blake3
=
{
version
=
"1"
}
bytes
=
{
version
=
"1"
}
bytes
=
{
version
=
"1"
}
chrono
=
{
version
=
"0.4"
,
default-features
=
false
,
features
=
[
"alloc"
,
"std"
,
"clock"
,
"now"
,
"serde"
]
}
chrono
=
{
version
=
"0.4"
,
default-features
=
false
,
features
=
[
"alloc"
,
"std"
,
"clock"
,
"now"
,
"serde"
,
]
}
derive_builder
=
{
version
=
"0.20"
}
derive_builder
=
{
version
=
"0.20"
}
derive-getters
=
{
version
=
"0.5"
}
derive-getters
=
{
version
=
"0.5"
}
either
=
{
version
=
"1.13"
,
features
=
["serde"]
}
either
=
{
version
=
"1.13"
,
features
=
["serde"]
}
etcd-client
=
{
version
=
"0.16"
,
features
=
["tls"]
}
etcd-client
=
{
version
=
"0.16"
,
features
=
["tls"]
}
futures
=
{
version
=
"0.3"
}
futures
=
{
version
=
"0.3"
}
hf-hub
=
{
version
=
"0.4.2"
,
default-features
=
false
,
features
=
[
"tokio"
,
"rustls-tls"
,
"ureq"
]
}
hf-hub
=
{
version
=
"0.4.2"
,
default-features
=
false
,
features
=
[
"tokio"
,
"rustls-tls"
,
"ureq"
,
]
}
# ModelExpress for model downloading
modelexpress-client = { git = "https://github.com/ai-dynamo/modelexpress.git", rev = "a232220bf268a475d293914d407f4ae186f443e3", package = "modelexpress-client" }
modelexpress-common = { git = "https://github.com/ai-dynamo/modelexpress.git", rev = "a232220bf268a475d293914d407f4ae186f443e3", package = "modelexpress-common" }
humantime
=
{
version
=
"2.2.0"
}
humantime
=
{
version
=
"2.2.0"
}
libc
=
{
version
=
"0.2"
}
libc
=
{
version
=
"0.2"
}
oneshot
=
{
version
=
"0.1.11"
,
features
=
[
"std"
,
"async"
]
}
oneshot
=
{
version
=
"0.1.11"
,
features
=
[
"std"
,
"async"
]
}
prometheus
=
{
version
=
"0.14"
}
prometheus
=
{
version
=
"0.14"
}
rand
=
{
version
=
"0.9.0"
}
rand
=
{
version
=
"0.9.0"
}
reqwest
=
{
version
=
"0.12.22"
,
default-features
=
false
,
features
=
[
"json"
,
"stream"
,
"rustls-tls"
]
}
reqwest
=
{
version
=
"0.12.22"
,
default-features
=
false
,
features
=
[
"json"
,
"stream"
,
"rustls-tls"
,
]
}
serde
=
{
version
=
"1"
,
features
=
["derive"]
}
serde
=
{
version
=
"1"
,
features
=
["derive"]
}
serde_json
=
{
version
=
"1"
}
serde_json
=
{
version
=
"1"
}
strum
=
{
version
=
"0.27"
,
features
=
["derive"]
}
strum
=
{
version
=
"0.27"
,
features
=
["derive"]
}
...
@@ -62,13 +84,17 @@ thiserror = { version = "2.0.11" }
...
@@ -62,13 +84,17 @@ thiserror = { version = "2.0.11" }
tokio
=
{
version
=
"1"
,
features
=
["full"]
}
tokio
=
{
version
=
"1"
,
features
=
["full"]
}
tokio-stream
=
{
version
=
"0.1"
}
tokio-stream
=
{
version
=
"0.1"
}
tokio-util
=
{
version
=
"0.7"
,
features
=
[
"codec"
,
"net"
,
"rt"
]
}
tokio-util
=
{
version
=
"0.7"
,
features
=
[
"codec"
,
"net"
,
"rt"
]
}
tower-http
=
{
version
=
"0.6"
,
features
=
["trace"]
}
tower-http
=
{
version
=
"0.6"
,
features
=
["trace"]
}
axum
=
{
version
=
"0.8"
,
features
=
["macros"]
}
axum
=
{
version
=
"0.8"
,
features
=
["macros"]
}
tracing
=
{
version
=
"0.1"
}
tracing
=
{
version
=
"0.1"
}
tracing-subscriber
=
{
version
=
"0.3"
,
features
=
[
"env-filter"
,
"local-time"
,
"json"
]
}
tracing-subscriber
=
{
version
=
"0.3"
,
features
=
[
"env-filter"
,
"local-time"
,
"json"
,
]
}
validator
=
{
version
=
"0.20.0"
,
features
=
["derive"]
}
validator
=
{
version
=
"0.20.0"
,
features
=
["derive"]
}
uuid
=
{
version
=
"1.17"
,
features
=
[
"v4"
,
"serde"
]
}
uuid
=
{
version
=
"1.17"
,
features
=
[
"v4"
,
"serde"
]
}
url
=
{
version
=
"2.5"
,
features
=
["serde"]
}
url
=
{
version
=
"2.5"
,
features
=
["serde"]
}
xxhash-rust
=
{
version
=
"0.8"
,
features
=
[
"xxh3"
,
"const_xxh3"
]
}
xxhash-rust
=
{
version
=
"0.8"
,
features
=
[
"xxh3"
,
"const_xxh3"
]
}
[profile.dev.package]
[profile.dev.package]
...
...
container/Dockerfile
View file @
da3b1dbd
...
@@ -290,10 +290,28 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
...
@@ -290,10 +290,28 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
# Install system dependencies
# Install system dependencies
ARG PYTHON_VERSION
ARG PYTHON_VERSION
RUN dnf update -y \
RUN dnf update -y \
&& dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget \
&& dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget unzip \
&& dnf clean all \
&& dnf clean all \
&& rm -rf /var/cache/dnf
&& rm -rf /var/cache/dnf
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    case "${ARCH_ALT}" in \
      x86_64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
      aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
      *) echo "Unsupported architecture: ${ARCH_ALT}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc include/*; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version
# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc
# Copy artifacts from base stage
# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
...
...
lib/llm/Cargo.toml
View file @
da3b1dbd
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
[package]
name
=
"dynamo-llm"
name
=
"dynamo-llm"
...
@@ -36,8 +24,6 @@ testing-etcd = []
...
@@ -36,8 +24,6 @@ testing-etcd = []
block-manager
=
[
"dep:nixl-sys"
,
"dep:cudarc"
,
"dep:ndarray"
,
"dep:nix"
]
block-manager
=
[
"dep:nixl-sys"
,
"dep:cudarc"
,
"dep:ndarray"
,
"dep:nix"
]
cuda
=
["dep:cudarc"]
cuda
=
["dep:cudarc"]
integration
=
["dynamo-runtime/integration"]
integration
=
["dynamo-runtime/integration"]
# NOTE: This feature will be enabled once ModelExpress packages are published
# model-express = ["dep:model_express_client", "dep:model_express_common"]
[[bench]]
[[bench]]
name
=
"tokenizer"
name
=
"tokenizer"
...
@@ -55,14 +41,14 @@ dynamo-runtime = { workspace = true }
...
@@ -55,14 +41,14 @@ dynamo-runtime = { workspace = true }
aho-corasick
=
"1.1"
aho-corasick
=
"1.1"
anyhow
=
{
workspace
=
true
}
anyhow
=
{
workspace
=
true
}
dynamo-async-openai
=
{
workspace
=
true
}
dynamo-async-openai
=
{
workspace
=
true
}
dynamo-parsers
=
{
workspace
=
true
}
dynamo-parsers
=
{
workspace
=
true
}
async-stream
=
{
workspace
=
true
}
async-stream
=
{
workspace
=
true
}
async-trait
=
{
workspace
=
true
}
async-trait
=
{
workspace
=
true
}
async-nats
=
{
workspace
=
true
}
async-nats
=
{
workspace
=
true
}
async_zmq
=
{
workspace
=
true
}
async_zmq
=
{
workspace
=
true
}
bytes
=
{
workspace
=
true
}
bytes
=
{
workspace
=
true
}
chrono
=
{
workspace
=
true
}
chrono
=
{
workspace
=
true
}
derive_builder
=
{
workspace
=
true
}
derive_builder
=
{
workspace
=
true
}
either
=
{
workspace
=
true
}
either
=
{
workspace
=
true
}
etcd-client
=
{
workspace
=
true
}
etcd-client
=
{
workspace
=
true
}
futures
=
{
workspace
=
true
}
futures
=
{
workspace
=
true
}
...
@@ -87,15 +73,11 @@ validator = { workspace = true }
...
@@ -87,15 +73,11 @@ validator = { workspace = true }
url
=
{
workspace
=
true
}
url
=
{
workspace
=
true
}
uuid
=
{
workspace
=
true
}
uuid
=
{
workspace
=
true
}
xxhash-rust
=
{
workspace
=
true
}
xxhash-rust
=
{
workspace
=
true
}
modelexpress-client = { workspace = true }
# ModelExpress client for model downloading
modelexpress-common = { workspace = true }
# NOTE: These will be uncommented once the packages are published to crates.io
# model_express_client = { version = "0.1.0", optional = true }
# model_express_common = { version = "0.1.0", optional = true }
akin
=
"0.4.0"
akin
=
"0.4.0"
bitflags
=
{
version
=
"2.4"
,
features
=
["serde"]
}
bitflags
=
{
version
=
"2.4"
,
features
=
["serde"]
}
blake3
=
{
version
=
"1.8"
,
features
=
[
"mmap"
,
"rayon"
]
}
blake3
=
{
version
=
"1.8"
,
features
=
[
"mmap"
,
"rayon"
]
}
bytemuck
=
"1.22"
bytemuck
=
"1.22"
candle-core
=
{
version
=
"0.9.1"
}
candle-core
=
{
version
=
"0.9.1"
}
derive-getters
=
"0.5"
derive-getters
=
"0.5"
...
@@ -105,10 +87,13 @@ rayon = "1"
...
@@ -105,10 +87,13 @@ rayon = "1"
dashmap
=
{
version
=
"5.5.3"
}
dashmap
=
{
version
=
"5.5.3"
}
# input/text
# input/text
dialoguer
=
{
version
=
"0.11"
,
default-features
=
false
,
features
=
[
"editor"
,
"history"
]
}
dialoguer
=
{
version
=
"0.11"
,
default-features
=
false
,
features
=
[
"editor"
,
"history"
,
]
}
# block_manager
# block_manager
nixl-sys
=
{
version
=
"0.4.1"
,
optional
=
true
}
nixl-sys
=
{
version
=
"0.4.1"
,
optional
=
true
}
cudarc
=
{
version
=
"0.17.1"
,
features
=
["cuda-12020"]
,
optional
=
true
}
cudarc
=
{
version
=
"0.17.1"
,
features
=
["cuda-12020"]
,
optional
=
true
}
ndarray
=
{
version
=
"0.16"
,
optional
=
true
}
ndarray
=
{
version
=
"0.16"
,
optional
=
true
}
nix
=
{
version
=
"0.26"
,
optional
=
true
}
nix
=
{
version
=
"0.26"
,
optional
=
true
}
...
@@ -119,7 +104,7 @@ unicode-segmentation = "1.12"
...
@@ -119,7 +104,7 @@ unicode-segmentation = "1.12"
# http-service
# http-service
axum
=
{
workspace
=
true
}
axum
=
{
workspace
=
true
}
axum-server
=
{
version
=
"0.7"
,
features
=
["tls-rustls"]
}
axum-server
=
{
version
=
"0.7"
,
features
=
["tls-rustls"]
}
tower-http
=
{
workspace
=
true
}
tower-http
=
{
workspace
=
true
}
rustls
=
{
version
=
"0.23"
}
rustls
=
{
version
=
"0.23"
}
...
@@ -164,6 +149,8 @@ approx = "0.5"
...
@@ -164,6 +149,8 @@ approx = "0.5"
assert_matches
=
"1.5"
assert_matches
=
"1.5"
criterion
=
{
version
=
"0.3"
,
features
=
["html_reports"]
}
criterion
=
{
version
=
"0.3"
,
features
=
["html_reports"]
}
hf-hub
=
{
workspace
=
true
}
hf-hub
=
{
workspace
=
true
}
modelexpress-client
=
{
workspace
=
true
}
modelexpress-common
=
{
workspace
=
true
}
proptest
=
"1.5.0"
proptest
=
"1.5.0"
reqwest
=
{
workspace
=
true
}
reqwest
=
{
workspace
=
true
}
rstest
=
"0.18.2"
rstest
=
"0.18.2"
...
@@ -181,4 +168,4 @@ aligned-vec = "0.6.4"
...
@@ -181,4 +168,4 @@ aligned-vec = "0.6.4"
lazy_static
=
"1.4"
lazy_static
=
"1.4"
[build-dependencies]
[build-dependencies]
tonic-build
=
{
version
=
"0.13.1"
}
tonic-build
=
{
version
=
"0.13.1"
}
lib/llm/src/hub.rs
View file @
da3b1dbd
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
#![allow(unexpected_cfgs)]
use
hf_hub
::
api
::
tokio
::
ApiBuilder
;
use
std
::
env
;
use
std
::
env
;
use
std
::
path
::{
Path
,
PathBuf
};
use
std
::
path
::{
Path
,
PathBuf
};
#[cfg(feature
=
"model-express"
)]
use
modelexpress_client
::{
use
model_express_client
::{
Client
as
MxClient
,
ClientConfig
as
MxClientConfig
,
ModelProvider
as
MxModelProvider
,
Client
as
MxClient
,
ClientConfig
as
MxClientConfig
,
ModelProvider
as
MxModelProvider
,
};
};
#[cfg(feature
=
"model-express"
)]
use
modelexpress_common
::
download
as
mx
;
use
model_express_common
::
download
as
mx
;
/// Example: export MODEL_EXPRESS_URL=http://localhost:8001
const
MODEL_EXPRESS_ENDPOINT_ENV_VAR
:
&
str
=
"MODEL_EXPRESS_URL"
;
const
MODEL_EXPRESS_ENDPOINT_ENV_VAR
:
&
str
=
"MODEL_EXPRESS_URL"
;
const
HF_TOKEN_ENV_VAR
:
&
str
=
"HF_TOKEN"
;
/// Checks if a file is a model weight file
fn
is_weight_file
(
filename
:
&
str
)
->
bool
{
filename
.ends_with
(
".bin"
)
||
filename
.ends_with
(
".safetensors"
)
||
filename
.ends_with
(
".h5"
)
||
filename
.ends_with
(
".msgpack"
)
||
filename
.ends_with
(
".ckpt.index"
)
}
/// Attempt to download a model from Hugging Face using ModelExpress client
/// Download a model using ModelExpress client. The client first requests for the model
/// Only called when model-express feature is enabled, otherwise it will fall back to homonymous hf-hub function
/// from the server and fallbacks to direct download in case of server failure.
/// Returns the directory it is in
/// If ignore_weights is true, model weight files will be skipped
/// If ignore_weights is true, model weight files will be skipped
#[cfg(feature
=
"model-express"
)]
/// Returns the path to the model files
pub
async
fn
from_hf
(
name
:
impl
AsRef
<
Path
>
,
ignore_weights
:
bool
)
->
anyhow
::
Result
<
PathBuf
>
{
pub
async
fn
from_hf
(
name
:
impl
AsRef
<
Path
>
,
ignore_weights
:
bool
)
->
anyhow
::
Result
<
PathBuf
>
{
let
name
=
name
.as_ref
();
let
name
=
name
.as_ref
();
let
model_name
=
name
.display
()
.to_string
();
let
model_name
=
name
.display
()
.to_string
();
// Only use ModelExpress if the environment variable is explicitly set
let
mut
config
:
MxClientConfig
=
MxClientConfig
::
default
();
if
let
Ok
(
endpoint
)
=
env
::
var
(
MODEL_EXPRESS_ENDPOINT_ENV_VAR
)
{
if
let
Ok
(
endpoint
)
=
env
::
var
(
MODEL_EXPRESS_ENDPOINT_ENV_VAR
)
{
tracing
::
info!
(
config
=
config
.with_endpoint
(
endpoint
);
"ModelExpress endpoint configured, attempting to use ModelExpress for model: {model_name}"
}
);
let
config
:
MxClientConfig
=
MxClientConfig
::
default
()
.with_endpoint
(
endpoint
.clone
());
let
result
=
match
MxClient
::
new
(
config
.clone
()
)
.await
{
let
result
=
match
MxClient
::
new
(
config
)
.await
{
Ok
(
mut
client
)
=>
{
Ok
(
mut
client
)
=>
{
tracing
::
info!
(
"Successfully connected to ModelExpress server"
);
tracing
::
info!
(
"Successfully connected to ModelExpress server"
);
match
client
match
client
.request_model_with_provider_and_fallback
(
.request_model_with_provider_and_fallback
(
&
model_name
,
&
model_name
,
MxModelProvider
::
HuggingFace
,
MxModelProvider
::
HuggingFace
,
ignore_weights
,
)
)
.await
.await
{
{
Ok
(())
=>
{
Ok
(())
=>
{
tracing
::
info!
(
"Server download succeeded for model: {model_name}"
);
tracing
::
info!
(
"Server download succeeded for model: {model_name}"
);
get_mx_model_path_from_cache
(
&
model_name
)
match
client
.get_model_path
(
&
model_name
)
.await
{
}
Ok
(
path
)
=>
Ok
(
path
),
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
warn!
(
tracing
::
warn!
(
"Server download failed for model '{model_name}': {e}. Falling back to direct download."
"Failed to resolve local model path after server download for '{model_name}': {e}.
\
Falling back to direct download."
);
);
mx_download_direct
(
&
model_name
)
.await
mx_download_direct
(
&
model_name
,
ignore_weights
)
.await
}
}
}
}
Err
(
e
)
=>
{
tracing
::
warn!
(
"Cannot connect to ModelExpress server: {e}. Using direct download."
);
mx_download_direct
(
&
model_name
)
.await
}
}
};
match
result
{
Ok
(
path
)
=>
{
tracing
::
info!
(
"ModelExpress download completed successfully for model: {model_name}"
);
return
Ok
(
path
);
}
}
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
warn!
(
tracing
::
warn!
(
"ModelExpress
download failed for model '{model_name}': {e}. Falling back to
hf-hub
."
"Server
download failed for model '{model_name}': {e}. Falling back to
direct download
."
);
);
mx_download_direct
(
&
model_name
,
ignore_weights
)
.await
}
}
}
}
}
}
Err
(
e
)
=>
{
tracing
::
info!
(
"Using hf-hub for model: {model_name}"
);
tracing
::
warn!
(
"Cannot connect to ModelExpress server: {e}. Using direct download."
);
download_with_hf_hub
(
&
model_name
,
ignore_weights
)
.await
mx_download_direct
(
&
model_name
,
ignore_weights
)
.await
}
/// Attempt to download a model from Hugging Face using hf-hub directly
/// Called when model-express feature is not enabled
/// Returns the directory it is in
/// If ignore_weights is true, model weight files will be skipped
#[cfg(not(feature
=
"model-express"
))]
pub
async
fn
from_hf
(
name
:
impl
AsRef
<
Path
>
,
ignore_weights
:
bool
)
->
anyhow
::
Result
<
PathBuf
>
{
let
name
=
name
.as_ref
();
let
model_name
=
name
.display
()
.to_string
();
if
env
::
var
(
MODEL_EXPRESS_ENDPOINT_ENV_VAR
)
.is_ok
()
{
tracing
::
warn!
(
"ModelExpress endpoint configured but model-express feature not enabled. Using hf-hub."
);
}
tracing
::
info!
(
"Using hf-hub for model: {model_name}"
);
download_with_hf_hub
(
&
model_name
,
ignore_weights
)
.await
}
// Direct download using the ModelExpress client.
#[cfg(feature
=
"model-express"
)]
async
fn
mx_download_direct
(
model_name
:
&
str
)
->
anyhow
::
Result
<
PathBuf
>
{
let
cache_dir
=
get_model_express_cache_dir
();
mx
::
download_model
(
model_name
,
MxModelProvider
::
HuggingFace
,
Some
(
cache_dir
))
.await
}
/// Attempt to download a model from Hugging Face with hf-hub
/// Returns the directory it is in
/// If ignore_weights is true, model weight files will be skipped
async
fn
download_with_hf_hub
(
model_name
:
&
str
,
ignore_weights
:
bool
)
->
anyhow
::
Result
<
PathBuf
>
{
let
token
=
env
::
var
(
HF_TOKEN_ENV_VAR
)
.ok
();
let
api
=
ApiBuilder
::
from_env
()
.with_progress
(
true
)
.with_token
(
token
)
.high
()
.build
()
?
;
let
repo
=
api
.model
(
model_name
.to_string
());
let
info
=
repo
.info
()
.await
.map_err
(|
e
|
anyhow
::
anyhow!
(
"Failed to fetch model '{model_name}' from HuggingFace: {e}. Is this a valid HuggingFace ID?"
))
?
;
if
info
.siblings
.is_empty
()
{
return
Err
(
anyhow
::
anyhow!
(
"Model '{model_name}' exists but contains no downloadable files."
));
}
let
mut
model_path
=
PathBuf
::
new
();
let
mut
files_downloaded
=
false
;
for
sibling
in
info
.siblings
{
if
is_ignored_file
(
&
sibling
.rfilename
)
||
is_image_file
(
&
sibling
.rfilename
)
{
continue
;
}
if
ignore_weights
&&
is_weight_file
(
&
sibling
.rfilename
)
{
continue
;
}
}
};
match
re
po
.get
(
&
sibling
.rfilename
)
.awai
t
{
match
re
sul
t
{
Ok
(
path
)
=>
{
Ok
(
path
)
=>
{
model_path
=
path
;
tracing
::
info!
(
"ModelExpress download completed successfully for model: {model_name}"
)
;
files_downloaded
=
true
;
Ok
(
path
)
}
}
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
anyhow
::
anyhow!
(
tracing
::
warn!
(
"ModelExpress download failed for model '{model_name}': {e}"
);
"Failed to download file '{}' from model '{model_name}': {e}"
,
Err
(
e
)
sibling
.rfilename
));
}
}
}
}
}
if
!
files_downloaded
{
let
file_type
=
if
ignore_weights
{
"non-weight"
}
else
{
"valid"
};
return
Err
(
anyhow
::
anyhow!
(
"No {file_type} files found for model '{model_name}'."
));
}
match
model_path
.parent
()
{
Some
(
path
)
=>
Ok
(
path
.to_path_buf
()),
None
=>
Err
(
anyhow
::
anyhow!
(
"Invalid HF cache path: {}"
,
model_path
.display
()
)),
}
}
fn
is_ignored_file
(
filename
:
&
str
)
->
bool
{
const
IGNORED_FILES
:
[
&
str
;
5
]
=
[
".gitattributes"
,
"LICENSE"
,
"LICENSE.txt"
,
"README.md"
,
"USE_POLICY.md"
,
];
IGNORED_FILES
.contains
(
&
filename
)
}
fn
is_image_file
(
filename
:
&
str
)
->
bool
{
filename
.ends_with
(
".png"
)
||
filename
.ends_with
(
"PNG"
)
||
filename
.ends_with
(
".jpg"
)
||
filename
.ends_with
(
"JPG"
)
||
filename
.ends_with
(
".jpeg"
)
||
filename
.ends_with
(
"JPEG"
)
}
}
#[cfg(feature
=
"m
odel
-e
xpress
"
)]
// Direct download using the M
odel
E
xpress
client.
fn
get_mx_model_path_from_cache
(
model_name
:
&
str
)
->
anyhow
::
Result
<
PathBuf
>
{
async
fn
mx_download_direct
(
model_name
:
&
str
,
ignore_weights
:
bool
)
->
anyhow
::
Result
<
PathBuf
>
{
let
cache_dir
=
get_model_express_cache_dir
();
let
cache_dir
=
get_model_express_cache_dir
();
let
model_dir
=
cache_dir
.join
(
model_name
);
mx
::
download_model
(
model_name
,
if
!
model_dir
.exists
()
{
MxModelProvider
::
HuggingFace
,
return
Err
(
anyhow
::
anyhow!
(
Some
(
cache_dir
),
"Model '{model_name}' was downloaded but directory not found at expected location: {}"
,
ignore_weights
,
model_dir
.display
()
)
));
.await
}
Ok
(
model_dir
)
}
}
#[cfg(feature
=
"model-express"
)]
// TODO: remove in the future. This is a temporary workaround to find common
// cache directory between client and server.
fn
get_model_express_cache_dir
()
->
PathBuf
{
fn
get_model_express_cache_dir
()
->
PathBuf
{
if
let
Ok
(
cache_path
)
=
env
::
var
(
"HF_HUB_CACHE"
)
{
if
let
Ok
(
cache_path
)
=
env
::
var
(
"HF_HUB_CACHE"
)
{
return
PathBuf
::
from
(
cache_path
);
return
PathBuf
::
from
(
cache_path
);
}
}
if
let
Ok
(
cache_path
)
=
env
::
var
(
"MODEL_EXPRESS_PATH"
)
{
if
let
Ok
(
cache_path
)
=
env
::
var
(
"MODEL_EXPRESS_
CACHE_
PATH"
)
{
return
PathBuf
::
from
(
cache_path
);
return
PathBuf
::
from
(
cache_path
);
}
}
let
home
=
env
::
var
(
"HOME"
)
let
home
=
env
::
var
(
"HOME"
)
...
@@ -248,52 +114,10 @@ mod tests {
...
@@ -248,52 +114,10 @@ mod tests {
let
_
result
:
anyhow
::
Result
<
PathBuf
>
=
from_hf
(
test_path
,
false
)
.await
;
let
_
result
:
anyhow
::
Result
<
PathBuf
>
=
from_hf
(
test_path
,
false
)
.await
;
}
}
#[cfg(feature
=
"model-express"
)]
#[test]
#[test]
fn
test_get_model_express_cache_dir
()
{
fn
test_get_model_express_cache_dir
()
{
let
cache_dir
=
get_model_express_cache_dir
();
let
cache_dir
=
get_model_express_cache_dir
();
assert
!
(
!
cache_dir
.to_string_lossy
()
.is_empty
());
assert
!
(
!
cache_dir
.to_string_lossy
()
.is_empty
());
assert
!
(
cache_dir
.is_absolute
()
||
cache_dir
.starts_with
(
"."
));
assert
!
(
cache_dir
.is_absolute
()
||
cache_dir
.starts_with
(
"."
));
}
}
#[test]
fn
test_is_ignored_file
()
{
assert
!
(
is_ignored_file
(
".gitattributes"
));
assert
!
(
is_ignored_file
(
"LICENSE"
));
assert
!
(
is_ignored_file
(
"LICENSE.txt"
));
assert
!
(
is_ignored_file
(
"README.md"
));
assert
!
(
is_ignored_file
(
"USE_POLICY.md"
));
assert
!
(
!
is_ignored_file
(
"model.bin"
));
assert
!
(
!
is_ignored_file
(
"tokenizer.json"
));
assert
!
(
!
is_ignored_file
(
"config.json"
));
}
#[test]
fn
test_is_weight_file
()
{
assert
!
(
is_weight_file
(
"model.bin"
));
assert
!
(
is_weight_file
(
"model.safetensors"
));
assert
!
(
is_weight_file
(
"model.h5"
));
assert
!
(
is_weight_file
(
"model.msgpack"
));
assert
!
(
is_weight_file
(
"model.ckpt.index"
));
assert
!
(
!
is_weight_file
(
"tokenizer.json"
));
assert
!
(
!
is_weight_file
(
"config.json"
));
assert
!
(
!
is_weight_file
(
"README.md"
));
}
#[test]
fn
test_is_image_file
()
{
assert
!
(
is_image_file
(
"image.png"
));
assert
!
(
is_image_file
(
"image.PNG"
));
assert
!
(
is_image_file
(
"photo.jpg"
));
assert
!
(
is_image_file
(
"photo.JPG"
));
assert
!
(
is_image_file
(
"picture.jpeg"
));
assert
!
(
is_image_file
(
"picture.JPEG"
));
assert
!
(
!
is_image_file
(
"model.bin"
));
assert
!
(
!
is_image_file
(
"tokenizer.json"
));
assert
!
(
!
is_image_file
(
"config.json"
));
assert
!
(
!
is_image_file
(
"README.md"
));
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment