Unverified Commit da3b1dbd authored by Hyunjae Woo's avatar Hyunjae Woo Committed by GitHub
Browse files

feat: modelexpress dynamo integration (#3191)

parent 5323b61f
......@@ -141,6 +141,9 @@ name = "anyhow"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
dependencies = [
"backtrace",
]
[[package]]
name = "apodize"
......@@ -183,6 +186,12 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "arraydeque"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236"
[[package]]
name = "arrayref"
version = "0.3.9"
......@@ -798,7 +807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.10",
"regex-automata",
"serde",
]
......@@ -1011,7 +1020,7 @@ dependencies = [
"serde_json",
"syn 2.0.106",
"tempfile",
"toml",
"toml 0.8.23",
]
[[package]]
......@@ -1175,6 +1184,15 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "colored"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "compact_str"
version = "0.9.0"
......@@ -1190,6 +1208,26 @@ dependencies = [
"static_assertions",
]
[[package]]
name = "config"
version = "0.15.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0faa974509d38b33ff89282db9c3295707ccf031727c0de9772038ec526852ba"
dependencies = [
"async-trait",
"convert_case",
"json5",
"pathdiff",
"ron",
"rust-ini",
"serde",
"serde-untagged",
"serde_json",
"toml 0.9.5",
"winnow",
"yaml-rust2",
]
[[package]]
name = "console"
version = "0.15.11"
......@@ -1248,12 +1286,41 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "const-random"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
dependencies = [
"const-random-macro",
]
[[package]]
name = "const-random-macro"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom 0.2.16",
"once_cell",
"tiny-keccak",
]
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "convert_case"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation"
version = "0.9.4"
......@@ -1782,7 +1849,7 @@ dependencies = [
"bytemuck",
"bytemuck_derive",
"hashbrown 0.15.5",
"regex-syntax 0.8.6",
"regex-syntax",
"strum",
]
......@@ -1866,6 +1933,15 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "dlv-list"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f"
dependencies = [
"const-random",
]
[[package]]
name = "doctest-file"
version = "1.0.0"
......@@ -2025,6 +2101,8 @@ dependencies = [
"memmap2",
"minijinja",
"minijinja-contrib",
"modelexpress-client",
"modelexpress-common",
"ndarray",
"nix 0.26.4",
"nixl-sys",
......@@ -2448,8 +2526,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
"bit-set 0.5.3",
"regex-automata 0.4.10",
"regex-syntax 0.8.6",
"regex-automata",
"regex-syntax",
]
[[package]]
......@@ -2459,8 +2537,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
dependencies = [
"bit-set 0.8.0",
"regex-automata 0.4.10",
"regex-syntax 0.8.6",
"regex-automata",
"regex-syntax",
]
[[package]]
......@@ -2496,7 +2574,7 @@ dependencies = [
"serde",
"serde_json",
"tempfile",
"toml",
"toml 0.8.23",
"uncased",
"version_check",
]
......@@ -3104,8 +3182,8 @@ dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata 0.4.10",
"regex-syntax 0.8.6",
"regex-automata",
"regex-syntax",
]
[[package]]
......@@ -3192,6 +3270,15 @@ dependencies = [
"foldhash",
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown 0.15.5",
]
[[package]]
name = "hdrhistogram"
version = "7.5.4"
......@@ -3895,10 +3982,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
dependencies = [
"jiff-static",
"jiff-tzdb-platform",
"log",
"portable-atomic",
"portable-atomic-util",
"serde",
"windows-sys 0.59.0",
]
[[package]]
......@@ -3912,6 +4001,21 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "jiff-tzdb"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524"
[[package]]
name = "jiff-tzdb-platform"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8"
dependencies = [
"jiff-tzdb",
]
[[package]]
name = "jobserver"
version = "0.1.34"
......@@ -3948,6 +4052,17 @@ dependencies = [
"unicode-general-category",
]
[[package]]
name = "json5"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1"
dependencies = [
"pest",
"pest_derive",
"serde",
]
[[package]]
name = "jsonschema"
version = "0.17.1"
......@@ -4138,7 +4253,7 @@ dependencies = [
"anyhow",
"derivre",
"indexmap 2.11.0",
"regex-syntax 0.8.6",
"regex-syntax",
"serde",
"serde_json",
"toktrie 1.0.0",
......@@ -4327,11 +4442,11 @@ dependencies = [
[[package]]
name = "matchers"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
dependencies = [
"regex-automata 0.1.10",
"regex-automata",
]
[[package]]
......@@ -4649,7 +4764,7 @@ dependencies = [
"rand_isaac",
"rayon",
"regex",
"regex-automata 0.4.10",
"regex-automata",
"reqwest 0.12.23",
"rubato",
"rust-mcp-schema",
......@@ -4673,7 +4788,7 @@ dependencies = [
"tokio-rayon",
"tokio-tungstenite 0.24.0",
"toktrie_hf_tokenizers 1.0.0",
"toml",
"toml 0.8.23",
"tqdm",
"tracing",
"tracing-subscriber",
......@@ -4756,6 +4871,50 @@ dependencies = [
"rayon",
]
[[package]]
name = "modelexpress-client"
version = "0.1.0"
source = "git+https://github.com/ai-dynamo/modelexpress.git?rev=a232220bf268a475d293914d407f4ae186f443e3#a232220bf268a475d293914d407f4ae186f443e3"
dependencies = [
"anyhow",
"clap 4.5.46",
"colored",
"futures",
"modelexpress-common",
"prost",
"serde",
"serde_json",
"thiserror 2.0.16",
"tokio",
"tonic 0.13.1",
"tracing",
"tracing-subscriber",
"uuid 1.18.0",
]
[[package]]
name = "modelexpress-common"
version = "0.1.0"
source = "git+https://github.com/ai-dynamo/modelexpress.git?rev=a232220bf268a475d293914d407f4ae186f443e3#a232220bf268a475d293914d407f4ae186f443e3"
dependencies = [
"anyhow",
"async-trait",
"chrono",
"clap 4.5.46",
"config",
"hf-hub",
"jiff",
"prost",
"serde",
"serde_json",
"serde_yaml",
"thiserror 2.0.16",
"tokio",
"tonic 0.13.1",
"tonic-build",
"tracing",
]
[[package]]
name = "monostate"
version = "0.1.14"
......@@ -4992,12 +5151,11 @@ dependencies = [
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
version = "0.50.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
dependencies = [
"overload",
"winapi 0.3.9",
"windows-sys 0.52.0",
]
[[package]]
......@@ -5331,6 +5489,16 @@ dependencies = [
"num-traits",
]
[[package]]
name = "ordered-multimap"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79"
dependencies = [
"dlv-list",
"hashbrown 0.14.5",
]
[[package]]
name = "os_info"
version = "3.12.0"
......@@ -5343,12 +5511,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "packedvec"
version = "1.2.5"
......@@ -5388,6 +5550,12 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pathdiff"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "pear"
version = "0.2.9"
......@@ -5799,7 +5967,7 @@ dependencies = [
"rand 0.9.2",
"rand_chacha 0.9.0",
"rand_xorshift",
"regex-syntax 0.8.6",
"regex-syntax",
"rusty-fork",
"tempfile",
"unarray",
......@@ -6284,17 +6452,8 @@ checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.10",
"regex-syntax 0.8.6",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
"regex-automata",
"regex-syntax",
]
[[package]]
......@@ -6305,15 +6464,9 @@ checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.6",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.6"
......@@ -6470,6 +6623,18 @@ dependencies = [
"serde",
]
[[package]]
name = "ron"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
dependencies = [
"base64 0.21.7",
"bitflags 2.9.3",
"serde",
"serde_derive",
]
[[package]]
name = "rstest"
version = "0.18.2"
......@@ -6552,6 +6717,16 @@ dependencies = [
"realfft",
]
[[package]]
name = "rust-ini"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7"
dependencies = [
"cfg-if 1.0.3",
"ordered-multimap",
]
[[package]]
name = "rust-mcp-schema"
version = "0.1.11"
......@@ -7034,6 +7209,17 @@ dependencies = [
"serde",
]
[[package]]
name = "serde-untagged"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34836a629bcbc6f1afdf0907a744870039b1e14c0561cb26094fa683b158eff3"
dependencies = [
"erased-serde",
"serde",
"typeid",
]
[[package]]
name = "serde_cbor"
version = "0.11.2"
......@@ -7127,6 +7313,15 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_spanned"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83"
dependencies = [
"serde",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
......@@ -7820,7 +8015,7 @@ dependencies = [
"cfg-expr",
"heck 0.5.0",
"pkg-config",
"toml",
"toml 0.8.23",
"version-compare",
]
......@@ -8058,7 +8253,7 @@ dependencies = [
"rayon",
"rayon-cond",
"regex",
"regex-syntax 0.8.6",
"regex-syntax",
"serde",
"serde_json",
"spm_precompiled",
......@@ -8273,11 +8468,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"serde_spanned 0.6.9",
"toml_datetime 0.6.11",
"toml_edit",
]
[[package]]
name = "toml"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8"
dependencies = [
"serde",
"serde_spanned 1.0.0",
"toml_datetime 0.7.0",
"toml_parser",
"winnow",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
......@@ -8287,6 +8495,15 @@ dependencies = [
"serde",
]
[[package]]
name = "toml_datetime"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
......@@ -8295,12 +8512,21 @@ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap 2.11.0",
"serde",
"serde_spanned",
"toml_datetime",
"serde_spanned 0.6.9",
"toml_datetime 0.6.11",
"toml_write",
"winnow",
]
[[package]]
name = "toml_parser"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627"
dependencies = [
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.2"
......@@ -8518,14 +8744,14 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
version = "0.3.19"
version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"regex-automata",
"serde",
"serde_json",
"sharded-slab",
......@@ -9662,6 +9888,17 @@ version = "0.8.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
[[package]]
name = "yaml-rust2"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2462ea039c445496d8793d052e13787f2b90e750b833afee748e601c17621ed9"
dependencies = [
"arraydeque",
"encoding_rs",
"hashlink",
]
[[package]]
name = "yansi"
version = "1.0.1"
......
......@@ -30,7 +30,10 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed"]
dynamo-runtime = { path = "lib/runtime", version = "0.5.0" }
dynamo-llm = { path = "lib/llm", version = "0.5.0" }
dynamo-tokens = { path = "lib/tokens", version = "0.5.0" }
dynamo-async-openai = { path = "lib/async-openai", version = "0.5.0", features = ["byot", "rustls"]}
dynamo-async-openai = { path = "lib/async-openai", version = "0.5.0", features = [
"byot",
"rustls",
] }
dynamo-parsers = { path = "lib/parsers", version = "0.5.0" }
# External dependencies
......@@ -41,19 +44,38 @@ async-trait = { version = "0.1" }
async_zmq = { version = "0.4.0" }
blake3 = { version = "1" }
bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "std", "clock", "now", "serde"] }
chrono = { version = "0.4", default-features = false, features = [
"alloc",
"std",
"clock",
"now",
"serde",
] }
derive_builder = { version = "0.20" }
derive-getters = { version = "0.5" }
either = { version = "1.13", features = ["serde"] }
etcd-client = { version = "0.16", features = ["tls"] }
futures = { version = "0.3" }
hf-hub = { version = "0.4.2", default-features = false, features = ["tokio", "rustls-tls", "ureq"] }
hf-hub = { version = "0.4.2", default-features = false, features = [
"tokio",
"rustls-tls",
"ureq",
] }
# ModelExpress for model downloading
modelexpress-client = { git = "https://github.com/ai-dynamo/modelexpress.git", rev = "a232220bf268a475d293914d407f4ae186f443e3", package = "modelexpress-client" }
modelexpress-common = { git = "https://github.com/ai-dynamo/modelexpress.git", rev = "a232220bf268a475d293914d407f4ae186f443e3", package = "modelexpress-common" }
humantime = { version = "2.2.0" }
libc = { version = "0.2" }
oneshot = { version = "0.1.11", features = ["std", "async"] }
prometheus = { version = "0.14" }
rand = { version = "0.9.0" }
reqwest = { version = "0.12.22", default-features = false, features = ["json", "stream", "rustls-tls"] }
reqwest = { version = "0.12.22", default-features = false, features = [
"json",
"stream",
"rustls-tls",
] }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
strum = { version = "0.27", features = ["derive"] }
......@@ -62,13 +84,17 @@ thiserror = { version = "2.0.11" }
tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["codec", "net", "rt"] }
tower-http = {version = "0.6", features=["trace"]}
axum = { version = "0.8" , features = ["macros"]}
tower-http = { version = "0.6", features = ["trace"] }
axum = { version = "0.8", features = ["macros"] }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] }
tracing-subscriber = { version = "0.3", features = [
"env-filter",
"local-time",
"json",
] }
validator = { version = "0.20.0", features = ["derive"] }
uuid = { version = "1.17", features = ["v4", "serde"] }
url = {version = "2.5", features = ["serde"]}
url = { version = "2.5", features = ["serde"] }
xxhash-rust = { version = "0.8", features = ["xxh3", "const_xxh3"] }
[profile.dev.package]
......
......@@ -290,10 +290,28 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
# Install system dependencies
ARG PYTHON_VERSION
RUN dnf update -y \
&& dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget \
&& dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget unzip \
&& dnf clean all \
&& rm -rf /var/cache/dnf
# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional).
# Downloads the pinned upstream protoc release matching ${ARCH_ALT} (assumed to be set
# earlier in this stage — TODO confirm), removes any protoc already present at
# /usr/local/bin/protoc and /usr/bin/protoc, and installs the binary plus its bundled
# include files under /usr/local.
# Notes:
#  - 'include/*' is quoted so that unzip, not the shell, expands the archive-member pattern.
#  - The downloaded archive is deleted in the same layer so it does not bloat the image.
#  - `ln -sf` keeps the step re-runnable if the link already exists.
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    case "${ARCH_ALT}" in \
        x86_64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
        aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
        *) echo "Unsupported architecture: ${ARCH_ALT}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc 'include/*'; \
    rm -f /tmp/protoc.zip; \
    chmod +x /usr/local/bin/protoc; \
    ln -sf /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version
# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc
# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name = "dynamo-llm"
......@@ -36,8 +24,6 @@ testing-etcd = []
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:ndarray", "dep:nix"]
cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"]
# NOTE: This feature will be enabled once ModelExpress packages are published
# model-express = ["dep:model_express_client", "dep:model_express_common"]
[[bench]]
name = "tokenizer"
......@@ -55,14 +41,14 @@ dynamo-runtime = { workspace = true }
aho-corasick = "1.1"
anyhow = { workspace = true }
dynamo-async-openai = { workspace = true }
dynamo-parsers = { workspace = true}
dynamo-parsers = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
async-nats = { workspace = true }
async_zmq = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
derive_builder = {workspace = true }
derive_builder = { workspace = true }
either = { workspace = true }
etcd-client = { workspace = true }
futures = { workspace = true }
......@@ -87,15 +73,11 @@ validator = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true }
# ModelExpress client for model downloading
# NOTE: These will be uncommented once the packages are published to crates.io
# model_express_client = { version = "0.1.0", optional = true }
# model_express_common = { version = "0.1.0", optional = true }
modelexpress-client = { workspace = true }
modelexpress-common = { workspace = true }
akin = "0.4.0"
bitflags = { version = "2.4", features = ["serde"] }
blake3 = { version = "1.8", features=["mmap", "rayon"] }
blake3 = { version = "1.8", features = ["mmap", "rayon"] }
bytemuck = "1.22"
candle-core = { version = "0.9.1" }
derive-getters = "0.5"
......@@ -105,10 +87,13 @@ rayon = "1"
dashmap = { version = "5.5.3" }
# input/text
dialoguer = { version = "0.11", default-features = false, features = ["editor", "history"] }
dialoguer = { version = "0.11", default-features = false, features = [
"editor",
"history",
] }
# block_manager
nixl-sys = {version = "0.4.1", optional = true }
nixl-sys = { version = "0.4.1", optional = true }
cudarc = { version = "0.17.1", features = ["cuda-12020"], optional = true }
ndarray = { version = "0.16", optional = true }
nix = { version = "0.26", optional = true }
......@@ -119,7 +104,7 @@ unicode-segmentation = "1.12"
# http-service
axum = { workspace = true }
axum-server = { version = "0.7", features = ["tls-rustls"] }
tower-http = {workspace = true}
tower-http = { workspace = true }
rustls = { version = "0.23" }
......@@ -164,6 +149,8 @@ approx = "0.5"
assert_matches = "1.5"
criterion = { version = "0.3", features = ["html_reports"] }
hf-hub = { workspace = true }
modelexpress-client = { workspace = true }
modelexpress-common = { workspace = true }
proptest = "1.5.0"
reqwest = { workspace = true }
rstest = "0.18.2"
......@@ -181,4 +168,4 @@ aligned-vec = "0.6.4"
lazy_static = "1.4"
[build-dependencies]
tonic-build = { version = "0.13.1"}
tonic-build = { version = "0.13.1" }
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#![allow(unexpected_cfgs)]
use hf_hub::api::tokio::ApiBuilder;
use std::env;
use std::path::{Path, PathBuf};
#[cfg(feature = "model-express")]
use model_express_client::{
use modelexpress_client::{
Client as MxClient, ClientConfig as MxClientConfig, ModelProvider as MxModelProvider,
};
#[cfg(feature = "model-express")]
use model_express_common::download as mx;
use modelexpress_common::download as mx;
/// Example: export MODEL_EXPRESS_URL=http://localhost:8001
const MODEL_EXPRESS_ENDPOINT_ENV_VAR: &str = "MODEL_EXPRESS_URL";
const HF_TOKEN_ENV_VAR: &str = "HF_TOKEN";
/// Reports whether `filename` names a model weight file.
///
/// A name counts as a weight file when it ends with one of the suffixes
/// `.bin`, `.safetensors`, `.h5`, `.msgpack` or `.ckpt.index`.
/// The comparison is case-sensitive.
fn is_weight_file(filename: &str) -> bool {
    const WEIGHT_SUFFIXES: [&str; 5] = [
        ".bin",
        ".safetensors",
        ".h5",
        ".msgpack",
        ".ckpt.index",
    ];
    WEIGHT_SUFFIXES
        .iter()
        .any(|suffix| filename.ends_with(*suffix))
}
/// Attempt to download a model from Hugging Face using ModelExpress client
/// Only called when model-express feature is enabled, otherwise it will fall back to homonymous hf-hub function
/// Returns the directory it is in
/// Download a model using the ModelExpress client. The client first requests the model
/// from the server and falls back to direct download in case of server failure.
/// If ignore_weights is true, model weight files will be skipped
#[cfg(feature = "model-express")]
/// Returns the path to the model files
pub async fn from_hf(name: impl AsRef<Path>, ignore_weights: bool) -> anyhow::Result<PathBuf> {
let name = name.as_ref();
let model_name = name.display().to_string();
// Only use ModelExpress if the environment variable is explicitly set
let mut config: MxClientConfig = MxClientConfig::default();
if let Ok(endpoint) = env::var(MODEL_EXPRESS_ENDPOINT_ENV_VAR) {
tracing::info!(
"ModelExpress endpoint configured, attempting to use ModelExpress for model: {model_name}"
);
let config: MxClientConfig = MxClientConfig::default().with_endpoint(endpoint.clone());
config = config.with_endpoint(endpoint);
}
let result = match MxClient::new(config.clone()).await {
let result = match MxClient::new(config).await {
Ok(mut client) => {
tracing::info!("Successfully connected to ModelExpress server");
match client
.request_model_with_provider_and_fallback(
&model_name,
MxModelProvider::HuggingFace,
ignore_weights,
)
.await
{
Ok(()) => {
tracing::info!("Server download succeeded for model: {model_name}");
get_mx_model_path_from_cache(&model_name)
}
match client.get_model_path(&model_name).await {
Ok(path) => Ok(path),
Err(e) => {
tracing::warn!(
"Server download failed for model '{model_name}': {e}. Falling back to direct download."
"Failed to resolve local model path after server download for '{model_name}': {e}. \
Falling back to direct download."
);
mx_download_direct(&model_name).await
}
}
mx_download_direct(&model_name, ignore_weights).await
}
Err(e) => {
tracing::warn!(
"Cannot connect to ModelExpress server: {e}. Using direct download."
);
mx_download_direct(&model_name).await
}
};
match result {
Ok(path) => {
tracing::info!(
"ModelExpress download completed successfully for model: {model_name}"
);
return Ok(path);
}
Err(e) => {
tracing::warn!(
"ModelExpress download failed for model '{model_name}': {e}. Falling back to hf-hub."
"Server download failed for model '{model_name}': {e}. Falling back to direct download."
);
mx_download_direct(&model_name, ignore_weights).await
}
}
}
tracing::info!("Using hf-hub for model: {model_name}");
download_with_hf_hub(&model_name, ignore_weights).await
}
/// Download a model from Hugging Face using hf-hub directly.
///
/// This variant is compiled when the `model-express` feature is disabled. If the
/// ModelExpress endpoint environment variable is set anyway, a warning is logged
/// and the download still goes through hf-hub.
///
/// If `ignore_weights` is true, model weight files are skipped.
/// Returns the directory the model files are in.
#[cfg(not(feature = "model-express"))]
pub async fn from_hf(name: impl AsRef<Path>, ignore_weights: bool) -> anyhow::Result<PathBuf> {
    let model_name = name.as_ref().display().to_string();

    // The endpoint cannot be honoured without the feature; tell the operator why.
    let endpoint_configured = env::var(MODEL_EXPRESS_ENDPOINT_ENV_VAR).is_ok();
    if endpoint_configured {
        tracing::warn!(
            "ModelExpress endpoint configured but model-express feature not enabled. Using hf-hub."
        );
    }

    tracing::info!("Using hf-hub for model: {model_name}");
    download_with_hf_hub(&model_name, ignore_weights).await
}
/// Download `model_name` directly through the ModelExpress download helper,
/// without going through a ModelExpress server.
///
/// The Hugging Face provider is used and files are placed in the directory
/// returned by `get_model_express_cache_dir`.
#[cfg(feature = "model-express")]
async fn mx_download_direct(model_name: &str) -> anyhow::Result<PathBuf> {
    let cache_root = Some(get_model_express_cache_dir());
    let provider = MxModelProvider::HuggingFace;
    mx::download_model(model_name, provider, cache_root).await
}
/// Attempt to download a model from Hugging Face with hf-hub
/// Returns the directory it is in
/// If ignore_weights is true, model weight files will be skipped
async fn download_with_hf_hub(model_name: &str, ignore_weights: bool) -> anyhow::Result<PathBuf> {
let token = env::var(HF_TOKEN_ENV_VAR).ok();
let api = ApiBuilder::from_env()
.with_progress(true)
.with_token(token)
.high()
.build()?;
let repo = api.model(model_name.to_string());
let info = repo.info().await
.map_err(|e| anyhow::anyhow!("Failed to fetch model '{model_name}' from HuggingFace: {e}. Is this a valid HuggingFace ID?"))?;
if info.siblings.is_empty() {
return Err(anyhow::anyhow!(
"Model '{model_name}' exists but contains no downloadable files."
));
}
let mut model_path = PathBuf::new();
let mut files_downloaded = false;
for sibling in info.siblings {
if is_ignored_file(&sibling.rfilename) || is_image_file(&sibling.rfilename) {
continue;
}
if ignore_weights && is_weight_file(&sibling.rfilename) {
continue;
Err(e) => {
tracing::warn!("Cannot connect to ModelExpress server: {e}. Using direct download.");
mx_download_direct(&model_name, ignore_weights).await
}
};
match repo.get(&sibling.rfilename).await {
match result {
Ok(path) => {
model_path = path;
files_downloaded = true;
tracing::info!("ModelExpress download completed successfully for model: {model_name}");
Ok(path)
}
Err(e) => {
return Err(anyhow::anyhow!(
"Failed to download file '{}' from model '{model_name}': {e}",
sibling.rfilename
));
tracing::warn!("ModelExpress download failed for model '{model_name}': {e}");
Err(e)
}
}
}
if !files_downloaded {
let file_type = if ignore_weights {
"non-weight"
} else {
"valid"
};
return Err(anyhow::anyhow!(
"No {file_type} files found for model '{model_name}'."
));
}
match model_path.parent() {
Some(path) => Ok(path.to_path_buf()),
None => Err(anyhow::anyhow!(
"Invalid HF cache path: {}",
model_path.display()
)),
}
}
/// Reports whether `filename` is one of the repository housekeeping files
/// (`.gitattributes`, `LICENSE`, `LICENSE.txt`, `README.md`, `USE_POLICY.md`).
///
/// The whole name must match exactly; the comparison is case-sensitive and
/// does not look at path components.
fn is_ignored_file(filename: &str) -> bool {
    matches!(
        filename,
        ".gitattributes" | "LICENSE" | "LICENSE.txt" | "README.md" | "USE_POLICY.md"
    )
}
/// Reports whether `filename` has an image extension (`png`, `jpg` or `jpeg`).
///
/// The extension is the text after the last `.` in `filename` and is compared
/// ASCII case-insensitively, so `a.png`, `a.PNG` and `a.Png` all match.
/// A name without a `.` is never an image: the previous suffix checks for
/// `"PNG"`, `"JPG"` and `"JPEG"` lacked the dot and therefore also matched
/// names such as `fooPNG`.
fn is_image_file(filename: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 3] = ["png", "jpg", "jpeg"];
    match filename.rsplit_once('.') {
        Some((_, extension)) => IMAGE_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(*known)),
        None => false,
    }
}
#[cfg(feature = "model-express")]
fn get_mx_model_path_from_cache(model_name: &str) -> anyhow::Result<PathBuf> {
// Direct download using the ModelExpress client.
async fn mx_download_direct(model_name: &str, ignore_weights: bool) -> anyhow::Result<PathBuf> {
let cache_dir = get_model_express_cache_dir();
let model_dir = cache_dir.join(model_name);
if !model_dir.exists() {
return Err(anyhow::anyhow!(
"Model '{model_name}' was downloaded but directory not found at expected location: {}",
model_dir.display()
));
}
Ok(model_dir)
mx::download_model(
model_name,
MxModelProvider::HuggingFace,
Some(cache_dir),
ignore_weights,
)
.await
}
#[cfg(feature = "model-express")]
// TODO: remove in the future. This is a temporary workaround to find common
// cache directory between client and server.
fn get_model_express_cache_dir() -> PathBuf {
if let Ok(cache_path) = env::var("HF_HUB_CACHE") {
return PathBuf::from(cache_path);
}
if let Ok(cache_path) = env::var("MODEL_EXPRESS_PATH") {
if let Ok(cache_path) = env::var("MODEL_EXPRESS_CACHE_PATH") {
return PathBuf::from(cache_path);
}
let home = env::var("HOME")
......@@ -248,52 +114,10 @@ mod tests {
let _result: anyhow::Result<PathBuf> = from_hf(test_path, false).await;
}
/// The resolved cache directory must be non-empty and either absolute or
/// rooted at the current directory (`.`).
#[cfg(feature = "model-express")]
#[test]
fn test_get_model_express_cache_dir() {
    let resolved = get_model_express_cache_dir();

    let rendered = resolved.to_string_lossy();
    assert!(!rendered.is_empty());

    let is_anchored = resolved.is_absolute() || resolved.starts_with(".");
    assert!(is_anchored);
}
/// Housekeeping files are ignored; model artifacts are not.
#[test]
fn test_is_ignored_file() {
    let ignored = [
        ".gitattributes",
        "LICENSE",
        "LICENSE.txt",
        "README.md",
        "USE_POLICY.md",
    ];
    for name in &ignored {
        assert!(is_ignored_file(name));
    }

    let kept = ["model.bin", "tokenizer.json", "config.json"];
    for name in &kept {
        assert!(!is_ignored_file(name));
    }
}
/// Every known weight suffix is detected; tokenizer, config and docs files are not.
#[test]
fn test_is_weight_file() {
    let weights = [
        "model.bin",
        "model.safetensors",
        "model.h5",
        "model.msgpack",
        "model.ckpt.index",
    ];
    for name in &weights {
        assert!(is_weight_file(name));
    }

    let others = ["tokenizer.json", "config.json", "README.md"];
    for name in &others {
        assert!(!is_weight_file(name));
    }
}
/// Lower- and upper-case PNG/JPG/JPEG names are images; other files are not.
#[test]
fn test_is_image_file() {
    let images = [
        "image.png",
        "image.PNG",
        "photo.jpg",
        "photo.JPG",
        "picture.jpeg",
        "picture.JPEG",
    ];
    for name in &images {
        assert!(is_image_file(name));
    }

    let others = ["model.bin", "tokenizer.json", "config.json", "README.md"];
    for name in &others {
        assert!(!is_image_file(name));
    }
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment