Unverified Commit 6a0e67ed authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

feat: add LoRA common APIs and implementation for lora management (#4464)

parent bbaab9f7
...@@ -567,9 +567,9 @@ dependencies = [ ...@@ -567,9 +567,9 @@ dependencies = [
[[package]] [[package]]
name = "aws-lc-rs" name = "aws-lc-rs"
version = "1.15.0" version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5932a7d9d28b0d2ea34c6b3779d35e3dd6f6345317c34e73438c4f1f29144151" checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f"
dependencies = [ dependencies = [
"aws-lc-sys", "aws-lc-sys",
"zeroize", "zeroize",
...@@ -577,11 +577,10 @@ dependencies = [ ...@@ -577,11 +577,10 @@ dependencies = [
[[package]] [[package]]
name = "aws-lc-sys" name = "aws-lc-sys"
version = "0.33.0" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1826f2e4cfc2cd19ee53c42fbf68e2f81ec21108e0b7ecf6a71cf062137360fc" checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6"
dependencies = [ dependencies = [
"bindgen 0.72.1",
"cc", "cc",
"cmake", "cmake",
"dunce", "dunce",
...@@ -851,26 +850,6 @@ dependencies = [ ...@@ -851,26 +850,6 @@ dependencies = [
"syn 2.0.110", "syn 2.0.110",
] ]
[[package]]
name = "bindgen"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags 2.10.0",
"cexpr",
"clang-sys",
"itertools 0.13.0",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash 2.1.1",
"shlex",
"syn 2.0.110",
]
[[package]] [[package]]
name = "bindgen_cuda" name = "bindgen_cuda"
version = "0.1.5" version = "0.1.5"
...@@ -1321,9 +1300,9 @@ version = "0.27.0" ...@@ -1321,9 +1300,9 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb" checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb"
dependencies = [ dependencies = [
"clap 4.5.52", "clap 4.5.53",
"heck 0.4.1", "heck 0.4.1",
"indexmap 2.12.0", "indexmap 2.12.1",
"log", "log",
"proc-macro2", "proc-macro2",
"quote", "quote",
...@@ -1336,9 +1315,9 @@ dependencies = [ ...@@ -1336,9 +1315,9 @@ dependencies = [
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.46" version = "1.2.47"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" checksum = "cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07"
dependencies = [ dependencies = [
"find-msvc-tools", "find-msvc-tools",
"jobserver", "jobserver",
...@@ -1389,7 +1368,7 @@ version = "0.13.10" ...@@ -1389,7 +1368,7 @@ version = "0.13.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf"
dependencies = [ dependencies = [
"indexmap 2.12.0", "indexmap 2.12.1",
"lazy_static", "lazy_static",
"num-traits", "num-traits",
"regex", "regex",
...@@ -1497,9 +1476,9 @@ dependencies = [ ...@@ -1497,9 +1476,9 @@ dependencies = [
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.5.52" version = "4.5.53"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8" checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
dependencies = [ dependencies = [
"clap_builder", "clap_builder",
"clap_derive", "clap_derive",
...@@ -1507,9 +1486,9 @@ dependencies = [ ...@@ -1507,9 +1486,9 @@ dependencies = [
[[package]] [[package]]
name = "clap_builder" name = "clap_builder"
version = "4.5.52" version = "4.5.53"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1" checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
dependencies = [ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
...@@ -1808,7 +1787,7 @@ dependencies = [ ...@@ -1808,7 +1787,7 @@ dependencies = [
"anes", "anes",
"cast", "cast",
"ciborium", "ciborium",
"clap 4.5.52", "clap 4.5.53",
"criterion-plot 0.5.0", "criterion-plot 0.5.0",
"futures", "futures",
"is-terminal", "is-terminal",
...@@ -2594,7 +2573,7 @@ dependencies = [ ...@@ -2594,7 +2573,7 @@ dependencies = [
"axum 0.8.4", "axum 0.8.4",
"blake2", "blake2",
"bytes", "bytes",
"clap 4.5.52", "clap 4.5.53",
"dashmap 6.1.0", "dashmap 6.1.0",
"derive_builder", "derive_builder",
"etcd-client", "etcd-client",
...@@ -2638,7 +2617,7 @@ dependencies = [ ...@@ -2638,7 +2617,7 @@ dependencies = [
"dynamo-llm", "dynamo-llm",
"dynamo-runtime", "dynamo-runtime",
"either", "either",
"indexmap 2.12.0", "indexmap 2.12.1",
"mistralrs", "mistralrs",
"serde_json", "serde_json",
"tokio", "tokio",
...@@ -2706,6 +2685,7 @@ dependencies = [ ...@@ -2706,6 +2685,7 @@ dependencies = [
"ndarray-npy", "ndarray-npy",
"nix 0.26.4", "nix 0.26.4",
"nixl-sys", "nixl-sys",
"object_store",
"offset-allocator", "offset-allocator",
"oneshot", "oneshot",
"parking_lot", "parking_lot",
...@@ -2793,7 +2773,7 @@ dependencies = [ ...@@ -2793,7 +2773,7 @@ dependencies = [
"anyhow", "anyhow",
"async-stream", "async-stream",
"async-trait", "async-trait",
"clap 4.5.52", "clap 4.5.53",
"dynamo-async-openai", "dynamo-async-openai",
"dynamo-engine-mistralrs", "dynamo-engine-mistralrs",
"dynamo-llm", "dynamo-llm",
...@@ -3893,9 +3873,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" ...@@ -3893,9 +3873,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]] [[package]]
name = "globset" name = "globset"
version = "0.4.18" version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"bstr", "bstr",
...@@ -3928,7 +3908,7 @@ dependencies = [ ...@@ -3928,7 +3908,7 @@ dependencies = [
"futures-sink", "futures-sink",
"futures-util", "futures-util",
"http 0.2.12", "http 0.2.12",
"indexmap 2.12.0", "indexmap 2.12.1",
"slab", "slab",
"tokio", "tokio",
"tokio-util", "tokio-util",
...@@ -3947,7 +3927,7 @@ dependencies = [ ...@@ -3947,7 +3927,7 @@ dependencies = [
"futures-core", "futures-core",
"futures-sink", "futures-sink",
"http 1.3.1", "http 1.3.1",
"indexmap 2.12.0", "indexmap 2.12.1",
"slab", "slab",
"tokio", "tokio",
"tokio-util", "tokio-util",
...@@ -4003,9 +3983,9 @@ dependencies = [ ...@@ -4003,9 +3983,9 @@ dependencies = [
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.16.0" version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]] [[package]]
name = "hashlink" name = "hashlink"
...@@ -4677,12 +4657,12 @@ dependencies = [ ...@@ -4677,12 +4657,12 @@ dependencies = [
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.12.0" version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
dependencies = [ dependencies = [
"equivalent", "equivalent",
"hashbrown 0.16.0", "hashbrown 0.16.1",
"serde", "serde",
"serde_core", "serde_core",
] ]
...@@ -4751,9 +4731,9 @@ dependencies = [ ...@@ -4751,9 +4731,9 @@ dependencies = [
[[package]] [[package]]
name = "insta" name = "insta"
version = "1.43.2" version = "1.44.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" checksum = "e8732d3774162a0851e3f2b150eb98f31a9885dd75985099421d393385a01dfd"
dependencies = [ dependencies = [
"console", "console",
"globset", "globset",
...@@ -5043,7 +5023,7 @@ dependencies = [ ...@@ -5043,7 +5023,7 @@ dependencies = [
"anyhow", "anyhow",
"base64 0.21.7", "base64 0.21.7",
"bytecount", "bytecount",
"clap 4.5.52", "clap 4.5.53",
"fancy-regex 0.11.0", "fancy-regex 0.11.0",
"fraction", "fraction",
"getrandom 0.2.16", "getrandom 0.2.16",
...@@ -5657,7 +5637,7 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d ...@@ -5657,7 +5637,7 @@ source = "git+https://github.com/guidance-ai/llguidance.git?rev=c432092#c432092d
dependencies = [ dependencies = [
"anyhow", "anyhow",
"derivre", "derivre",
"indexmap 2.12.0", "indexmap 2.12.1",
"regex-syntax", "regex-syntax",
"serde", "serde",
"serde_json", "serde_json",
...@@ -5905,6 +5885,16 @@ dependencies = [ ...@@ -5905,6 +5885,16 @@ dependencies = [
"rayon", "rayon",
] ]
[[package]]
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if 1.0.4",
"digest",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.6" version = "2.7.6"
...@@ -6082,11 +6072,11 @@ dependencies = [ ...@@ -6082,11 +6072,11 @@ dependencies = [
"anyhow", "anyhow",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)", "candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"candle-nn", "candle-nn",
"clap 4.5.52", "clap 4.5.53",
"either", "either",
"futures", "futures",
"image", "image",
"indexmap 2.12.0", "indexmap 2.12.1",
"mistralrs-core", "mistralrs-core",
"rand 0.9.2", "rand 0.9.2",
"reqwest 0.12.24", "reqwest 0.12.24",
...@@ -6129,7 +6119,7 @@ dependencies = [ ...@@ -6129,7 +6119,7 @@ dependencies = [
"candle-nn", "candle-nn",
"cfgrammar", "cfgrammar",
"chrono", "chrono",
"clap 4.5.52", "clap 4.5.53",
"csv", "csv",
"derive-new", "derive-new",
"derive_more 2.0.1", "derive_more 2.0.1",
...@@ -6145,7 +6135,7 @@ dependencies = [ ...@@ -6145,7 +6135,7 @@ dependencies = [
"html2text", "html2text",
"http 1.3.1", "http 1.3.1",
"image", "image",
"indexmap 2.12.0", "indexmap 2.12.1",
"indicatif", "indicatif",
"interprocess", "interprocess",
"itertools 0.14.0", "itertools 0.14.0",
...@@ -6306,7 +6296,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -6306,7 +6296,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9462ec6cd8b3d6beaa262ad0907a8ba297c7e3a220c11e4159742d8c275588eb" checksum = "9462ec6cd8b3d6beaa262ad0907a8ba297c7e3a220c11e4159742d8c275588eb"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap 4.5.52", "clap 4.5.53",
"colored", "colored",
"futures", "futures",
"modelexpress-common", "modelexpress-common",
...@@ -6330,7 +6320,7 @@ dependencies = [ ...@@ -6330,7 +6320,7 @@ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
"chrono", "chrono",
"clap 4.5.52", "clap 4.5.53",
"config", "config",
"hf-hub", "hf-hub",
"jiff", "jiff",
...@@ -6699,7 +6689,7 @@ version = "0.7.1" ...@@ -6699,7 +6689,7 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d80bd4b5b8363cfd933000a8757a453e58ee10ee6e400c38ae31db512444a31" checksum = "6d80bd4b5b8363cfd933000a8757a453e58ee10ee6e400c38ae31db512444a31"
dependencies = [ dependencies = [
"bindgen 0.71.1", "bindgen",
"cc", "cc",
"libc", "libc",
"os_info", "os_info",
...@@ -7138,6 +7128,44 @@ dependencies = [ ...@@ -7138,6 +7128,44 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "object_store"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740"
dependencies = [
"async-trait",
"base64 0.22.1",
"bytes",
"chrono",
"form_urlencoded",
"futures",
"http 1.3.1",
"http-body-util",
"httparse",
"humantime",
"hyper 1.8.1",
"itertools 0.14.0",
"md-5",
"parking_lot",
"percent-encoding",
"quick-xml",
"rand 0.9.2",
"reqwest 0.12.24",
"ring",
"rustls-pemfile",
"serde",
"serde_json",
"serde_urlencoded",
"thiserror 2.0.17",
"tokio",
"tracing",
"url",
"walkdir",
"wasm-bindgen-futures",
"web-time",
]
[[package]] [[package]]
name = "offset-allocator" name = "offset-allocator"
version = "0.2.0" version = "0.2.0"
...@@ -7222,7 +7250,7 @@ dependencies = [ ...@@ -7222,7 +7250,7 @@ dependencies = [
"anyhow", "anyhow",
"base64 0.22.1", "base64 0.22.1",
"bstr", "bstr",
"clap 4.5.52", "clap 4.5.53",
"fancy-regex 0.13.0", "fancy-regex 0.13.0",
"futures", "futures",
"image", "image",
...@@ -7549,9 +7577,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" ...@@ -7549,9 +7577,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.8.3" version = "2.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4" checksum = "cbcfd20a6d4eeba40179f05735784ad32bdaef05ce8e8af05f180d45bb3e7e22"
dependencies = [ dependencies = [
"memchr", "memchr",
"ucd-trie", "ucd-trie",
...@@ -7559,9 +7587,9 @@ dependencies = [ ...@@ -7559,9 +7587,9 @@ dependencies = [
[[package]] [[package]]
name = "pest_derive" name = "pest_derive"
version = "2.8.3" version = "2.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de" checksum = "51f72981ade67b1ca6adc26ec221be9f463f2b5839c7508998daa17c23d94d7f"
dependencies = [ dependencies = [
"pest", "pest",
"pest_generator", "pest_generator",
...@@ -7569,9 +7597,9 @@ dependencies = [ ...@@ -7569,9 +7597,9 @@ dependencies = [
[[package]] [[package]]
name = "pest_generator" name = "pest_generator"
version = "2.8.3" version = "2.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843" checksum = "dee9efd8cdb50d719a80088b76f81aec7c41ed6d522ee750178f83883d271625"
dependencies = [ dependencies = [
"pest", "pest",
"pest_meta", "pest_meta",
...@@ -7582,9 +7610,9 @@ dependencies = [ ...@@ -7582,9 +7610,9 @@ dependencies = [
[[package]] [[package]]
name = "pest_meta" name = "pest_meta"
version = "2.8.3" version = "2.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a" checksum = "bf1d70880e76bdc13ba52eafa6239ce793d85c8e43896507e43dd8984ff05b82"
dependencies = [ dependencies = [
"pest", "pest",
"sha2", "sha2",
...@@ -7597,7 +7625,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -7597,7 +7625,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [ dependencies = [
"fixedbitset", "fixedbitset",
"indexmap 2.12.0", "indexmap 2.12.1",
] ]
[[package]] [[package]]
...@@ -8185,6 +8213,16 @@ dependencies = [ ...@@ -8185,6 +8213,16 @@ dependencies = [
"unsigned-varint 0.8.0", "unsigned-varint 0.8.0",
] ]
[[package]]
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "quinn" name = "quinn"
version = "0.11.9" version = "0.11.9"
...@@ -9496,7 +9534,7 @@ version = "1.0.145" ...@@ -9496,7 +9534,7 @@ version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [ dependencies = [
"indexmap 2.12.0", "indexmap 2.12.1",
"itoa", "itoa",
"memchr", "memchr",
"ryu", "ryu",
...@@ -9584,7 +9622,7 @@ dependencies = [ ...@@ -9584,7 +9622,7 @@ dependencies = [
"chrono", "chrono",
"hex", "hex",
"indexmap 1.9.3", "indexmap 1.9.3",
"indexmap 2.12.0", "indexmap 2.12.1",
"schemars 0.9.0", "schemars 0.9.0",
"schemars 1.1.0", "schemars 1.1.0",
"serde_core", "serde_core",
...@@ -9611,7 +9649,7 @@ version = "0.9.34+deprecated" ...@@ -9611,7 +9649,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [ dependencies = [
"indexmap 2.12.0", "indexmap 2.12.1",
"itoa", "itoa",
"ryu", "ryu",
"serde", "serde",
...@@ -10835,7 +10873,7 @@ version = "0.22.27" ...@@ -10835,7 +10873,7 @@ version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [ dependencies = [
"indexmap 2.12.0", "indexmap 2.12.1",
"serde", "serde",
"serde_spanned 0.6.9", "serde_spanned 0.6.9",
"toml_datetime 0.6.11", "toml_datetime 0.6.11",
...@@ -10849,7 +10887,7 @@ version = "0.23.7" ...@@ -10849,7 +10887,7 @@ version = "0.23.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d"
dependencies = [ dependencies = [
"indexmap 2.12.0", "indexmap 2.12.1",
"toml_datetime 0.7.3", "toml_datetime 0.7.3",
"toml_parser", "toml_parser",
"winnow", "winnow",
...@@ -11040,7 +11078,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" ...@@ -11040,7 +11078,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [ dependencies = [
"futures-core", "futures-core",
"futures-util", "futures-util",
"indexmap 2.12.0", "indexmap 2.12.1",
"pin-project-lite", "pin-project-lite",
"slab", "slab",
"sync_wrapper 1.0.2", "sync_wrapper 1.0.2",
...@@ -11637,7 +11675,7 @@ version = "5.4.0" ...@@ -11637,7 +11675,7 @@ version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993" checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993"
dependencies = [ dependencies = [
"indexmap 2.12.0", "indexmap 2.12.1",
"serde", "serde",
"serde_json", "serde_json",
"utoipa-gen", "utoipa-gen",
...@@ -12623,18 +12661,18 @@ dependencies = [ ...@@ -12623,18 +12661,18 @@ dependencies = [
[[package]] [[package]]
name = "zerocopy" name = "zerocopy"
version = "0.8.27" version = "0.8.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" checksum = "43fa6694ed34d6e57407afbccdeecfa268c470a7d2a5b0cf49ce9fcc345afb90"
dependencies = [ dependencies = [
"zerocopy-derive", "zerocopy-derive",
] ]
[[package]] [[package]]
name = "zerocopy-derive" name = "zerocopy-derive"
version = "0.8.27" version = "0.8.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" checksum = "c640b22cd9817fae95be82f0d2f90b11f7605f6c319d16705c459b27ac2cbc26"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
...@@ -12762,7 +12800,7 @@ dependencies = [ ...@@ -12762,7 +12800,7 @@ dependencies = [
"crc32fast", "crc32fast",
"crossbeam-utils", "crossbeam-utils",
"displaydoc", "displaydoc",
"indexmap 2.12.0", "indexmap 2.12.1",
"num_enum", "num_enum",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
...@@ -12778,7 +12816,7 @@ dependencies = [ ...@@ -12778,7 +12816,7 @@ dependencies = [
"crossbeam-utils", "crossbeam-utils",
"displaydoc", "displaydoc",
"flate2", "flate2",
"indexmap 2.12.0", "indexmap 2.12.1",
"memchr", "memchr",
"thiserror 2.0.17", "thiserror 2.0.17",
"zopfli", "zopfli",
...@@ -12793,7 +12831,7 @@ dependencies = [ ...@@ -12793,7 +12831,7 @@ dependencies = [
"arbitrary", "arbitrary",
"crc32fast", "crc32fast",
"flate2", "flate2",
"indexmap 2.12.0", "indexmap 2.12.1",
"memchr", "memchr",
"zopfli", "zopfli",
] ]
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
LoRA management infrastructure
"""
from .lora import LoRAManager, LoRASourceProtocol
__all__ = ["LoRAManager", "LoRASourceProtocol"]
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Minimal LoRA management layer with extensible sources.
"""
from .manager import LoRAManager, LoRASourceProtocol
__all__ = ["LoRAManager", "LoRASourceProtocol"]
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Minimal Python wrapper around Rust LoRA core with extension points for custom sources.
"""
import asyncio
from pathlib import Path
from typing import Any, Dict, Optional, Protocol
from dynamo.llm import LoRADownloader
class LoRASourceProtocol(Protocol):
    """
    Structural interface for user-supplied Python LoRA sources.

    Any object that provides these two coroutines can be registered with a
    LoRA manager to serve an additional URI scheme.
    """

    async def exists(self, lora_uri: str) -> bool:
        """Report whether this source can provide the LoRA named by ``lora_uri``."""
        ...

    async def download(self, lora_uri: str, dest_path: Path) -> Path:
        """Fetch the LoRA into ``dest_path`` and return the path that was actually used."""
        ...
class LoRAManager:
    """
    Minimal Python wrapper around the Rust core with extension points.

    The manager uses the Rust-based LoRADownloader for S3 and local file sources,
    and allows registering custom Python sources for other URI schemes.
    """

    def __init__(self, cache_path: Optional[Path] = None):
        """
        Initialize the LoRA manager.

        Args:
            cache_path: Optional custom cache path. If not provided, uses DYN_LORA_PATH env var.
        """
        # Single unified Rust interface handles both downloading and caching
        cache_str = str(cache_path) if cache_path else None
        self._downloader = LoRADownloader(cache_str)
        # Extension point: custom sources, keyed by normalized URI scheme
        self._custom_sources: Dict[str, LoRASourceProtocol] = {}

    @staticmethod
    def _normalize_scheme(scheme: str) -> str:
        """Return ``scheme`` lower-cased and without a trailing "://"."""
        separator = "://"
        if scheme.endswith(separator):
            scheme = scheme[: -len(separator)]
        # URI schemes are case-insensitive, so compare them in one canonical case.
        return scheme.lower()

    def register_custom_source(self, scheme: str, source: LoRASourceProtocol):
        """
        Register a custom Python source for a URI scheme.

        Args:
            scheme: URI scheme (e.g., "hf" for hf:// URIs). Matching is
                case-insensitive and a trailing "://" is tolerated.
            source: LoRA source implementing LoRASourceProtocol
        """
        self._custom_sources[self._normalize_scheme(scheme)] = source

    async def download_lora(self, lora_uri: str) -> Dict[str, Any]:
        """
        Download LoRA if needed, return local path.

        The source is inferred from the URI scheme:
        - file:// -> Local filesystem (Rust)
        - s3:// -> S3 (Rust)
        - Custom schemes -> Registered Python sources

        Args:
            lora_uri: Source URI (file://, s3://, or custom scheme)

        Returns:
            Dictionary with:
            - status: "success" or "error"
            - local_path: Local path to LoRA (if successful)
            - message: Error message (if error)
        """
        try:
            # Extract the scheme and normalize it the same way registration does,
            # so "HF://x" finds a source registered as "hf" (or "hf://").
            scheme = (
                self._normalize_scheme(lora_uri.split("://")[0])
                if "://" in lora_uri
                else None
            )

            # Check for custom Python source matching the scheme
            if scheme and scheme in self._custom_sources:
                source = self._custom_sources[scheme]
                if not await source.exists(lora_uri):
                    return {
                        "status": "error",
                        "message": f"LoRA not found at {lora_uri}",
                    }
                # The source always receives the caller's original URI.
                cache_key = self._uri_to_cache_key(lora_uri)
                dest_path = Path(self._downloader.get_cache_path(cache_key))
                local_path = await source.download(lora_uri, dest_path)
            else:
                # Use Rust downloader (handles file:// and s3://)
                local_path = Path(await self._downloader.download_if_needed(lora_uri))

            return {"status": "success", "local_path": str(local_path)}
        except asyncio.CancelledError:
            # Cancellation must propagate; it is not a download failure.
            raise
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def is_cached(self, lora_uri: str) -> bool:
        """Check if LoRA is already cached locally."""
        cache_key = self._uri_to_cache_key(lora_uri)
        return self._downloader.is_cached(cache_key)

    def _uri_to_cache_key(self, uri: str) -> str:
        """Flatten a URI into a cache key by replacing "://", ".", "/" and "\\"."""
        return (
            uri.replace("://", "__")
            .replace(".", "_")
            .replace("/", "_")
            .replace("\\", "_")
        )
...@@ -1583,6 +1583,7 @@ dependencies = [ ...@@ -1583,6 +1583,7 @@ dependencies = [
"ndarray", "ndarray",
"ndarray-interp", "ndarray-interp",
"ndarray-npy", "ndarray-npy",
"object_store",
"offset-allocator", "offset-allocator",
"oneshot", "oneshot",
"parking_lot", "parking_lot",
...@@ -3703,6 +3704,16 @@ dependencies = [ ...@@ -3703,6 +3704,16 @@ dependencies = [
"rayon", "rayon",
] ]
[[package]]
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if 1.0.4",
"digest",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.6" version = "2.7.6"
...@@ -4260,6 +4271,44 @@ dependencies = [ ...@@ -4260,6 +4271,44 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "object_store"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740"
dependencies = [
"async-trait",
"base64 0.22.1",
"bytes",
"chrono",
"form_urlencoded",
"futures",
"http",
"http-body-util",
"httparse",
"humantime",
"hyper",
"itertools 0.14.0",
"md-5",
"parking_lot",
"percent-encoding",
"quick-xml",
"rand 0.9.2",
"reqwest",
"ring",
"rustls-pemfile",
"serde",
"serde_json",
"serde_urlencoded",
"thiserror 2.0.17",
"tokio",
"tracing",
"url",
"walkdir",
"wasm-bindgen-futures",
"web-time",
]
[[package]] [[package]]
name = "offset-allocator" name = "offset-allocator"
version = "0.2.0" version = "0.2.0"
...@@ -5128,6 +5177,16 @@ version = "2.0.1" ...@@ -5128,6 +5177,16 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "quinn" name = "quinn"
version = "0.11.9" version = "0.11.9"
......
...@@ -174,6 +174,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { ...@@ -174,6 +174,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<llm::kv::ZmqKvEventPublisher>()?; m.add_class::<llm::kv::ZmqKvEventPublisher>()?;
m.add_class::<llm::kv::ZmqKvEventPublisherConfig>()?; m.add_class::<llm::kv::ZmqKvEventPublisherConfig>()?;
m.add_class::<llm::kv::KvRecorder>()?; m.add_class::<llm::kv::KvRecorder>()?;
m.add_class::<llm::lora::LoRADownloader>()?;
m.add_class::<http::HttpService>()?; m.add_class::<http::HttpService>()?;
m.add_class::<http::HttpAsyncEngine>()?; m.add_class::<http::HttpAsyncEngine>()?;
m.add_class::<context::Context>()?; m.add_class::<context::Context>()?;
......
...@@ -30,5 +30,6 @@ pub mod backend; ...@@ -30,5 +30,6 @@ pub mod backend;
pub mod entrypoint; pub mod entrypoint;
pub mod kv; pub mod kv;
pub mod local_model; pub mod local_model;
pub mod lora;
pub mod model_card; pub mod model_card;
pub mod preprocessor; pub mod preprocessor;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Python bindings for LoRA downloading and caching
//!
//! Provides a single unified interface (LoRADownloader) for all LoRA operations.
use dynamo_llm::lora::{
LoRACache as RsLoRACache, LoRADownloader as RsLoRADownloader, LocalLoRASource, S3LoRASource,
};
use pyo3::prelude::*;
use std::path::PathBuf;
use std::sync::Arc;
/// Python-facing handle that bundles LoRA downloading and cache queries.
///
/// Exposed to Python as `LoRADownloader`. A local-filesystem source is always
/// installed; an S3 source is added only when it can be built from the environment.
#[pyclass(name = "LoRADownloader")]
pub struct LoRADownloader {
    /// Shared Rust downloader; a clone of this handle moves into each download future.
    inner: Arc<RsLoRADownloader>,
    /// Cache handle backing the synchronous cache-query methods.
    cache: RsLoRACache,
}

#[pymethods]
impl LoRADownloader {
    /// Build a downloader, optionally rooted at an explicit cache directory.
    ///
    /// Without `cache_path` the cache is configured from the environment; a failure
    /// there is raised as a Python `RuntimeError`.
    #[new]
    #[pyo3(signature = (cache_path=None))]
    fn new(cache_path: Option<String>) -> PyResult<Self> {
        let cache = if let Some(dir) = cache_path {
            RsLoRACache::new(PathBuf::from(dir))
        } else {
            RsLoRACache::from_env().map_err(|err| {
                pyo3::exceptions::PyRuntimeError::new_err(format!(
                    "Failed to create cache: {}",
                    err
                ))
            })?
        };

        // The local source is unconditional; S3 is best-effort and skipped when its
        // environment-based construction fails.
        let local_source: Arc<dyn dynamo_llm::lora::LoRASource> =
            Arc::new(LocalLoRASource::new());
        let mut sources = vec![local_source];
        if let Ok(s3_source) = S3LoRASource::from_env() {
            sources.push(Arc::new(s3_source));
        }

        let inner = Arc::new(RsLoRADownloader::new(sources, cache.clone()));
        Ok(Self { inner, cache })
    }

    /// Resolve a LoRA URI to a local path, downloading only when required.
    ///
    /// - file:// URIs: the original path is returned (no copy)
    /// - s3:// URIs: the adapter is downloaded to the cache and the cache path returned
    ///
    /// Returns a Python awaitable yielding the path as a string; failures surface as
    /// `RuntimeError`.
    fn download_if_needed<'p>(
        &self,
        py: Python<'p>,
        lora_uri: String,
    ) -> PyResult<Bound<'p, PyAny>> {
        // The future must be 'static, so it owns its own handle to the downloader.
        let shared = Arc::clone(&self.inner);
        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            match shared.download_if_needed(&lora_uri).await {
                Ok(local) => Ok(local.display().to_string()),
                Err(err) => Err(pyo3::exceptions::PyRuntimeError::new_err(format!(
                    "Download failed: {}",
                    err
                ))),
            }
        })
    }

    /// Return the local cache location associated with `cache_key`.
    fn get_cache_path(&self, cache_key: &str) -> String {
        let location = self.cache.get_cache_path(cache_key);
        location.display().to_string()
    }

    /// Report whether an entry for `cache_key` is present in the cache.
    fn is_cached(&self, cache_key: &str) -> bool {
        self.cache.is_cached(cache_key)
    }

    /// Check that the cached LoRA for `cache_key` has its required files.
    ///
    /// A validation error is raised as a Python `RuntimeError`.
    fn validate_cached(&self, cache_key: &str) -> PyResult<bool> {
        match self.cache.validate_cached(cache_key) {
            Ok(valid) => Ok(valid),
            Err(err) => Err(pyo3::exceptions::PyRuntimeError::new_err(format!(
                "Validation failed: {}",
                err
            ))),
        }
    }
}
...@@ -18,6 +18,7 @@ from dynamo._core import KvPushRouter as KvPushRouter ...@@ -18,6 +18,7 @@ from dynamo._core import KvPushRouter as KvPushRouter
from dynamo._core import KvRecorder as KvRecorder from dynamo._core import KvRecorder as KvRecorder
from dynamo._core import KvRouterConfig as KvRouterConfig from dynamo._core import KvRouterConfig as KvRouterConfig
from dynamo._core import KvStats as KvStats from dynamo._core import KvStats as KvStats
from dynamo._core import LoRADownloader as LoRADownloader
from dynamo._core import MediaDecoder as MediaDecoder from dynamo._core import MediaDecoder as MediaDecoder
from dynamo._core import MediaFetcher as MediaFetcher from dynamo._core import MediaFetcher as MediaFetcher
from dynamo._core import ModelInput as ModelInput from dynamo._core import ModelInput as ModelInput
......
...@@ -90,6 +90,9 @@ rayon = "1" ...@@ -90,6 +90,9 @@ rayon = "1"
dashmap = { version = "5.5.3" } dashmap = { version = "5.5.3" }
bincode = { version = "2.0.1", features = ["serde", "derive"] } bincode = { version = "2.0.1", features = ["serde", "derive"] }
# lora
object_store = { version = "0.12.4", features = ["aws", "gcp", "azure", "http"] }
# input/text # input/text
dialoguer = { version = "0.11", default-features = false, features = [ dialoguer = { version = "0.11", default-features = false, features = [
"editor", "editor",
......
...@@ -23,6 +23,7 @@ pub mod hub; ...@@ -23,6 +23,7 @@ pub mod hub;
pub mod audit; pub mod audit;
pub mod kv_router; pub mod kv_router;
pub mod local_model; pub mod local_model;
pub mod lora;
pub mod migration; pub mod migration;
pub mod mocker; pub mod mocker;
pub mod model_card; pub mod model_card;
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! LoRA downloading and caching infrastructure
//!
//! This module provides a minimal, extensible interface for downloading LoRA adapters
//! from various sources (local filesystem, S3, etc.) with automatic caching.
mod cache;
mod downloader;
mod source;
pub use cache::LoRACache;
pub use downloader::LoRADownloader;
pub use source::{LoRASource, LocalLoRASource, S3LoRASource};
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use anyhow::Result;
use std::path::PathBuf;
use dynamo_runtime::config::environment_names::llm;
/// On-disk cache of LoRA adapters.
///
/// Each adapter lives in its own directory directly under `cache_root`,
/// named by its cache key / LoRA ID.
#[derive(Debug, Clone)]
pub struct LoRACache {
    /// Directory under which per-LoRA directories are resolved.
    cache_root: PathBuf,
}
impl LoRACache {
pub fn new(cache_root: PathBuf) -> Self {
Self { cache_root }
}
/// Build a cache whose root comes from the `DYN_LORA_PATH` environment variable.
///
/// When the variable is unset, the root defaults to `<home>/.cache/dynamo_loras`,
/// where `<home>` is `HOME`, then `USERPROFILE`, then `/tmp` as a last resort.
///
/// Values are read with `std::env::var_os` and kept as paths throughout, so a
/// directory whose name is not valid UTF-8 is used verbatim rather than being
/// treated as unset or lossily re-encoded.
///
/// # Errors
///
/// Currently never fails; the `Result` return type is kept so callers do not
/// have to change.
pub fn from_env() -> Result<Self> {
    let cache_root = std::env::var_os(llm::DYN_LORA_PATH)
        .map(PathBuf::from)
        .unwrap_or_else(|| {
            // Use <home>/.cache/dynamo_loras as default, fallback to /tmp if no home is set
            std::env::var_os("HOME")
                .or_else(|| std::env::var_os("USERPROFILE"))
                .map(PathBuf::from)
                .unwrap_or_else(|| PathBuf::from("/tmp"))
                .join(".cache")
                .join("dynamo_loras")
        });
    Ok(Self::new(cache_root))
}
/// Compute the directory for `lora_id` under the cache root.
///
/// This is a pure path computation; the directory may not exist.
/// NOTE(review): `lora_id` is appended as-is, so an absolute path or a `..`
/// component would resolve outside the cache root. Presumably callers pass
/// sanitized keys; confirm.
pub fn get_cache_path(&self, lora_id: &str) -> PathBuf {
    let mut location = self.cache_root.clone();
    location.push(lora_id);
    location
}
/// Report whether anything exists at the cache location for `lora_id`.
///
/// This is an existence check only; use `validate_cached` to confirm the
/// expected files are present.
pub fn is_cached(&self, lora_id: &str) -> bool {
    let location = self.get_cache_path(lora_id);
    location.exists()
}
/// Check that the cached entry for `lora_id` looks like a usable adapter.
///
/// Returns `Ok(true)` only when the entry exists, contains
/// `adapter_config.json`, and contains at least one known weight file.
/// Every other case yields `Ok(false)`; this function does not currently
/// produce an error.
///
/// TODO: Add support for other weight file formats supported by trtllm
pub fn validate_cached(&self, lora_id: &str) -> Result<bool> {
    // Weight file names accepted as proof that the adapter has weights.
    const WEIGHT_FILES: [&str; 3] = [
        "adapter_model.safetensors",
        "adapter_model.bin",
        "model.lora_weights.npy",
    ];

    let lora_dir = self.get_cache_path(lora_id);
    if !lora_dir.exists() || !lora_dir.join("adapter_config.json").exists() {
        return Ok(false);
    }

    // `any` stops at the first weight file found, in the listed order.
    Ok(WEIGHT_FILES
        .iter()
        .any(|file_name| lora_dir.join(file_name).exists()))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a temporary directory and a cache rooted at it.
    /// The `TempDir` is returned so it outlives the test body.
    fn scratch_cache() -> (TempDir, LoRACache) {
        let root = TempDir::new().unwrap();
        let cache = LoRACache::new(root.path().to_path_buf());
        (root, cache)
    }

    #[test]
    fn test_cache_creation() {
        let (root, cache) = scratch_cache();
        assert_eq!(cache.cache_root, root.path());
    }

    #[test]
    fn test_get_cache_path() {
        let (root, cache) = scratch_cache();
        let expected = root.path().join("my-lora");
        assert_eq!(cache.get_cache_path("my-lora"), expected);
    }

    #[test]
    fn test_is_cached() {
        let (root, cache) = scratch_cache();

        // Only the directory that was actually created counts as cached.
        fs::create_dir(root.path().join("test-lora")).unwrap();

        assert!(cache.is_cached("test-lora"));
        assert!(!cache.is_cached("non-existent"));
    }

    #[test]
    fn test_validate_cached() {
        let (root, cache) = scratch_cache();

        // Config plus a weight file: valid.
        let complete = root.path().join("valid-lora");
        fs::create_dir(&complete).unwrap();
        fs::write(complete.join("adapter_config.json"), "{}").unwrap();
        fs::write(complete.join("adapter_model.safetensors"), "").unwrap();
        assert!(cache.validate_cached("valid-lora").unwrap());

        // Config without any weight file: invalid.
        let config_only = root.path().join("invalid-lora");
        fs::create_dir(&config_only).unwrap();
        fs::write(config_only.join("adapter_config.json"), "{}").unwrap();
        assert!(!cache.validate_cached("invalid-lora").unwrap());
    }
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use super::{cache::LoRACache, source::LoRASource};
use anyhow::Result;
use std::{path::PathBuf, sync::Arc};
/// Resolves LoRA URIs to local directories by consulting a list of sources
/// and, for non-`file://` URIs, a local cache.
pub struct LoRADownloader {
/// Sources consulted in order; the first one that reports the URI as existing is used.
sources: Vec<Arc<dyn LoRASource>>,
/// Cache that supplies download destinations and validates downloaded adapters.
cache: LoRACache,
}
impl LoRADownloader {
pub fn new(sources: Vec<Arc<dyn LoRASource>>, cache: LoRACache) -> Self {
Self { sources, cache }
}
/// Download LoRA if not in cache, return local path
///
/// For local file:// URIs, this will return the original path without copying.
/// For remote URIs (s3://, gcs://, etc.), this will download to cache.
///
/// Sources are tried in registration order. A source whose `exists` check
/// errors is skipped, because it is assumed to be incompatible with the URI.
///
/// # Errors
///
/// - No source reports a `file://` URI as existing.
/// - Cache validation itself fails.
/// - The selected source fails to download (remaining sources are not tried).
/// - A download completed but the result did not pass cache validation; this
///   is reported separately from "not found".
/// - No source reports the remote URI as existing.
pub async fn download_if_needed(&self, lora_uri: &str) -> Result<PathBuf> {
    // For local file:// URIs, don't use cache - just validate and return
    if lora_uri.starts_with("file://") {
        for source in &self.sources {
            // Ignore errors from incompatible sources
            if matches!(source.exists(lora_uri).await, Ok(true)) {
                // LocalLoRASource.download() returns the original path
                return source.download(lora_uri, &PathBuf::new()).await;
            }
        }
        anyhow::bail!("Local LoRA not found: {}", lora_uri);
    }

    // For remote URIs, use the URI as the cache key
    let cache_key = self.uri_to_cache_key(lora_uri);

    // Check cache first
    if self.cache.is_cached(&cache_key) && self.cache.validate_cached(&cache_key)? {
        tracing::debug!("LoRA found in cache: {}", cache_key);
        return Ok(self.cache.get_cache_path(&cache_key));
    }

    // Try sources in order
    let dest_path = self.cache.get_cache_path(&cache_key);
    // Remember whether any source delivered files that failed validation, so
    // the final error does not claim the LoRA was missing.
    let mut failed_validation = false;
    for source in &self.sources {
        if !matches!(source.exists(lora_uri).await, Ok(true)) {
            continue;
        }
        let downloaded_path = source.download(lora_uri, &dest_path).await?;
        if self.cache.validate_cached(&cache_key)? {
            return Ok(downloaded_path);
        }
        failed_validation = true;
        tracing::warn!(
            "Downloaded LoRA at {} failed validation",
            downloaded_path.display()
        );
    }

    if failed_validation {
        anyhow::bail!(
            "LoRA {} was downloaded but failed cache validation at {}",
            lora_uri,
            dest_path.display()
        );
    }
    anyhow::bail!("LoRA {} not found in any source", lora_uri)
}
/// Flatten a URI into a single path component to use as a cache key.
///
/// The scheme separator `://` becomes one `_`, then every remaining `/`
/// and `\` becomes `_`.
/// NOTE(review): distinct URIs can map to the same key (for example
/// `a/b_c` and `a_b/c`); confirm that is acceptable for the cache.
fn uri_to_cache_key(&self, uri: &str) -> String {
    let without_scheme_sep = uri.replace("://", "_");
    without_scheme_sep
        .chars()
        .map(|ch| if matches!(ch, '/' | '\\') { '_' } else { ch })
        .collect()
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use anyhow::{Context, Result};
use async_trait::async_trait;
use futures::StreamExt;
use object_store::{ObjectStore, aws::AmazonS3Builder, path::Path as ObjectPath};
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use url::Url;
/// Minimal trait for LoRA sources
/// Users can implement this in Rust for custom sources
///
/// Implementations are held as `Arc<dyn LoRASource>` by the downloader, hence
/// the `Send + Sync` bound.
#[async_trait]
pub trait LoRASource: Send + Sync {
/// Download LoRA from source to destination path
/// Returns the actual path where files were written
///
/// The returned path need not equal `dest_path`: the local filesystem source
/// in this file ignores `dest_path` and returns the original directory.
async fn download(&self, lora_uri: &str, dest_path: &Path) -> Result<PathBuf>;
/// Check if LoRA exists in this source
///
/// Implementations in this file return `Err` for URIs whose scheme they do
/// not handle; the downloader skips a source whose check errors.
async fn exists(&self, lora_uri: &str) -> Result<bool>;
}
/// Local filesystem LoRA source
/// For file:// URIs, just validates the path exists
///
/// The type carries no state, so `Default`, `Clone`, `Copy` and `Debug` are
/// all derived; `LocalLoRASource::default()` is equivalent to `LocalLoRASource::new()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LocalLoRASource;
impl LocalLoRASource {
pub fn new() -> Self {
Self
}
/// Parse file:// URI to extract local path
/// Format: file:///absolute/path/to/lora
fn parse_file_uri(uri: &str) -> Result<PathBuf> {
if !uri.starts_with("file://") {
anyhow::bail!("Invalid file URI scheme: expected file://");
}
let path_str = uri.strip_prefix("file://").unwrap();
Ok(PathBuf::from(path_str))
}
}
#[async_trait]
impl LoRASource for LocalLoRASource {
/// "Download" a local LoRA: validate the directory and return it unchanged.
///
/// `_dest_path` is ignored, so nothing is copied into the cache.
///
/// # Errors
///
/// Fails when the URI is not a `file://` URI, when the path does not exist,
/// or when it is not a directory.
async fn download(&self, file_uri: &str, _dest_path: &Path) -> Result<PathBuf> {
    let local_dir = Self::parse_file_uri(file_uri)?;
    anyhow::ensure!(
        local_dir.exists(),
        "LoRA path does not exist: {}",
        local_dir.display()
    );
    anyhow::ensure!(
        local_dir.is_dir(),
        "LoRA path is not a directory: {}",
        local_dir.display()
    );
    // Returning the source path instead of copying avoids unnecessary disk I/O.
    tracing::info!("Using local LoRA at: {:?}", local_dir);
    Ok(local_dir)
}
/// Report whether the `file://` URI names an existing directory.
///
/// Returns an error when the URI does not use the `file://` scheme.
async fn exists(&self, file_uri: &str) -> Result<bool> {
    Self::parse_file_uri(file_uri).map(|local_dir| local_dir.exists() && local_dir.is_dir())
}
}
/// S3-based LoRA source using object_store crate
/// Reads credentials from environment variables
///
/// NOTE(review): this struct stores the secret access key as a plain `String`;
/// avoid adding a `Debug` derive that would print it.
pub struct S3LoRASource {
/// Value of `AWS_ACCESS_KEY_ID`.
access_key_id: String,
/// Value of `AWS_SECRET_ACCESS_KEY`.
secret_access_key: String,
/// Value of `AWS_REGION`, or `us-east-1` when that variable is not set.
region: String,
/// Value of `AWS_ENDPOINT` for custom S3-compatible endpoints, if set.
endpoint: Option<String>,
}
impl S3LoRASource {
/// Create S3 source from environment variables:
/// - AWS_ACCESS_KEY_ID (required)
/// - AWS_SECRET_ACCESS_KEY (required)
/// - AWS_REGION (optional, defaults to us-east-1)
/// - AWS_ENDPOINT (optional, for custom S3-compatible endpoints)
///
/// # Errors
///
/// Fails when either required variable cannot be read.
pub fn from_env() -> Result<Self> {
    // Field initializers run top to bottom, so the access key is checked first.
    Ok(Self {
        access_key_id: std::env::var("AWS_ACCESS_KEY_ID").context("AWS_ACCESS_KEY_ID not set")?,
        secret_access_key: std::env::var("AWS_SECRET_ACCESS_KEY")
            .context("AWS_SECRET_ACCESS_KEY not set")?,
        region: match std::env::var("AWS_REGION") {
            Ok(region) => region,
            Err(_) => "us-east-1".to_string(),
        },
        endpoint: std::env::var("AWS_ENDPOINT").ok(),
    })
}
/// Build an ObjectStore client for `bucket` using the stored credentials and region.
///
/// When a custom endpoint is configured, path-style requests are used
/// (e.g., for MinIO). Plain HTTP is allowed only when a custom endpoint is
/// set and `AWS_ALLOW_HTTP` equals `true` (case-insensitive).
///
/// # Errors
///
/// Propagates any error from the builder's `build()`.
fn build_store(&self, bucket: &str) -> Result<Arc<dyn ObjectStore>> {
    let base = AmazonS3Builder::new()
        .with_access_key_id(&self.access_key_id)
        .with_secret_access_key(&self.secret_access_key)
        .with_region(&self.region)
        .with_bucket_name(bucket);

    let builder = match &self.endpoint {
        None => base,
        Some(endpoint) => {
            let custom = base
                .with_endpoint(endpoint)
                .with_virtual_hosted_style_request(false);
            // HTTPS stays the default; HTTP requires an explicit opt-in.
            let allow_http = matches!(
                std::env::var("AWS_ALLOW_HTTP"),
                Ok(flag) if flag.eq_ignore_ascii_case("true")
            );
            if allow_http {
                custom.with_allow_http(true)
            } else {
                custom
            }
        }
    };

    let store: Arc<dyn ObjectStore> = Arc::new(builder.build()?);
    Ok(store)
}
/// Split an S3 URI into `(bucket, key)`.
/// Format: s3://bucket-name/path/to/lora
///
/// The bucket is the URI host; the key is the URI path with leading `/`
/// characters removed (so it is empty for a bare `s3://bucket`).
/// NOTE(review): the key is taken from `Url::path()`, which presumably
/// returns the percent-encoded form. Keys containing spaces or other
/// escaped characters may not match the stored object key; confirm.
///
/// # Errors
///
/// Fails when the URI cannot be parsed, its scheme is not `s3`, or it has no host.
fn parse_s3_uri(uri: &str) -> Result<(String, String)> {
    let parsed = Url::parse(uri)?;
    let scheme = parsed.scheme();
    if scheme != "s3" {
        anyhow::bail!("Invalid S3 URI scheme: {}", scheme);
    }
    let Some(bucket) = parsed.host_str() else {
        return Err(anyhow::anyhow!("No bucket in S3 URI"));
    };
    let key = parsed.path().trim_start_matches('/');
    Ok((bucket.to_string(), key.to_string()))
}
}
#[async_trait]
impl LoRASource for S3LoRASource {
/// Copy every object under the S3 URI's prefix into `dest_path`.
///
/// Object keys are made relative to the prefix and recreated under
/// `dest_path`, creating intermediate directories as needed. Each object is
/// read fully into memory before it is written to disk.
///
/// Returns `dest_path` on success.
///
/// # Errors
///
/// Fails on URI parsing, store construction, listing, fetching or filesystem
/// errors, and when no object is found under the prefix.
async fn download(&self, s3_uri: &str, dest_path: &Path) -> Result<PathBuf> {
    let (bucket, prefix) = Self::parse_s3_uri(s3_uri)?;
    tracing::info!(
        "Downloading LoRA from S3: bucket={}, prefix={}",
        bucket,
        prefix
    );

    // A store is built per call because it is bound to one bucket.
    let store = self.build_store(&bucket)?;
    let list_root = ObjectPath::from(prefix.as_str());
    let mut objects = store.list(Some(&list_root));

    tokio::fs::create_dir_all(dest_path).await?;

    let mut downloaded = 0;
    while let Some(entry) = objects.next().await {
        let object = entry?;
        let key = object.location.as_ref();

        // Path of this object relative to the requested prefix.
        let relative = key
            .strip_prefix(prefix.as_str())
            .unwrap_or(key)
            .trim_start_matches('/');
        if relative.is_empty() {
            // The prefix itself has nothing to write.
            continue;
        }

        let target = dest_path.join(relative);
        if let Some(parent_dir) = target.parent() {
            tokio::fs::create_dir_all(parent_dir).await?;
        }

        let payload = store.get(&object.location).await?.bytes().await?;
        tokio::fs::write(&target, &payload).await?;
        downloaded += 1;
        tracing::debug!("Downloaded: {} ({} bytes)", relative, payload.len());
    }

    if downloaded == 0 {
        anyhow::bail!("No files found at S3 URI: {}", s3_uri);
    }
    tracing::info!("Downloaded {} files from S3 to {:?}", downloaded, dest_path);
    Ok(dest_path.to_path_buf())
}
/// Report whether at least one object exists under the S3 URI's prefix.
///
/// Only the first listing result is inspected. An error on that first
/// result is propagated rather than treated as "not found".
async fn exists(&self, s3_uri: &str) -> Result<bool> {
    let (bucket, prefix) = Self::parse_s3_uri(s3_uri)?;
    let store = self.build_store(&bucket)?;
    let list_root = ObjectPath::from(prefix);
    let mut objects = store.list(Some(&list_root));
    // `transpose` turns Option<Result<_>> into Result<Option<_>> so `?` surfaces listing errors.
    let first = objects.next().await.transpose()?;
    Ok(first.is_some())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed file:// URI yields the path after the scheme.
    #[test]
    fn test_parse_file_uri() {
        let parsed = LocalLoRASource::parse_file_uri("file:///path/to/lora").unwrap();
        assert_eq!(parsed, PathBuf::from("/path/to/lora"));
    }

    /// A non-file scheme is rejected.
    #[test]
    fn test_parse_file_uri_invalid() {
        let outcome = LocalLoRASource::parse_file_uri("http://example.com/lora");
        assert!(outcome.is_err());
    }

    /// Host becomes the bucket, path (without leading slash) becomes the key.
    #[test]
    fn test_parse_s3_uri() {
        let parts = S3LoRASource::parse_s3_uri("s3://my-bucket/path/to/lora").unwrap();
        assert_eq!(parts.0, "my-bucket");
        assert_eq!(parts.1, "path/to/lora");
    }

    /// A non-s3 scheme is rejected.
    #[test]
    fn test_parse_s3_uri_invalid() {
        let outcome = S3LoRASource::parse_s3_uri("file:///path/to/lora");
        assert!(outcome.is_err());
    }
}
...@@ -225,6 +225,9 @@ pub mod llm { ...@@ -225,6 +225,9 @@ pub mod llm {
/// HTTP body size limit in MB /// HTTP body size limit in MB
pub const DYN_HTTP_BODY_LIMIT_MB: &str = "DYN_HTTP_BODY_LIMIT_MB"; pub const DYN_HTTP_BODY_LIMIT_MB: &str = "DYN_HTTP_BODY_LIMIT_MB";
/// LoRA cache directory path
pub const DYN_LORA_PATH: &str = "DYN_LORA_PATH";
/// Metrics configuration /// Metrics configuration
pub mod metrics { pub mod metrics {
/// Custom metrics prefix (overrides default "dynamo_frontend") /// Custom metrics prefix (overrides default "dynamo_frontend")
...@@ -353,6 +356,7 @@ mod tests { ...@@ -353,6 +356,7 @@ mod tests {
kvbm::leader::DYN_KVBM_LEADER_ZMQ_ACK_PORT, kvbm::leader::DYN_KVBM_LEADER_ZMQ_ACK_PORT,
// LLM // LLM
llm::DYN_HTTP_BODY_LIMIT_MB, llm::DYN_HTTP_BODY_LIMIT_MB,
llm::DYN_LORA_PATH,
llm::metrics::DYN_METRICS_PREFIX, llm::metrics::DYN_METRICS_PREFIX,
// Model // Model
model::model_express::MODEL_EXPRESS_URL, model::model_express::MODEL_EXPRESS_URL,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment