Unverified commit 2100f6aa, authored by Graham King, committed by GitHub
Browse files

feat(mistralrs): Upgrade to support CUDA 13 (#4474)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 17e22476
......@@ -1231,7 +1231,7 @@ dependencies = [
"rayon",
"safetensors 0.4.5",
"thiserror 1.0.69",
"ug",
"ug 0.4.0",
"yoke 0.7.5",
"zip 1.1.4",
]
......@@ -1239,12 +1239,12 @@ dependencies = [
[[package]]
name = "candle-core"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
source = "git+https://github.com/EricLBuehler/candle.git?rev=175926c9#175926c960f5aff6a543a06b3214471dd92785e4"
dependencies = [
"byteorder",
"candle-kernels",
"candle-metal-kernels",
"cudarc 0.17.8",
"cudarc",
"float8",
"gemm 0.17.1",
"half 2.7.1",
......@@ -1257,7 +1257,7 @@ dependencies = [
"rayon",
"safetensors 0.6.2",
"thiserror 1.0.69",
"ug",
"ug 0.5.0",
"ug-cuda",
"ug-metal",
"yoke 0.7.5",
......@@ -1267,7 +1267,7 @@ dependencies = [
[[package]]
name = "candle-kernels"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
source = "git+https://github.com/EricLBuehler/candle.git?rev=175926c9#175926c960f5aff6a543a06b3214471dd92785e4"
dependencies = [
"bindgen_cuda 0.1.5",
]
......@@ -1275,7 +1275,7 @@ dependencies = [
[[package]]
name = "candle-metal-kernels"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
source = "git+https://github.com/EricLBuehler/candle.git?rev=175926c9#175926c960f5aff6a543a06b3214471dd92785e4"
dependencies = [
"half 2.7.1",
"metal 0.27.0",
......@@ -1287,9 +1287,9 @@ dependencies = [
[[package]]
name = "candle-nn"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
source = "git+https://github.com/EricLBuehler/candle.git?rev=175926c9#175926c960f5aff6a543a06b3214471dd92785e4"
dependencies = [
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"candle-metal-kernels",
"half 2.7.1",
"metal 0.27.0",
......@@ -2004,16 +2004,6 @@ dependencies = [
"cipher",
]
[[package]]
name = "cudarc"
version = "0.16.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17200eb07e7d85a243aa1bf4569a7aa998385ba98d14833973a817a63cc86e92"
dependencies = [
"half 2.7.1",
"libloading",
]
[[package]]
name = "cudarc"
version = "0.17.8"
......@@ -2447,7 +2437,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -2682,7 +2672,7 @@ dependencies = [
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono",
"criterion 0.3.6",
"cudarc 0.17.8",
"cudarc",
"dashmap 5.5.3",
"derive-getters",
"derive_builder",
......@@ -2742,8 +2732,8 @@ dependencies = [
"tokio-rayon",
"tokio-stream",
"tokio-util",
"toktrie 1.3.0",
"toktrie_hf_tokenizers 1.3.0",
"toktrie 1.4.0",
"toktrie_hf_tokenizers 1.4.0",
"tonic 0.13.1",
"tonic-build 0.13.1",
"tower 0.5.2",
......@@ -2764,7 +2754,7 @@ name = "dynamo-memory"
version = "0.7.0"
dependencies = [
"anyhow",
"cudarc 0.17.8",
"cudarc",
"dynamo-config",
"libc",
"nix 0.30.1",
......@@ -3082,7 +3072,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -3308,7 +3298,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4203231de188ebbdfb85c11f3c20ca2b063945710de04e7b59268731e728b462"
dependencies = [
"cudarc 0.17.8",
"cudarc",
"half 2.7.1",
"num-traits",
]
......@@ -4862,7 +4852,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
dependencies = [
"hermit-abi 0.5.2",
"libc",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -4934,7 +4924,7 @@ dependencies = [
"portable-atomic",
"portable-atomic-util",
"serde_core",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -6083,10 +6073,10 @@ dependencies = [
[[package]]
name = "mistralrs"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"anyhow",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"candle-nn",
"clap 4.5.52",
"either",
......@@ -6099,13 +6089,15 @@ dependencies = [
"serde",
"serde_json",
"tokio",
"tracing",
"tracing-subscriber",
"walkdir",
]
[[package]]
name = "mistralrs-audio"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"anyhow",
"apodize",
......@@ -6116,7 +6108,7 @@ dependencies = [
[[package]]
name = "mistralrs-core"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"ahash",
"akin",
......@@ -6129,7 +6121,7 @@ dependencies = [
"bm25",
"bytemuck",
"bytemuck_derive",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"candle-nn",
"cfgrammar",
"chrono",
......@@ -6166,7 +6158,6 @@ dependencies = [
"mistralrs-vision",
"num-traits",
"objc",
"once_cell",
"ordered-float 5.1.0",
"parking_lot",
"radix_trie",
......@@ -6212,7 +6203,7 @@ dependencies = [
[[package]]
name = "mistralrs-mcp"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"anyhow",
"async-trait",
......@@ -6232,26 +6223,25 @@ dependencies = [
[[package]]
name = "mistralrs-paged-attn"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"anyhow",
"bindgen_cuda 0.1.7",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"float8",
"half 2.7.1",
"metal 0.27.0",
"once_cell",
"thiserror 2.0.17",
]
[[package]]
name = "mistralrs-quant"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"bindgen_cuda 0.1.7",
"byteorder",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"candle-nn",
"float8",
"half 2.7.1",
......@@ -6259,7 +6249,6 @@ dependencies = [
"lazy_static",
"memmap2",
"metal 0.27.0",
"once_cell",
"paste",
"rayon",
"regex",
......@@ -6275,9 +6264,9 @@ dependencies = [
[[package]]
name = "mistralrs-vision"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=2bcf0e9e3#2bcf0e9e3a0c688786cfa3142e2b18844a7f3447"
dependencies = [
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=175926c9)",
"image",
"rayon",
]
......@@ -6801,7 +6790,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -8245,7 +8234,7 @@ dependencies = [
"once_cell",
"socket2 0.6.1",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
......@@ -8688,9 +8677,9 @@ dependencies = [
[[package]]
name = "resolv-conf"
version = "0.7.5"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b3789b30bd25ba102de4beabd95d21ac45b69b1be7d14522bab988c526d6799"
checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7"
[[package]]
name = "rgb"
......@@ -8972,7 +8961,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -10317,7 +10306,7 @@ dependencies = [
"getrandom 0.3.4",
"once_cell",
"rustix",
"windows-sys 0.59.0",
"windows-sys 0.61.2",
]
[[package]]
......@@ -10725,9 +10714,9 @@ dependencies = [
[[package]]
name = "toktrie"
version = "1.3.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55c5e672ee5e0311bbb37ac0f9f60ed7db38463365e6cf4f71240c7b15bb374d"
checksum = "dcfbe778b5bf5ffda8e2c5e79540bf40505ce1fb48ba31e8b43337fe55ca4c23"
dependencies = [
"anyhow",
"bytemuck",
......@@ -10751,16 +10740,16 @@ dependencies = [
[[package]]
name = "toktrie_hf_tokenizers"
version = "1.3.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baec7fb81a716f85ae1e07ce951f9e9bc1b745635edd48cbebb6a033c6491dca"
checksum = "2ff6e566c3ec913607f16076ae2619fb018c468ed4715b899ab99898c850934b"
dependencies = [
"anyhow",
"log",
"serde",
"serde_json",
"tokenizers",
"toktrie 1.3.0",
"toktrie 1.4.0",
]
[[package]]
......@@ -11263,31 +11252,52 @@ dependencies = [
"yoke 0.7.5",
]
[[package]]
name = "ug"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76b761acf8af3494640d826a8609e2265e19778fb43306c7f15379c78c9b05b0"
dependencies = [
"gemm 0.18.2",
"half 2.7.1",
"libloading",
"memmap2",
"num",
"num-traits",
"num_cpus",
"rayon",
"safetensors 0.4.5",
"serde",
"thiserror 1.0.69",
"tracing",
"yoke 0.7.5",
]
[[package]]
name = "ug-cuda"
version = "0.4.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14053653d0b7fa7b21015aa9a62edc8af2f60aa6f9c54e66386ecce55f22ed29"
checksum = "9f0a1fa748f26166778c33b8498255ebb7c6bffb472bcc0a72839e07ebb1d9b5"
dependencies = [
"cudarc 0.16.6",
"cudarc",
"half 2.7.1",
"serde",
"thiserror 1.0.69",
"ug",
"ug 0.5.0",
]
[[package]]
name = "ug-metal"
version = "0.4.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76daec3c7a32a1b4a0e3307b6b057fa067aa64e750713987410a2c402e5cd731"
checksum = "9f7adf545a99a086d362efc739e7cf4317c18cbeda22706000fd434d70ea3d95"
dependencies = [
"half 2.7.1",
"metal 0.29.0",
"objc",
"serde",
"thiserror 1.0.69",
"ug",
"ug 0.5.0",
]
[[package]]
......@@ -12002,7 +12012,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.61.2",
]
[[package]]
......
......@@ -27,7 +27,7 @@ async-stream = { workspace = true }
async-trait = { workspace = true }
either = { workspace = true }
indexmap = { version = "2.9.0", features = ["serde"] }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", version = "0.6.0", rev = "a13220255" }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", version = "0.6.0", rev = "2bcf0e9e3" }
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
......@@ -136,7 +136,6 @@ impl MistralRsEngine {
let paged_attention_config = if cfg!(feature = "cuda") && EXP_ENABLE_PAGED_ATTENTION {
Some(PagedAttentionConfig::new(
None, // Block size, default 32
4096, // CPU memory in MiB
MemoryGpuConfig::ContextSize(max_seq_len),
PagedCacheType::Auto,
)?)
......@@ -237,6 +236,7 @@ impl MistralRsEngine {
logits_processors: None,
return_raw_logits: false,
web_search_options: None,
truncate_sequence: false,
}));
// Send warmup request and consume response
......@@ -359,6 +359,7 @@ impl
logits_processors: None,
return_raw_logits: false,
web_search_options: None,
truncate_sequence: false,
}));
self.mistralrs
......@@ -559,6 +560,7 @@ impl
logits_processors: None,
return_raw_logits: false,
web_search_options: None,
truncate_sequence: false,
}));
self.mistralrs
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment