Commit 45b3505c authored by Graham King's avatar Graham King Committed by GitHub
Browse files

fix: Unique IDs for mistralrs requests (#186)

Upgrade mistralrs to latest.
parent 2f700421
...@@ -477,21 +477,6 @@ version = "0.2.2" ...@@ -477,21 +477,6 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32" checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32"
[[package]]
name = "buildstructor"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3907aac66c65520545ae3cb3c195306e20d5ed5c90bfbb992e061cf12a104d0"
dependencies = [
"lazy_static",
"proc-macro2",
"quote",
"str_inflector",
"syn 2.0.98",
"thiserror 1.0.69",
"try_match",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.17.0" version = "3.17.0"
...@@ -542,17 +527,17 @@ dependencies = [ ...@@ -542,17 +527,17 @@ dependencies = [
[[package]] [[package]]
name = "candle-core" name = "candle-core"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"candle-kernels", "candle-kernels",
"candle-metal-kernels", "candle-metal-kernels",
"cudarc",
"float8", "float8",
"gemm", "gemm",
"half", "half",
"memmap2", "memmap2",
"metal 0.27.0", "metal",
"mistralrs_cudarc_fork",
"num-traits", "num-traits",
"num_cpus", "num_cpus",
"rand", "rand",
...@@ -560,9 +545,6 @@ dependencies = [ ...@@ -560,9 +545,6 @@ dependencies = [
"rayon", "rayon",
"safetensors", "safetensors",
"thiserror 1.0.69", "thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke", "yoke",
"zip", "zip",
] ]
...@@ -591,7 +573,7 @@ dependencies = [ ...@@ -591,7 +573,7 @@ dependencies = [
[[package]] [[package]]
name = "candle-kernels" name = "candle-kernels"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"bindgen_cuda 0.1.5", "bindgen_cuda 0.1.5",
] ]
...@@ -599,9 +581,9 @@ dependencies = [ ...@@ -599,9 +581,9 @@ dependencies = [
[[package]] [[package]]
name = "candle-metal-kernels" name = "candle-metal-kernels"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"metal 0.27.0", "metal",
"once_cell", "once_cell",
"thiserror 1.0.69", "thiserror 1.0.69",
"tracing", "tracing",
...@@ -610,12 +592,12 @@ dependencies = [ ...@@ -610,12 +592,12 @@ dependencies = [
[[package]] [[package]]
name = "candle-nn" name = "candle-nn"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"candle-core", "candle-core",
"candle-metal-kernels", "candle-metal-kernels",
"half", "half",
"metal 0.27.0", "metal",
"num-traits", "num-traits",
"rayon", "rayon",
"safetensors", "safetensors",
...@@ -960,16 +942,6 @@ dependencies = [ ...@@ -960,16 +942,6 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "cudarc"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384"
dependencies = [
"half",
"libloading",
]
[[package]] [[package]]
name = "cudarc" name = "cudarc"
version = "0.13.4" version = "0.13.4"
...@@ -1490,9 +1462,8 @@ version = "0.1.3" ...@@ -1490,9 +1462,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f" checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
dependencies = [ dependencies = [
"cudarc 0.13.4", "cudarc",
"half", "half",
"mistralrs_cudarc_fork",
"num-traits", "num-traits",
"rand", "rand",
"rand_distr", "rand_distr",
...@@ -2631,21 +2602,6 @@ dependencies = [ ...@@ -2631,21 +2602,6 @@ dependencies = [
"paste", "paste",
] ]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.8.0",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]] [[package]]
name = "mime" name = "mime"
version = "0.3.17" version = "0.3.17"
...@@ -2744,8 +2700,8 @@ dependencies = [ ...@@ -2744,8 +2700,8 @@ dependencies = [
[[package]] [[package]]
name = "mistralrs" name = "mistralrs"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"candle-core", "candle-core",
...@@ -2765,8 +2721,8 @@ dependencies = [ ...@@ -2765,8 +2721,8 @@ dependencies = [
[[package]] [[package]]
name = "mistralrs-core" name = "mistralrs-core"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"akin", "akin",
"anyhow", "anyhow",
...@@ -2774,7 +2730,6 @@ dependencies = [ ...@@ -2774,7 +2730,6 @@ dependencies = [
"async-trait", "async-trait",
"base64 0.22.1", "base64 0.22.1",
"bindgen_cuda 0.1.5", "bindgen_cuda 0.1.5",
"buildstructor",
"bytemuck", "bytemuck",
"bytemuck_derive", "bytemuck_derive",
"candle-core", "candle-core",
...@@ -2798,6 +2753,7 @@ dependencies = [ ...@@ -2798,6 +2753,7 @@ dependencies = [
"itertools 0.13.0", "itertools 0.13.0",
"llguidance", "llguidance",
"lrtable", "lrtable",
"metal",
"minijinja", "minijinja",
"minijinja-contrib", "minijinja-contrib",
"mistralrs-paged-attn", "mistralrs-paged-attn",
...@@ -2837,23 +2793,23 @@ dependencies = [ ...@@ -2837,23 +2793,23 @@ dependencies = [
[[package]] [[package]]
name = "mistralrs-paged-attn" name = "mistralrs-paged-attn"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bindgen_cuda 0.1.6", "bindgen_cuda 0.1.6",
"candle-core", "candle-core",
"float8", "float8",
"half", "half",
"metal 0.27.0", "metal",
"once_cell", "once_cell",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
[[package]] [[package]]
name = "mistralrs-quant" name = "mistralrs-quant"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"bindgen_cuda 0.1.5", "bindgen_cuda 0.1.5",
"byteorder", "byteorder",
...@@ -2862,34 +2818,29 @@ dependencies = [ ...@@ -2862,34 +2818,29 @@ dependencies = [
"float8", "float8",
"half", "half",
"lazy_static", "lazy_static",
"memmap2",
"metal",
"once_cell", "once_cell",
"paste", "paste",
"rayon", "rayon",
"regex",
"safetensors",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 1.0.69", "thiserror 1.0.69",
"tracing", "tracing",
"yoke",
] ]
[[package]] [[package]]
name = "mistralrs-vision" name = "mistralrs-vision"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"candle-core", "candle-core",
"image", "image",
] ]
[[package]]
name = "mistralrs_cudarc_fork"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82ee900a0e7080a2ff91042d070b0112a3533b32ebc6d8faaba96ee4f598e8d"
dependencies = [
"half",
"libloading",
]
[[package]] [[package]]
name = "monostate" name = "monostate"
version = "0.1.13" version = "0.1.13"
...@@ -3130,30 +3081,6 @@ dependencies = [ ...@@ -3130,30 +3081,6 @@ dependencies = [
"rand", "rand",
] ]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-complex" name = "num-complex"
version = "0.4.6" version = "0.4.6"
...@@ -3170,37 +3097,6 @@ version = "0.1.0" ...@@ -3170,37 +3097,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
...@@ -4497,16 +4393,6 @@ version = "1.2.0" ...@@ -4497,16 +4393,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "str_inflector"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
dependencies = [
"lazy_static",
"regex",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
...@@ -5267,26 +5153,6 @@ version = "0.2.5" ...@@ -5267,26 +5153,6 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "try_match"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b065c869a3f832418e279aa4c1d7088f9d5d323bde15a60a08e20c2cd4549082"
dependencies = [
"try_match_inner",
]
[[package]]
name = "try_match_inner"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c81686f7ab4065ccac3df7a910c4249f8c0f3fb70421d6ddec19b9311f63f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]] [[package]]
name = "tryhard" name = "tryhard"
version = "0.5.1" version = "0.5.1"
...@@ -5304,48 +5170,6 @@ version = "1.17.0" ...@@ -5304,48 +5170,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ug"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13"
dependencies = [
"half",
"num",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "ug-cuda"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e"
dependencies = [
"cudarc 0.12.1",
"half",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "ug-metal"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4ed1df2c20a1a138f993041f650cc84ff27aaefb4342b7f986e77d00e80799"
dependencies = [
"half",
"metal 0.29.0",
"objc",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]] [[package]]
name = "uncased" name = "uncased"
version = "0.9.10" version = "0.9.10"
......
...@@ -502,21 +502,6 @@ dependencies = [ ...@@ -502,21 +502,6 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "buildstructor"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3907aac66c65520545ae3cb3c195306e20d5ed5c90bfbb992e061cf12a104d0"
dependencies = [
"lazy_static",
"proc-macro2",
"quote",
"str_inflector",
"syn 2.0.98",
"thiserror 1.0.69",
"try_match",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.17.0" version = "3.17.0"
...@@ -567,17 +552,17 @@ dependencies = [ ...@@ -567,17 +552,17 @@ dependencies = [
[[package]] [[package]]
name = "candle-core" name = "candle-core"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"candle-kernels", "candle-kernels",
"candle-metal-kernels", "candle-metal-kernels",
"cudarc",
"float8", "float8",
"gemm", "gemm",
"half", "half",
"memmap2", "memmap2",
"metal 0.27.0", "metal",
"mistralrs_cudarc_fork",
"num-traits", "num-traits",
"num_cpus", "num_cpus",
"rand", "rand",
...@@ -585,9 +570,6 @@ dependencies = [ ...@@ -585,9 +570,6 @@ dependencies = [
"rayon", "rayon",
"safetensors", "safetensors",
"thiserror 1.0.69", "thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke", "yoke",
"zip", "zip",
] ]
...@@ -616,7 +598,7 @@ dependencies = [ ...@@ -616,7 +598,7 @@ dependencies = [
[[package]] [[package]]
name = "candle-kernels" name = "candle-kernels"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"bindgen_cuda 0.1.5", "bindgen_cuda 0.1.5",
] ]
...@@ -624,9 +606,9 @@ dependencies = [ ...@@ -624,9 +606,9 @@ dependencies = [
[[package]] [[package]]
name = "candle-metal-kernels" name = "candle-metal-kernels"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"metal 0.27.0", "metal",
"once_cell", "once_cell",
"thiserror 1.0.69", "thiserror 1.0.69",
"tracing", "tracing",
...@@ -635,12 +617,12 @@ dependencies = [ ...@@ -635,12 +617,12 @@ dependencies = [
[[package]] [[package]]
name = "candle-nn" name = "candle-nn"
version = "0.8.0" version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119" source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [ dependencies = [
"candle-core", "candle-core",
"candle-metal-kernels", "candle-metal-kernels",
"half", "half",
"metal 0.27.0", "metal",
"num-traits", "num-traits",
"rayon", "rayon",
"safetensors", "safetensors",
...@@ -985,16 +967,6 @@ dependencies = [ ...@@ -985,16 +967,6 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "cudarc"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384"
dependencies = [
"half",
"libloading",
]
[[package]] [[package]]
name = "cudarc" name = "cudarc"
version = "0.13.4" version = "0.13.4"
...@@ -1503,9 +1475,8 @@ version = "0.1.3" ...@@ -1503,9 +1475,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f" checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
dependencies = [ dependencies = [
"cudarc 0.13.4", "cudarc",
"half", "half",
"mistralrs_cudarc_fork",
"num-traits", "num-traits",
"rand", "rand",
"rand_distr", "rand_distr",
...@@ -2675,21 +2646,6 @@ dependencies = [ ...@@ -2675,21 +2646,6 @@ dependencies = [
"paste", "paste",
] ]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.8.0",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]] [[package]]
name = "mime" name = "mime"
version = "0.3.17" version = "0.3.17"
...@@ -2788,8 +2744,8 @@ dependencies = [ ...@@ -2788,8 +2744,8 @@ dependencies = [
[[package]] [[package]]
name = "mistralrs" name = "mistralrs"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"candle-core", "candle-core",
...@@ -2809,8 +2765,8 @@ dependencies = [ ...@@ -2809,8 +2765,8 @@ dependencies = [
[[package]] [[package]]
name = "mistralrs-core" name = "mistralrs-core"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"akin", "akin",
"anyhow", "anyhow",
...@@ -2818,7 +2774,6 @@ dependencies = [ ...@@ -2818,7 +2774,6 @@ dependencies = [
"async-trait", "async-trait",
"base64 0.22.1", "base64 0.22.1",
"bindgen_cuda 0.1.5", "bindgen_cuda 0.1.5",
"buildstructor",
"bytemuck", "bytemuck",
"bytemuck_derive", "bytemuck_derive",
"candle-core", "candle-core",
...@@ -2842,6 +2797,7 @@ dependencies = [ ...@@ -2842,6 +2797,7 @@ dependencies = [
"itertools 0.13.0", "itertools 0.13.0",
"llguidance", "llguidance",
"lrtable", "lrtable",
"metal",
"minijinja", "minijinja",
"minijinja-contrib", "minijinja-contrib",
"mistralrs-paged-attn", "mistralrs-paged-attn",
...@@ -2881,23 +2837,23 @@ dependencies = [ ...@@ -2881,23 +2837,23 @@ dependencies = [
[[package]] [[package]]
name = "mistralrs-paged-attn" name = "mistralrs-paged-attn"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bindgen_cuda 0.1.6", "bindgen_cuda 0.1.6",
"candle-core", "candle-core",
"float8", "float8",
"half", "half",
"metal 0.27.0", "metal",
"once_cell", "once_cell",
"thiserror 1.0.69", "thiserror 1.0.69",
] ]
[[package]] [[package]]
name = "mistralrs-quant" name = "mistralrs-quant"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"bindgen_cuda 0.1.5", "bindgen_cuda 0.1.5",
"byteorder", "byteorder",
...@@ -2906,34 +2862,29 @@ dependencies = [ ...@@ -2906,34 +2862,29 @@ dependencies = [
"float8", "float8",
"half", "half",
"lazy_static", "lazy_static",
"memmap2",
"metal",
"once_cell", "once_cell",
"paste", "paste",
"rayon", "rayon",
"regex",
"safetensors",
"serde", "serde",
"serde_json", "serde_json",
"thiserror 1.0.69", "thiserror 1.0.69",
"tracing", "tracing",
"yoke",
] ]
[[package]] [[package]]
name = "mistralrs-vision" name = "mistralrs-vision"
version = "0.3.5" version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232" source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [ dependencies = [
"candle-core", "candle-core",
"image", "image",
] ]
[[package]]
name = "mistralrs_cudarc_fork"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82ee900a0e7080a2ff91042d070b0112a3533b32ebc6d8faaba96ee4f598e8d"
dependencies = [
"half",
"libloading",
]
[[package]] [[package]]
name = "monostate" name = "monostate"
version = "0.1.13" version = "0.1.13"
...@@ -3099,30 +3050,6 @@ dependencies = [ ...@@ -3099,30 +3050,6 @@ dependencies = [
"rand", "rand",
] ]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-complex" name = "num-complex"
version = "0.4.6" version = "0.4.6"
...@@ -3139,37 +3066,6 @@ version = "0.1.0" ...@@ -3139,37 +3066,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
...@@ -4602,16 +4498,6 @@ version = "1.2.0" ...@@ -4602,16 +4498,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "str_inflector"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
dependencies = [
"lazy_static",
"regex",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
...@@ -5366,26 +5252,6 @@ version = "0.2.5" ...@@ -5366,26 +5252,6 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "try_match"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b065c869a3f832418e279aa4c1d7088f9d5d323bde15a60a08e20c2cd4549082"
dependencies = [
"try_match_inner",
]
[[package]]
name = "try_match_inner"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c81686f7ab4065ccac3df7a910c4249f8c0f3fb70421d6ddec19b9311f63f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]] [[package]]
name = "tryhard" name = "tryhard"
version = "0.5.1" version = "0.5.1"
...@@ -5409,48 +5275,6 @@ version = "0.1.7" ...@@ -5409,48 +5275,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "ug"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13"
dependencies = [
"half",
"num",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "ug-cuda"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e"
dependencies = [
"cudarc 0.12.1",
"half",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "ug-metal"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4ed1df2c20a1a138f993041f650cc84ff27aaefb4342b7f986e77d00e80799"
dependencies = [
"half",
"metal 0.29.0",
"objc",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]] [[package]]
name = "unarray" name = "unarray"
version = "0.1.4" version = "0.1.4"
......
...@@ -60,7 +60,7 @@ prometheus = { version = "0.13" } ...@@ -60,7 +60,7 @@ prometheus = { version = "0.13" }
# mistralrs # mistralrs
either = { version = "1.13" } either = { version = "1.13" }
indexmap = { version = "2.6" } indexmap = { version = "2.6" }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "c26e6c8", optional = true } mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "5e689c9", optional = true }
[dev-dependencies] [dev-dependencies]
insta = { version = "1.41", features = ["glob", "json", "redactions"]} insta = { version = "1.41", features = ["glob", "json", "redactions"]}
......
...@@ -20,10 +20,10 @@ use async_trait::async_trait; ...@@ -20,10 +20,10 @@ use async_trait::async_trait;
use either::Either; use either::Either;
use indexmap::IndexMap; use indexmap::IndexMap;
use mistralrs::{ use mistralrs::{
Constraint, DefaultSchedulerMethod, Device, DeviceMapMetadata, GGUFLoaderBuilder, Constraint, DefaultSchedulerMethod, Device, DeviceMapMetadata, DeviceMapSetting,
GGUFSpecificConfig, MemoryGpuConfig, MistralRs, MistralRsBuilder, ModelDType, NormalRequest, GGUFLoaderBuilder, GGUFSpecificConfig, MemoryGpuConfig, MistralRs, MistralRsBuilder,
PagedAttentionConfig, Pipeline, Request, RequestMessage, ResponseOk, SamplingParams, ModelDType, NormalRequest, PagedAttentionConfig, Pipeline, Request, RequestMessage, ResponseOk,
SchedulerConfig, TokenSource, SamplingParams, SchedulerConfig, TokenSource,
}; };
use tokio::sync::mpsc::channel; use tokio::sync::mpsc::channel;
...@@ -85,7 +85,7 @@ impl MistralRsEngine { ...@@ -85,7 +85,7 @@ impl MistralRsEngine {
model_dir.display().to_string(), model_dir.display().to_string(),
vec![model_filename.to_string_lossy().into_owned()], vec![model_filename.to_string_lossy().into_owned()],
GGUFSpecificConfig { GGUFSpecificConfig {
prompt_batchsize: None, prompt_chunksize: None,
topology: None, topology: None,
}, },
) )
...@@ -108,7 +108,7 @@ impl MistralRsEngine { ...@@ -108,7 +108,7 @@ impl MistralRsEngine {
&ModelDType::Auto, &ModelDType::Auto,
&best_device()?, &best_device()?,
false, false,
DeviceMapMetadata::dummy(), DeviceMapSetting::Map(DeviceMapMetadata::dummy()),
None, None,
paged_attention_config, paged_attention_config,
)?; )?;
...@@ -195,7 +195,7 @@ impl ...@@ -195,7 +195,7 @@ impl
response: tx, response: tx,
return_logprobs: false, return_logprobs: false,
is_streaming: true, is_streaming: true,
id: 0, id: self.mistralrs.next_request_id(),
constraint: Constraint::None, constraint: Constraint::None,
suffix: None, suffix: None,
adapters: None, adapters: None,
...@@ -219,7 +219,10 @@ impl ...@@ -219,7 +219,10 @@ impl
}; };
match response { match response {
ResponseOk::Chunk(c) => { ResponseOk::Chunk(c) => {
let from_assistant = c.choices[0].delta.content.clone(); let Some(from_assistant) = c.choices[0].delta.content.clone() else {
tracing::warn!("No content from mistralrs. Abandoning request.");
break;
};
if let Some(tok) = maybe_tok.as_ref() { if let Some(tok) = maybe_tok.as_ref() {
used_output_tokens += tok used_output_tokens += tok
.encode(from_assistant.clone(), false) .encode(from_assistant.clone(), false)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment