Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
45b3505c
Commit
45b3505c
authored
Feb 14, 2025
by
Graham King
Committed by
GitHub
Feb 14, 2025
Browse files
fix: Unique IDs for mistralrs requests (#186)
Upgrade mistralrs to latest.
parent
2f700421
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
64 additions
and
413 deletions
+64
-413
applications/llm/bin/tio/Cargo.lock
applications/llm/bin/tio/Cargo.lock
+26
-202
llm/rust/Cargo.lock
llm/rust/Cargo.lock
+26
-202
llm/rust/triton-llm/Cargo.toml
llm/rust/triton-llm/Cargo.toml
+1
-1
llm/rust/triton-llm/src/engines/mistralrs.rs
llm/rust/triton-llm/src/engines/mistralrs.rs
+11
-8
No files found.
applications/llm/bin/tio/Cargo.lock
View file @
45b3505c
...
@@ -477,21 +477,6 @@ version = "0.2.2"
...
@@ -477,21 +477,6 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32"
checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32"
[[package]]
name = "buildstructor"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3907aac66c65520545ae3cb3c195306e20d5ed5c90bfbb992e061cf12a104d0"
dependencies = [
"lazy_static",
"proc-macro2",
"quote",
"str_inflector",
"syn 2.0.98",
"thiserror 1.0.69",
"try_match",
]
[[package]]
[[package]]
name = "bumpalo"
name = "bumpalo"
version = "3.17.0"
version = "3.17.0"
...
@@ -542,17 +527,17 @@ dependencies = [
...
@@ -542,17 +527,17 @@ dependencies = [
[[package]]
[[package]]
name = "candle-core"
name = "candle-core"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"byteorder",
"byteorder",
"candle-kernels",
"candle-kernels",
"candle-metal-kernels",
"candle-metal-kernels",
"cudarc",
"float8",
"float8",
"gemm",
"gemm",
"half",
"half",
"memmap2",
"memmap2",
"metal 0.27.0",
"metal",
"mistralrs_cudarc_fork",
"num-traits",
"num-traits",
"num_cpus",
"num_cpus",
"rand",
"rand",
...
@@ -560,9 +545,6 @@ dependencies = [
...
@@ -560,9 +545,6 @@ dependencies = [
"rayon",
"rayon",
"safetensors",
"safetensors",
"thiserror 1.0.69",
"thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke",
"yoke",
"zip",
"zip",
]
]
...
@@ -591,7 +573,7 @@ dependencies = [
...
@@ -591,7 +573,7 @@ dependencies = [
[[package]]
[[package]]
name = "candle-kernels"
name = "candle-kernels"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"bindgen_cuda 0.1.5",
"bindgen_cuda 0.1.5",
]
]
...
@@ -599,9 +581,9 @@ dependencies = [
...
@@ -599,9 +581,9 @@ dependencies = [
[[package]]
[[package]]
name = "candle-metal-kernels"
name = "candle-metal-kernels"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"metal
0.27.0
",
"metal",
"once_cell",
"once_cell",
"thiserror 1.0.69",
"thiserror 1.0.69",
"tracing",
"tracing",
...
@@ -610,12 +592,12 @@ dependencies = [
...
@@ -610,12 +592,12 @@ dependencies = [
[[package]]
[[package]]
name = "candle-nn"
name = "candle-nn"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"candle-core",
"candle-core",
"candle-metal-kernels",
"candle-metal-kernels",
"half",
"half",
"metal
0.27.0
",
"metal",
"num-traits",
"num-traits",
"rayon",
"rayon",
"safetensors",
"safetensors",
...
@@ -960,16 +942,6 @@ dependencies = [
...
@@ -960,16 +942,6 @@ dependencies = [
"memchr",
"memchr",
]
]
[[package]]
name = "cudarc"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384"
dependencies = [
"half",
"libloading",
]
[[package]]
[[package]]
name = "cudarc"
name = "cudarc"
version = "0.13.4"
version = "0.13.4"
...
@@ -1490,9 +1462,8 @@ version = "0.1.3"
...
@@ -1490,9 +1462,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
dependencies = [
dependencies = [
"cudarc
0.13.4
",
"cudarc",
"half",
"half",
"mistralrs_cudarc_fork",
"num-traits",
"num-traits",
"rand",
"rand",
"rand_distr",
"rand_distr",
...
@@ -2631,21 +2602,6 @@ dependencies = [
...
@@ -2631,21 +2602,6 @@ dependencies = [
"paste",
"paste",
]
]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.8.0",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]]
[[package]]
name = "mime"
name = "mime"
version = "0.3.17"
version = "0.3.17"
...
@@ -2744,8 +2700,8 @@ dependencies = [
...
@@ -2744,8 +2700,8 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs"
name = "mistralrs"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"candle-core",
"candle-core",
...
@@ -2765,8 +2721,8 @@ dependencies = [
...
@@ -2765,8 +2721,8 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-core"
name = "mistralrs-core"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"akin",
"akin",
"anyhow",
"anyhow",
...
@@ -2774,7 +2730,6 @@ dependencies = [
...
@@ -2774,7 +2730,6 @@ dependencies = [
"async-trait",
"async-trait",
"base64 0.22.1",
"base64 0.22.1",
"bindgen_cuda 0.1.5",
"bindgen_cuda 0.1.5",
"buildstructor",
"bytemuck",
"bytemuck",
"bytemuck_derive",
"bytemuck_derive",
"candle-core",
"candle-core",
...
@@ -2798,6 +2753,7 @@ dependencies = [
...
@@ -2798,6 +2753,7 @@ dependencies = [
"itertools 0.13.0",
"itertools 0.13.0",
"llguidance",
"llguidance",
"lrtable",
"lrtable",
"metal",
"minijinja",
"minijinja",
"minijinja-contrib",
"minijinja-contrib",
"mistralrs-paged-attn",
"mistralrs-paged-attn",
...
@@ -2837,23 +2793,23 @@ dependencies = [
...
@@ -2837,23 +2793,23 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-paged-attn"
name = "mistralrs-paged-attn"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bindgen_cuda 0.1.6",
"bindgen_cuda 0.1.6",
"candle-core",
"candle-core",
"float8",
"float8",
"half",
"half",
"metal
0.27.0
",
"metal",
"once_cell",
"once_cell",
"thiserror 1.0.69",
"thiserror 1.0.69",
]
]
[[package]]
[[package]]
name = "mistralrs-quant"
name = "mistralrs-quant"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"bindgen_cuda 0.1.5",
"bindgen_cuda 0.1.5",
"byteorder",
"byteorder",
...
@@ -2862,34 +2818,29 @@ dependencies = [
...
@@ -2862,34 +2818,29 @@ dependencies = [
"float8",
"float8",
"half",
"half",
"lazy_static",
"lazy_static",
"memmap2",
"metal",
"once_cell",
"once_cell",
"paste",
"paste",
"rayon",
"rayon",
"regex",
"safetensors",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 1.0.69",
"thiserror 1.0.69",
"tracing",
"tracing",
"yoke",
]
]
[[package]]
[[package]]
name = "mistralrs-vision"
name = "mistralrs-vision"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"candle-core",
"candle-core",
"image",
"image",
]
]
[[package]]
name = "mistralrs_cudarc_fork"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82ee900a0e7080a2ff91042d070b0112a3533b32ebc6d8faaba96ee4f598e8d"
dependencies = [
"half",
"libloading",
]
[[package]]
[[package]]
name = "monostate"
name = "monostate"
version = "0.1.13"
version = "0.1.13"
...
@@ -3130,30 +3081,6 @@ dependencies = [
...
@@ -3130,30 +3081,6 @@ dependencies = [
"rand",
"rand",
]
]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
[[package]]
name = "num-complex"
name = "num-complex"
version = "0.4.6"
version = "0.4.6"
...
@@ -3170,37 +3097,6 @@ version = "0.1.0"
...
@@ -3170,37 +3097,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
[[package]]
name = "num-traits"
name = "num-traits"
version = "0.2.19"
version = "0.2.19"
...
@@ -4497,16 +4393,6 @@ version = "1.2.0"
...
@@ -4497,16 +4393,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "str_inflector"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
[[package]]
name = "strsim"
name = "strsim"
version = "0.10.0"
version = "0.10.0"
...
@@ -5267,26 +5153,6 @@ version = "0.2.5"
...
@@ -5267,26 +5153,6 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "try_match"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b065c869a3f832418e279aa4c1d7088f9d5d323bde15a60a08e20c2cd4549082"
dependencies = [
"try_match_inner",
]
[[package]]
name = "try_match_inner"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c81686f7ab4065ccac3df7a910c4249f8c0f3fb70421d6ddec19b9311f63f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
[[package]]
name = "tryhard"
name = "tryhard"
version = "0.5.1"
version = "0.5.1"
...
@@ -5304,48 +5170,6 @@ version = "1.17.0"
...
@@ -5304,48 +5170,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ug"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13"
dependencies = [
"half",
"num",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "ug-cuda"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e"
dependencies = [
"cudarc 0.12.1",
"half",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "ug-metal"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4ed1df2c20a1a138f993041f650cc84ff27aaefb4342b7f986e77d00e80799"
dependencies = [
"half",
"metal 0.29.0",
"objc",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
[[package]]
name = "uncased"
name = "uncased"
version = "0.9.10"
version = "0.9.10"
...
...
llm/rust/Cargo.lock
View file @
45b3505c
...
@@ -502,21 +502,6 @@ dependencies = [
...
@@ -502,21 +502,6 @@ dependencies = [
"serde",
"serde",
]
]
[[package]]
name = "buildstructor"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3907aac66c65520545ae3cb3c195306e20d5ed5c90bfbb992e061cf12a104d0"
dependencies = [
"lazy_static",
"proc-macro2",
"quote",
"str_inflector",
"syn 2.0.98",
"thiserror 1.0.69",
"try_match",
]
[[package]]
[[package]]
name = "bumpalo"
name = "bumpalo"
version = "3.17.0"
version = "3.17.0"
...
@@ -567,17 +552,17 @@ dependencies = [
...
@@ -567,17 +552,17 @@ dependencies = [
[[package]]
[[package]]
name = "candle-core"
name = "candle-core"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"byteorder",
"byteorder",
"candle-kernels",
"candle-kernels",
"candle-metal-kernels",
"candle-metal-kernels",
"cudarc",
"float8",
"float8",
"gemm",
"gemm",
"half",
"half",
"memmap2",
"memmap2",
"metal 0.27.0",
"metal",
"mistralrs_cudarc_fork",
"num-traits",
"num-traits",
"num_cpus",
"num_cpus",
"rand",
"rand",
...
@@ -585,9 +570,6 @@ dependencies = [
...
@@ -585,9 +570,6 @@ dependencies = [
"rayon",
"rayon",
"safetensors",
"safetensors",
"thiserror 1.0.69",
"thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke",
"yoke",
"zip",
"zip",
]
]
...
@@ -616,7 +598,7 @@ dependencies = [
...
@@ -616,7 +598,7 @@ dependencies = [
[[package]]
[[package]]
name = "candle-kernels"
name = "candle-kernels"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"bindgen_cuda 0.1.5",
"bindgen_cuda 0.1.5",
]
]
...
@@ -624,9 +606,9 @@ dependencies = [
...
@@ -624,9 +606,9 @@ dependencies = [
[[package]]
[[package]]
name = "candle-metal-kernels"
name = "candle-metal-kernels"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"metal
0.27.0
",
"metal",
"once_cell",
"once_cell",
"thiserror 1.0.69",
"thiserror 1.0.69",
"tracing",
"tracing",
...
@@ -635,12 +617,12 @@ dependencies = [
...
@@ -635,12 +617,12 @@ dependencies = [
[[package]]
[[package]]
name = "candle-nn"
name = "candle-nn"
version = "0.8.0"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
524bc6#f524bc68a929e2404ea855f692bf30d98ff15119
"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f
b5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe
"
dependencies = [
dependencies = [
"candle-core",
"candle-core",
"candle-metal-kernels",
"candle-metal-kernels",
"half",
"half",
"metal
0.27.0
",
"metal",
"num-traits",
"num-traits",
"rayon",
"rayon",
"safetensors",
"safetensors",
...
@@ -985,16 +967,6 @@ dependencies = [
...
@@ -985,16 +967,6 @@ dependencies = [
"memchr",
"memchr",
]
]
[[package]]
name = "cudarc"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384"
dependencies = [
"half",
"libloading",
]
[[package]]
[[package]]
name = "cudarc"
name = "cudarc"
version = "0.13.4"
version = "0.13.4"
...
@@ -1503,9 +1475,8 @@ version = "0.1.3"
...
@@ -1503,9 +1475,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
dependencies = [
dependencies = [
"cudarc
0.13.4
",
"cudarc",
"half",
"half",
"mistralrs_cudarc_fork",
"num-traits",
"num-traits",
"rand",
"rand",
"rand_distr",
"rand_distr",
...
@@ -2675,21 +2646,6 @@ dependencies = [
...
@@ -2675,21 +2646,6 @@ dependencies = [
"paste",
"paste",
]
]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.8.0",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]]
[[package]]
name = "mime"
name = "mime"
version = "0.3.17"
version = "0.3.17"
...
@@ -2788,8 +2744,8 @@ dependencies = [
...
@@ -2788,8 +2744,8 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs"
name = "mistralrs"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"candle-core",
"candle-core",
...
@@ -2809,8 +2765,8 @@ dependencies = [
...
@@ -2809,8 +2765,8 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-core"
name = "mistralrs-core"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"akin",
"akin",
"anyhow",
"anyhow",
...
@@ -2818,7 +2774,6 @@ dependencies = [
...
@@ -2818,7 +2774,6 @@ dependencies = [
"async-trait",
"async-trait",
"base64 0.22.1",
"base64 0.22.1",
"bindgen_cuda 0.1.5",
"bindgen_cuda 0.1.5",
"buildstructor",
"bytemuck",
"bytemuck",
"bytemuck_derive",
"bytemuck_derive",
"candle-core",
"candle-core",
...
@@ -2842,6 +2797,7 @@ dependencies = [
...
@@ -2842,6 +2797,7 @@ dependencies = [
"itertools 0.13.0",
"itertools 0.13.0",
"llguidance",
"llguidance",
"lrtable",
"lrtable",
"metal",
"minijinja",
"minijinja",
"minijinja-contrib",
"minijinja-contrib",
"mistralrs-paged-attn",
"mistralrs-paged-attn",
...
@@ -2881,23 +2837,23 @@ dependencies = [
...
@@ -2881,23 +2837,23 @@ dependencies = [
[[package]]
[[package]]
name = "mistralrs-paged-attn"
name = "mistralrs-paged-attn"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"bindgen_cuda 0.1.6",
"bindgen_cuda 0.1.6",
"candle-core",
"candle-core",
"float8",
"float8",
"half",
"half",
"metal
0.27.0
",
"metal",
"once_cell",
"once_cell",
"thiserror 1.0.69",
"thiserror 1.0.69",
]
]
[[package]]
[[package]]
name = "mistralrs-quant"
name = "mistralrs-quant"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"bindgen_cuda 0.1.5",
"bindgen_cuda 0.1.5",
"byteorder",
"byteorder",
...
@@ -2906,34 +2862,29 @@ dependencies = [
...
@@ -2906,34 +2862,29 @@ dependencies = [
"float8",
"float8",
"half",
"half",
"lazy_static",
"lazy_static",
"memmap2",
"metal",
"once_cell",
"once_cell",
"paste",
"paste",
"rayon",
"rayon",
"regex",
"safetensors",
"serde",
"serde",
"serde_json",
"serde_json",
"thiserror 1.0.69",
"thiserror 1.0.69",
"tracing",
"tracing",
"yoke",
]
]
[[package]]
[[package]]
name = "mistralrs-vision"
name = "mistralrs-vision"
version = "0.
3.5
"
version = "0.
4.0
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
c26e6c8#c26e6c85077186844da029bc2c889f6410539232
"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=
5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5
"
dependencies = [
dependencies = [
"candle-core",
"candle-core",
"image",
"image",
]
]
[[package]]
name = "mistralrs_cudarc_fork"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82ee900a0e7080a2ff91042d070b0112a3533b32ebc6d8faaba96ee4f598e8d"
dependencies = [
"half",
"libloading",
]
[[package]]
[[package]]
name = "monostate"
name = "monostate"
version = "0.1.13"
version = "0.1.13"
...
@@ -3099,30 +3050,6 @@ dependencies = [
...
@@ -3099,30 +3050,6 @@ dependencies = [
"rand",
"rand",
]
]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
[[package]]
name = "num-complex"
name = "num-complex"
version = "0.4.6"
version = "0.4.6"
...
@@ -3139,37 +3066,6 @@ version = "0.1.0"
...
@@ -3139,37 +3066,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
[[package]]
name = "num-traits"
name = "num-traits"
version = "0.2.19"
version = "0.2.19"
...
@@ -4602,16 +4498,6 @@ version = "1.2.0"
...
@@ -4602,16 +4498,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "str_inflector"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
[[package]]
name = "strsim"
name = "strsim"
version = "0.10.0"
version = "0.10.0"
...
@@ -5366,26 +5252,6 @@ version = "0.2.5"
...
@@ -5366,26 +5252,6 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "try_match"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b065c869a3f832418e279aa4c1d7088f9d5d323bde15a60a08e20c2cd4549082"
dependencies = [
"try_match_inner",
]
[[package]]
name = "try_match_inner"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c81686f7ab4065ccac3df7a910c4249f8c0f3fb70421d6ddec19b9311f63f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
[[package]]
name = "tryhard"
name = "tryhard"
version = "0.5.1"
version = "0.5.1"
...
@@ -5409,48 +5275,6 @@ version = "0.1.7"
...
@@ -5409,48 +5275,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "ug"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13"
dependencies = [
"half",
"num",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "ug-cuda"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e"
dependencies = [
"cudarc 0.12.1",
"half",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "ug-metal"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4ed1df2c20a1a138f993041f650cc84ff27aaefb4342b7f986e77d00e80799"
dependencies = [
"half",
"metal 0.29.0",
"objc",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
[[package]]
name = "unarray"
name = "unarray"
version = "0.1.4"
version = "0.1.4"
...
...
llm/rust/triton-llm/Cargo.toml
View file @
45b3505c
...
@@ -60,7 +60,7 @@ prometheus = { version = "0.13" }
...
@@ -60,7 +60,7 @@ prometheus = { version = "0.13" }
# mistralrs
# mistralrs
either
=
{
version
=
"1.13"
}
either
=
{
version
=
"1.13"
}
indexmap
=
{
version
=
"2.6"
}
indexmap
=
{
version
=
"2.6"
}
mistralrs
=
{
git
=
"https://github.com/EricLBuehler/mistral.rs.git"
,
rev
=
"
c26e6c8
"
,
optional
=
true
}
mistralrs
=
{
git
=
"https://github.com/EricLBuehler/mistral.rs.git"
,
rev
=
"
5e689c9
"
,
optional
=
true
}
[dev-dependencies]
[dev-dependencies]
insta
=
{
version
=
"1.41"
,
features
=
[
"glob"
,
"json"
,
"redactions"
]}
insta
=
{
version
=
"1.41"
,
features
=
[
"glob"
,
"json"
,
"redactions"
]}
...
...
llm/rust/triton-llm/src/engines/mistralrs.rs
View file @
45b3505c
...
@@ -20,10 +20,10 @@ use async_trait::async_trait;
...
@@ -20,10 +20,10 @@ use async_trait::async_trait;
use
either
::
Either
;
use
either
::
Either
;
use
indexmap
::
IndexMap
;
use
indexmap
::
IndexMap
;
use
mistralrs
::{
use
mistralrs
::{
Constraint
,
DefaultSchedulerMethod
,
Device
,
DeviceMapMetadata
,
GGUFLoaderBuilder
,
Constraint
,
DefaultSchedulerMethod
,
Device
,
DeviceMapMetadata
,
DeviceMapSetting
,
GGUFSpecificConfig
,
MemoryGpuConfig
,
MistralRs
,
MistralRsBuilder
,
ModelDType
,
NormalRequest
,
GGUFLoaderBuilder
,
GGUFSpecificConfig
,
MemoryGpuConfig
,
MistralRs
,
MistralRsBuilder
,
PagedAttentionConfig
,
Pipeline
,
Request
,
RequestMessage
,
ResponseOk
,
SamplingParams
,
ModelDType
,
NormalRequest
,
PagedAttentionConfig
,
Pipeline
,
Request
,
RequestMessage
,
ResponseOk
,
SchedulerConfig
,
TokenSource
,
SamplingParams
,
SchedulerConfig
,
TokenSource
,
};
};
use
tokio
::
sync
::
mpsc
::
channel
;
use
tokio
::
sync
::
mpsc
::
channel
;
...
@@ -85,7 +85,7 @@ impl MistralRsEngine {
...
@@ -85,7 +85,7 @@ impl MistralRsEngine {
model_dir
.display
()
.to_string
(),
model_dir
.display
()
.to_string
(),
vec!
[
model_filename
.to_string_lossy
()
.into_owned
()],
vec!
[
model_filename
.to_string_lossy
()
.into_owned
()],
GGUFSpecificConfig
{
GGUFSpecificConfig
{
prompt_
batch
size
:
None
,
prompt_
chunk
size
:
None
,
topology
:
None
,
topology
:
None
,
},
},
)
)
...
@@ -108,7 +108,7 @@ impl MistralRsEngine {
...
@@ -108,7 +108,7 @@ impl MistralRsEngine {
&
ModelDType
::
Auto
,
&
ModelDType
::
Auto
,
&
best_device
()
?
,
&
best_device
()
?
,
false
,
false
,
DeviceMapMetadata
::
dummy
(),
DeviceMapSetting
::
Map
(
DeviceMapMetadata
::
dummy
()
)
,
None
,
None
,
paged_attention_config
,
paged_attention_config
,
)
?
;
)
?
;
...
@@ -195,7 +195,7 @@ impl
...
@@ -195,7 +195,7 @@ impl
response
:
tx
,
response
:
tx
,
return_logprobs
:
false
,
return_logprobs
:
false
,
is_streaming
:
true
,
is_streaming
:
true
,
id
:
0
,
id
:
self
.mistralrs
.next_request_id
()
,
constraint
:
Constraint
::
None
,
constraint
:
Constraint
::
None
,
suffix
:
None
,
suffix
:
None
,
adapters
:
None
,
adapters
:
None
,
...
@@ -219,7 +219,10 @@ impl
...
@@ -219,7 +219,10 @@ impl
};
};
match
response
{
match
response
{
ResponseOk
::
Chunk
(
c
)
=>
{
ResponseOk
::
Chunk
(
c
)
=>
{
let
from_assistant
=
c
.choices
[
0
]
.delta.content
.clone
();
let
Some
(
from_assistant
)
=
c
.choices
[
0
]
.delta.content
.clone
()
else
{
tracing
::
warn!
(
"No content from mistralrs. Abandoning request."
);
break
;
};
if
let
Some
(
tok
)
=
maybe_tok
.as_ref
()
{
if
let
Some
(
tok
)
=
maybe_tok
.as_ref
()
{
used_output_tokens
+=
tok
used_output_tokens
+=
tok
.encode
(
from_assistant
.clone
(),
false
)
.encode
(
from_assistant
.clone
(),
false
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment