Commit 45b3505c authored by Graham King, committed by GitHub
Browse files

fix: Unique IDs for mistralrs requests (#186)

Upgrade mistralrs to latest.
parent 2f700421
......@@ -477,21 +477,6 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32"
[[package]]
name = "buildstructor"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3907aac66c65520545ae3cb3c195306e20d5ed5c90bfbb992e061cf12a104d0"
dependencies = [
"lazy_static",
"proc-macro2",
"quote",
"str_inflector",
"syn 2.0.98",
"thiserror 1.0.69",
"try_match",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
......@@ -542,17 +527,17 @@ dependencies = [
[[package]]
name = "candle-core"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"byteorder",
"candle-kernels",
"candle-metal-kernels",
"cudarc",
"float8",
"gemm",
"half",
"memmap2",
"metal 0.27.0",
"mistralrs_cudarc_fork",
"metal",
"num-traits",
"num_cpus",
"rand",
......@@ -560,9 +545,6 @@ dependencies = [
"rayon",
"safetensors",
"thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke",
"zip",
]
......@@ -591,7 +573,7 @@ dependencies = [
[[package]]
name = "candle-kernels"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"bindgen_cuda 0.1.5",
]
......@@ -599,9 +581,9 @@ dependencies = [
[[package]]
name = "candle-metal-kernels"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"metal 0.27.0",
"metal",
"once_cell",
"thiserror 1.0.69",
"tracing",
......@@ -610,12 +592,12 @@ dependencies = [
[[package]]
name = "candle-nn"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"candle-core",
"candle-metal-kernels",
"half",
"metal 0.27.0",
"metal",
"num-traits",
"rayon",
"safetensors",
......@@ -960,16 +942,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "cudarc"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384"
dependencies = [
"half",
"libloading",
]
[[package]]
name = "cudarc"
version = "0.13.4"
......@@ -1490,9 +1462,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
dependencies = [
"cudarc 0.13.4",
"cudarc",
"half",
"mistralrs_cudarc_fork",
"num-traits",
"rand",
"rand_distr",
......@@ -2631,21 +2602,6 @@ dependencies = [
"paste",
]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.8.0",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]]
name = "mime"
version = "0.3.17"
......@@ -2744,8 +2700,8 @@ dependencies = [
[[package]]
name = "mistralrs"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"anyhow",
"candle-core",
......@@ -2765,8 +2721,8 @@ dependencies = [
[[package]]
name = "mistralrs-core"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"akin",
"anyhow",
......@@ -2774,7 +2730,6 @@ dependencies = [
"async-trait",
"base64 0.22.1",
"bindgen_cuda 0.1.5",
"buildstructor",
"bytemuck",
"bytemuck_derive",
"candle-core",
......@@ -2798,6 +2753,7 @@ dependencies = [
"itertools 0.13.0",
"llguidance",
"lrtable",
"metal",
"minijinja",
"minijinja-contrib",
"mistralrs-paged-attn",
......@@ -2837,23 +2793,23 @@ dependencies = [
[[package]]
name = "mistralrs-paged-attn"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"anyhow",
"bindgen_cuda 0.1.6",
"candle-core",
"float8",
"half",
"metal 0.27.0",
"metal",
"once_cell",
"thiserror 1.0.69",
]
[[package]]
name = "mistralrs-quant"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"bindgen_cuda 0.1.5",
"byteorder",
......@@ -2862,34 +2818,29 @@ dependencies = [
"float8",
"half",
"lazy_static",
"memmap2",
"metal",
"once_cell",
"paste",
"rayon",
"regex",
"safetensors",
"serde",
"serde_json",
"thiserror 1.0.69",
"tracing",
"yoke",
]
[[package]]
name = "mistralrs-vision"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"candle-core",
"image",
]
[[package]]
name = "mistralrs_cudarc_fork"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82ee900a0e7080a2ff91042d070b0112a3533b32ebc6d8faaba96ee4f598e8d"
dependencies = [
"half",
"libloading",
]
[[package]]
name = "monostate"
version = "0.1.13"
......@@ -3130,30 +3081,6 @@ dependencies = [
"rand",
]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.6"
......@@ -3170,37 +3097,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.19"
......@@ -4497,16 +4393,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "str_inflector"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
name = "strsim"
version = "0.10.0"
......@@ -5267,26 +5153,6 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "try_match"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b065c869a3f832418e279aa4c1d7088f9d5d323bde15a60a08e20c2cd4549082"
dependencies = [
"try_match_inner",
]
[[package]]
name = "try_match_inner"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c81686f7ab4065ccac3df7a910c4249f8c0f3fb70421d6ddec19b9311f63f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tryhard"
version = "0.5.1"
......@@ -5304,48 +5170,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ug"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13"
dependencies = [
"half",
"num",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "ug-cuda"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e"
dependencies = [
"cudarc 0.12.1",
"half",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "ug-metal"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4ed1df2c20a1a138f993041f650cc84ff27aaefb4342b7f986e77d00e80799"
dependencies = [
"half",
"metal 0.29.0",
"objc",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "uncased"
version = "0.9.10"
......
......@@ -502,21 +502,6 @@ dependencies = [
"serde",
]
[[package]]
name = "buildstructor"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3907aac66c65520545ae3cb3c195306e20d5ed5c90bfbb992e061cf12a104d0"
dependencies = [
"lazy_static",
"proc-macro2",
"quote",
"str_inflector",
"syn 2.0.98",
"thiserror 1.0.69",
"try_match",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
......@@ -567,17 +552,17 @@ dependencies = [
[[package]]
name = "candle-core"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"byteorder",
"candle-kernels",
"candle-metal-kernels",
"cudarc",
"float8",
"gemm",
"half",
"memmap2",
"metal 0.27.0",
"mistralrs_cudarc_fork",
"metal",
"num-traits",
"num_cpus",
"rand",
......@@ -585,9 +570,6 @@ dependencies = [
"rayon",
"safetensors",
"thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke",
"zip",
]
......@@ -616,7 +598,7 @@ dependencies = [
[[package]]
name = "candle-kernels"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"bindgen_cuda 0.1.5",
]
......@@ -624,9 +606,9 @@ dependencies = [
[[package]]
name = "candle-metal-kernels"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"metal 0.27.0",
"metal",
"once_cell",
"thiserror 1.0.69",
"tracing",
......@@ -635,12 +617,12 @@ dependencies = [
[[package]]
name = "candle-nn"
version = "0.8.0"
source = "git+https://github.com/EricLBuehler/candle.git?rev=f524bc6#f524bc68a929e2404ea855f692bf30d98ff15119"
source = "git+https://github.com/EricLBuehler/candle.git?rev=fb5cc8c#fb5cc8c0175bc2999ed9fb75b13886aade453dbe"
dependencies = [
"candle-core",
"candle-metal-kernels",
"half",
"metal 0.27.0",
"metal",
"num-traits",
"rayon",
"safetensors",
......@@ -985,16 +967,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "cudarc"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384"
dependencies = [
"half",
"libloading",
]
[[package]]
name = "cudarc"
version = "0.13.4"
......@@ -1503,9 +1475,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a14d1c3d88fdab81b5886c34b2a424b3fba5123564ea415ff98b160cf44c432f"
dependencies = [
"cudarc 0.13.4",
"cudarc",
"half",
"mistralrs_cudarc_fork",
"num-traits",
"rand",
"rand_distr",
......@@ -2675,21 +2646,6 @@ dependencies = [
"paste",
]
[[package]]
name = "metal"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
dependencies = [
"bitflags 2.8.0",
"block",
"core-graphics-types",
"foreign-types 0.5.0",
"log",
"objc",
"paste",
]
[[package]]
name = "mime"
version = "0.3.17"
......@@ -2788,8 +2744,8 @@ dependencies = [
[[package]]
name = "mistralrs"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"anyhow",
"candle-core",
......@@ -2809,8 +2765,8 @@ dependencies = [
[[package]]
name = "mistralrs-core"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"akin",
"anyhow",
......@@ -2818,7 +2774,6 @@ dependencies = [
"async-trait",
"base64 0.22.1",
"bindgen_cuda 0.1.5",
"buildstructor",
"bytemuck",
"bytemuck_derive",
"candle-core",
......@@ -2842,6 +2797,7 @@ dependencies = [
"itertools 0.13.0",
"llguidance",
"lrtable",
"metal",
"minijinja",
"minijinja-contrib",
"mistralrs-paged-attn",
......@@ -2881,23 +2837,23 @@ dependencies = [
[[package]]
name = "mistralrs-paged-attn"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"anyhow",
"bindgen_cuda 0.1.6",
"candle-core",
"float8",
"half",
"metal 0.27.0",
"metal",
"once_cell",
"thiserror 1.0.69",
]
[[package]]
name = "mistralrs-quant"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"bindgen_cuda 0.1.5",
"byteorder",
......@@ -2906,34 +2862,29 @@ dependencies = [
"float8",
"half",
"lazy_static",
"memmap2",
"metal",
"once_cell",
"paste",
"rayon",
"regex",
"safetensors",
"serde",
"serde_json",
"thiserror 1.0.69",
"tracing",
"yoke",
]
[[package]]
name = "mistralrs-vision"
version = "0.3.5"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=c26e6c8#c26e6c85077186844da029bc2c889f6410539232"
version = "0.4.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=5e689c9#5e689c97653cdb1a631ca6fcd53ff447095bd8e5"
dependencies = [
"candle-core",
"image",
]
[[package]]
name = "mistralrs_cudarc_fork"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c82ee900a0e7080a2ff91042d070b0112a3533b32ebc6d8faaba96ee4f598e8d"
dependencies = [
"half",
"libloading",
]
[[package]]
name = "monostate"
version = "0.1.13"
......@@ -3099,30 +3050,6 @@ dependencies = [
"rand",
]
[[package]]
name = "num"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.6"
......@@ -3139,37 +3066,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.19"
......@@ -4602,16 +4498,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "str_inflector"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
name = "strsim"
version = "0.10.0"
......@@ -5366,26 +5252,6 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "try_match"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b065c869a3f832418e279aa4c1d7088f9d5d323bde15a60a08e20c2cd4549082"
dependencies = [
"try_match_inner",
]
[[package]]
name = "try_match_inner"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9c81686f7ab4065ccac3df7a910c4249f8c0f3fb70421d6ddec19b9311f63f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.98",
]
[[package]]
name = "tryhard"
version = "0.5.1"
......@@ -5409,48 +5275,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "ug"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13"
dependencies = [
"half",
"num",
"serde",
"serde_json",
"thiserror 1.0.69",
]
[[package]]
name = "ug-cuda"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e"
dependencies = [
"cudarc 0.12.1",
"half",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "ug-metal"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e4ed1df2c20a1a138f993041f650cc84ff27aaefb4342b7f986e77d00e80799"
dependencies = [
"half",
"metal 0.29.0",
"objc",
"serde",
"serde_json",
"thiserror 1.0.69",
"ug",
]
[[package]]
name = "unarray"
version = "0.1.4"
......
......@@ -60,7 +60,7 @@ prometheus = { version = "0.13" }
# mistralrs
either = { version = "1.13" }
indexmap = { version = "2.6" }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "c26e6c8", optional = true }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "5e689c9", optional = true }
[dev-dependencies]
insta = { version = "1.41", features = ["glob", "json", "redactions"]}
......
......@@ -20,10 +20,10 @@ use async_trait::async_trait;
use either::Either;
use indexmap::IndexMap;
use mistralrs::{
Constraint, DefaultSchedulerMethod, Device, DeviceMapMetadata, GGUFLoaderBuilder,
GGUFSpecificConfig, MemoryGpuConfig, MistralRs, MistralRsBuilder, ModelDType, NormalRequest,
PagedAttentionConfig, Pipeline, Request, RequestMessage, ResponseOk, SamplingParams,
SchedulerConfig, TokenSource,
Constraint, DefaultSchedulerMethod, Device, DeviceMapMetadata, DeviceMapSetting,
GGUFLoaderBuilder, GGUFSpecificConfig, MemoryGpuConfig, MistralRs, MistralRsBuilder,
ModelDType, NormalRequest, PagedAttentionConfig, Pipeline, Request, RequestMessage, ResponseOk,
SamplingParams, SchedulerConfig, TokenSource,
};
use tokio::sync::mpsc::channel;
......@@ -85,7 +85,7 @@ impl MistralRsEngine {
model_dir.display().to_string(),
vec![model_filename.to_string_lossy().into_owned()],
GGUFSpecificConfig {
prompt_batchsize: None,
prompt_chunksize: None,
topology: None,
},
)
......@@ -108,7 +108,7 @@ impl MistralRsEngine {
&ModelDType::Auto,
&best_device()?,
false,
DeviceMapMetadata::dummy(),
DeviceMapSetting::Map(DeviceMapMetadata::dummy()),
None,
paged_attention_config,
)?;
......@@ -195,7 +195,7 @@ impl
response: tx,
return_logprobs: false,
is_streaming: true,
id: 0,
id: self.mistralrs.next_request_id(),
constraint: Constraint::None,
suffix: None,
adapters: None,
......@@ -219,7 +219,10 @@ impl
};
match response {
ResponseOk::Chunk(c) => {
let from_assistant = c.choices[0].delta.content.clone();
let Some(from_assistant) = c.choices[0].delta.content.clone() else {
tracing::warn!("No content from mistralrs. Abandoning request.");
break;
};
if let Some(tok) = maybe_tok.as_ref() {
used_output_tokens += tok
.encode(from_assistant.clone(), false)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment