Unverified commit 07207a1b, authored by Graham King and committed by GitHub
Browse files

chore: Upgrade mistralrs (#3490)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 3aeae151
......@@ -905,12 +905,12 @@ dependencies = [
[[package]]
name = "candle-core"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=95d713f9#95d713f996225a3643b1d0c3eeb5a35c40516625"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
dependencies = [
"byteorder",
"candle-kernels",
"candle-metal-kernels",
"cudarc 0.16.6",
"cudarc 0.17.3",
"float8",
"gemm 0.17.1",
"half 2.6.0",
......@@ -933,7 +933,7 @@ dependencies = [
[[package]]
name = "candle-kernels"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=95d713f9#95d713f996225a3643b1d0c3eeb5a35c40516625"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
dependencies = [
"bindgen_cuda 0.1.5",
]
......@@ -941,7 +941,7 @@ dependencies = [
[[package]]
name = "candle-metal-kernels"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=95d713f9#95d713f996225a3643b1d0c3eeb5a35c40516625"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
dependencies = [
"half 2.6.0",
"metal 0.27.0",
......@@ -953,9 +953,9 @@ dependencies = [
[[package]]
name = "candle-nn"
version = "0.9.1"
source = "git+https://github.com/EricLBuehler/candle.git?rev=95d713f9#95d713f996225a3643b1d0c3eeb5a35c40516625"
source = "git+https://github.com/EricLBuehler/candle.git?rev=7511e510#7511e510054973ea4e2043f6d2da14409c195baf"
dependencies = [
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-metal-kernels",
"half 2.6.0",
"metal 0.27.0",
......@@ -1611,6 +1611,7 @@ version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ba848ae5c6f3cb36e71eab5f268763e3fabcabe3f7bc683e16f7fa3d46281e"
dependencies = [
"half 2.6.0",
"libloading",
]
......@@ -2703,11 +2704,11 @@ dependencies = [
[[package]]
name = "float8"
version = "0.3.0"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f498aec3b227cd892ce18967f4033d9d397d28a80a7ab67e9f6b0176a79654e"
checksum = "4203231de188ebbdfb85c11f3c20ca2b063945710de04e7b59268731e728b462"
dependencies = [
"cudarc 0.16.6",
"cudarc 0.17.3",
"half 2.6.0",
"num-traits",
]
......@@ -4737,10 +4738,10 @@ dependencies = [
[[package]]
name = "mistralrs"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"anyhow",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-nn",
"clap 4.5.48",
"either",
......@@ -4759,7 +4760,7 @@ dependencies = [
[[package]]
name = "mistralrs-audio"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"anyhow",
"apodize",
......@@ -4770,7 +4771,7 @@ dependencies = [
[[package]]
name = "mistralrs-core"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"ahash",
"akin",
......@@ -4783,7 +4784,7 @@ dependencies = [
"bm25",
"bytemuck",
"bytemuck_derive",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-nn",
"cfgrammar",
"chrono",
......@@ -4866,7 +4867,7 @@ dependencies = [
[[package]]
name = "mistralrs-mcp"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"anyhow",
"async-trait",
......@@ -4886,11 +4887,11 @@ dependencies = [
[[package]]
name = "mistralrs-paged-attn"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"anyhow",
"bindgen_cuda 0.1.7",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"float8",
"half 2.6.0",
"metal 0.27.0",
......@@ -4901,11 +4902,11 @@ dependencies = [
[[package]]
name = "mistralrs-quant"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"bindgen_cuda 0.1.7",
"byteorder",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"candle-nn",
"float8",
"half 2.6.0",
......@@ -4929,9 +4930,9 @@ dependencies = [
[[package]]
name = "mistralrs-vision"
version = "0.6.0"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=53ebc74#53ebc7480cd08b124bc99ca9f6365af6152c6fa7"
source = "git+https://github.com/EricLBuehler/mistral.rs.git?rev=a13220255#a132202551f4fdf3a1d9e2da51232581789af7ea"
dependencies = [
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=95d713f9)",
"candle-core 0.9.1 (git+https://github.com/EricLBuehler/candle.git?rev=7511e510)",
"image",
"rayon",
]
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name = "dynamo-engine-mistralrs"
......@@ -39,7 +27,7 @@ async-stream = { workspace = true }
async-trait = { workspace = true }
either = { workspace = true }
indexmap = { version = "2.9.0", features = ["serde"] }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", version = "0.6.0", rev = "53ebc74" }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", version = "0.6.0", rev = "a13220255" }
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
......@@ -312,6 +312,7 @@ impl
.unwrap_or(det.top_n_logprobs),
frequency_penalty: request.inner.frequency_penalty.or(det.frequency_penalty),
presence_penalty: request.inner.presence_penalty.or(det.presence_penalty),
repetition_penalty: det.repetition_penalty,
stop_toks: request.inner.stop.map(to_stop_tokens).or(det.stop_toks),
max_len: {
let requested_max_tokens = request
......@@ -510,6 +511,7 @@ impl
.unwrap_or(det.top_n_logprobs),
frequency_penalty: request.inner.frequency_penalty.or(det.frequency_penalty),
presence_penalty: request.inner.presence_penalty.or(det.presence_penalty),
repetition_penalty: det.repetition_penalty,
stop_toks: request
.inner
.stop
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment