"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "b9bcdc715808c2ec110a6e98e98e4fbe0681f8bf"
Unverified Commit 4ab47617 authored by Graham King, committed by GitHub
Browse files

chore(engines): Upgrade mistralrs to 0.6.0 (#1767)

parent 7a353e61
This diff is collapsed.
...@@ -76,7 +76,7 @@ tokio-util = { version = "0.7", features = ["codec", "net"] } ...@@ -76,7 +76,7 @@ tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = { version = "0.1" } tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] }
validator = { version = "0.20.0", features = ["derive"] } validator = { version = "0.20.0", features = ["derive"] }
uuid = { version = "1", features = ["v4", "serde"] } uuid = { version = "1.17", features = ["v4", "serde"] }
url = {version = "2.5", features = ["serde"]} url = {version = "2.5", features = ["serde"]}
xxhash-rust = { version = "0.8", features = ["xxh3", "const_xxh3"] } xxhash-rust = { version = "0.8", features = ["xxh3", "const_xxh3"] }
......
...@@ -30,7 +30,9 @@ allow = [ ...@@ -30,7 +30,9 @@ allow = [
"OpenSSL", "OpenSSL",
"Unicode-3.0", "Unicode-3.0",
"BSL-1.0", "BSL-1.0",
"MPL-2.0" "MPL-2.0",
"CDLA-Permissive-2.0",
"Zlib"
] ]
# TODO exceptions # TODO exceptions
......
...@@ -5133,12 +5133,14 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" ...@@ -5133,12 +5133,14 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.16.0" version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
dependencies = [ dependencies = [
"getrandom 0.3.2", "getrandom 0.3.2",
"js-sys",
"serde", "serde",
"wasm-bindgen",
] ]
[[package]] [[package]]
......
...@@ -26,7 +26,7 @@ keywords.workspace = true ...@@ -26,7 +26,7 @@ keywords.workspace = true
[features] [features]
default = [] default = []
cuda = ["mistralrs/cuda", "candle-core/cuda"] cuda = ["mistralrs/cuda"]
metal = ["mistralrs/metal"] metal = ["mistralrs/metal"]
[dependencies] [dependencies]
...@@ -37,10 +37,9 @@ anyhow = { workspace = true } ...@@ -37,10 +37,9 @@ anyhow = { workspace = true }
async-openai = { workspace = true } async-openai = { workspace = true }
async-stream = { workspace = true } async-stream = { workspace = true }
async-trait = { workspace = true } async-trait = { workspace = true }
candle-core = { version = "0.8.0" }
either = { workspace = true } either = { workspace = true }
indexmap = { version = "2.6" } indexmap = { version = "2.9.0", features = ["serde"] }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "ebd50e35e" } mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", version = "0.6.0" }
serde_json = { workspace = true } serde_json = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
...@@ -13,8 +13,8 @@ use mistralrs::{ ...@@ -13,8 +13,8 @@ use mistralrs::{
AutoDeviceMapParams, Constraint, DefaultSchedulerMethod, Device, DeviceMapSetting, AutoDeviceMapParams, Constraint, DefaultSchedulerMethod, Device, DeviceMapSetting,
GGUFLoaderBuilder, GGUFSpecificConfig, IsqType, MemoryGpuConfig, MistralRs, MistralRsBuilder, GGUFLoaderBuilder, GGUFSpecificConfig, IsqType, MemoryGpuConfig, MistralRs, MistralRsBuilder,
ModelDType, NormalLoaderBuilder, NormalRequest, NormalSpecificConfig, PagedAttentionConfig, ModelDType, NormalLoaderBuilder, NormalRequest, NormalSpecificConfig, PagedAttentionConfig,
Request, RequestMessage, ResponseOk, SamplingParams, SchedulerConfig, StopTokens, TokenSource, PagedCacheType, Request, RequestMessage, ResponseOk, SamplingParams, SchedulerConfig,
VisionLoaderBuilder, VisionLoaderType, VisionSpecificConfig, StopTokens, TokenSource, VisionLoaderBuilder, VisionLoaderType, VisionSpecificConfig,
}; };
use tokio::sync::mpsc::channel; use tokio::sync::mpsc::channel;
...@@ -66,6 +66,7 @@ fn best_device() -> pipeline_error::Result<Device> { ...@@ -66,6 +66,7 @@ fn best_device() -> pipeline_error::Result<Device> {
struct MistralRsEngine { struct MistralRsEngine {
mistralrs: Arc<MistralRs>, mistralrs: Arc<MistralRs>,
context_length: usize, context_length: usize,
display_name: String,
} }
impl MistralRsEngine { impl MistralRsEngine {
...@@ -114,7 +115,7 @@ impl MistralRsEngine { ...@@ -114,7 +115,7 @@ impl MistralRsEngine {
Some(model_path.display().to_string()), Some(model_path.display().to_string()),
jinja_explicit, jinja_explicit,
) )
.build(vlt) .build(Some(vlt))
} else { } else {
// Load from a HF repo dir // Load from a HF repo dir
NormalLoaderBuilder::new( NormalLoaderBuilder::new(
...@@ -140,6 +141,7 @@ impl MistralRsEngine { ...@@ -140,6 +141,7 @@ impl MistralRsEngine {
None, // Block size, default 32 None, // Block size, default 32
4096, // CPU memory in MiB 4096, // CPU memory in MiB
MemoryGpuConfig::ContextSize(max_seq_len), MemoryGpuConfig::ContextSize(max_seq_len),
PagedCacheType::Auto,
)?) )?)
} else { } else {
None None
...@@ -203,8 +205,9 @@ impl MistralRsEngine { ...@@ -203,8 +205,9 @@ impl MistralRsEngine {
) )
.with_prefix_cache_n(16); .with_prefix_cache_n(16);
let engine = MistralRsEngine { let engine = MistralRsEngine {
mistralrs: builder.build(), mistralrs: builder.build().await,
context_length: max_seq_len, context_length: max_seq_len,
display_name: display_name.to_string(),
}; };
// skip the id used for dummy run https://github.com/EricLBuehler/mistral.rs/issues/1218 // skip the id used for dummy run https://github.com/EricLBuehler/mistral.rs/issues/1218
...@@ -213,8 +216,9 @@ impl MistralRsEngine { ...@@ -213,8 +216,9 @@ impl MistralRsEngine {
// Perform warmup request // Perform warmup request
let (tx, mut rx) = channel(1); let (tx, mut rx) = channel(1);
let request_id = engine.mistralrs.next_request_id(); let request_id = engine.mistralrs.next_request_id();
let warmup_request = Request::Normal(NormalRequest { let warmup_request = Request::Normal(Box::new(NormalRequest {
id: request_id, id: request_id,
model_id: Some(display_name.to_string()),
messages: RequestMessage::Chat { messages: RequestMessage::Chat {
messages: vec![IndexMap::from([ messages: vec![IndexMap::from([
("role".to_string(), Either::Left("user".to_string())), ("role".to_string(), Either::Left("user".to_string())),
...@@ -236,10 +240,10 @@ impl MistralRsEngine { ...@@ -236,10 +240,10 @@ impl MistralRsEngine {
logits_processors: None, logits_processors: None,
return_raw_logits: false, return_raw_logits: false,
web_search_options: None, web_search_options: None,
}); }));
// Send warmup request and consume response // Send warmup request and consume response
if let Ok(sender) = engine.mistralrs.get_sender() { if let Ok(sender) = engine.mistralrs.get_sender(None) {
if let Ok(()) = sender.send(warmup_request).await { if let Ok(()) = sender.send(warmup_request).await {
if let Some(response) = rx.recv().await { if let Some(response) = rx.recv().await {
match response.as_result() { match response.as_result() {
...@@ -339,8 +343,9 @@ impl ...@@ -339,8 +343,9 @@ impl
dry_params: det.dry_params, dry_params: det.dry_params,
}; };
let request_id = self.mistralrs.next_request_id(); let request_id = self.mistralrs.next_request_id();
let mistralrs_request = Request::Normal(NormalRequest { let mistralrs_request = Request::Normal(Box::new(NormalRequest {
id: request_id, id: request_id,
model_id: Some(self.display_name.clone()),
messages: RequestMessage::Chat { messages: RequestMessage::Chat {
messages, messages,
enable_thinking: None, enable_thinking: None,
...@@ -356,9 +361,12 @@ impl ...@@ -356,9 +361,12 @@ impl
logits_processors: None, logits_processors: None,
return_raw_logits: false, return_raw_logits: false,
web_search_options: None, web_search_options: None,
}); }));
self.mistralrs.get_sender()?.send(mistralrs_request).await?; self.mistralrs
.get_sender(None)?
.send(mistralrs_request)
.await?;
let output = stream! { let output = stream! {
while let Some(response) = rx.recv().await { while let Some(response) = rx.recv().await {
...@@ -536,8 +544,9 @@ impl ...@@ -536,8 +544,9 @@ impl
}; };
let request_id = self.mistralrs.next_request_id(); let request_id = self.mistralrs.next_request_id();
let mistralrs_request = Request::Normal(NormalRequest { let mistralrs_request = Request::Normal(Box::new(NormalRequest {
id: request_id, id: request_id,
model_id: Some(self.display_name.clone()),
messages, messages,
sampling_params, sampling_params,
response: tx, response: tx,
...@@ -550,9 +559,12 @@ impl ...@@ -550,9 +559,12 @@ impl
logits_processors: None, logits_processors: None,
return_raw_logits: false, return_raw_logits: false,
web_search_options: None, web_search_options: None,
}); }));
self.mistralrs.get_sender()?.send(mistralrs_request).await?; self.mistralrs
.get_sender(None)?
.send(mistralrs_request)
.await?;
let output = stream! { let output = stream! {
while let Some(response) = rx.recv().await { while let Some(response) = rx.recv().await {
......
...@@ -2951,12 +2951,14 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" ...@@ -2951,12 +2951,14 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.16.0" version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
dependencies = [ dependencies = [
"getrandom 0.3.2", "getrandom 0.3.2",
"js-sys",
"serde", "serde",
"wasm-bindgen",
] ]
[[package]] [[package]]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment