"git@developer.sourcefind.cn:OpenDAS/Uni-Core.git" did not exist on "3e9c05a6d3df81fdb629375534a7bf068facbe2c"
Unverified commit 4761baa6, authored by Abrar Shivani and committed by GitHub
Browse files

feat: Warm‑up mistral.rs engine to reduce latency on subsequent requests (#796)

Send a warm‑up request to the mistralrs engine so that subsequent requests are faster.
parent aae0d405
...@@ -165,8 +165,48 @@ impl MistralRsEngine { ...@@ -165,8 +165,48 @@ impl MistralRsEngine {
let engine = MistralRsEngine { let engine = MistralRsEngine {
mistralrs: builder.build(), mistralrs: builder.build(),
}; };
// skip the id used for dummy run https://github.com/EricLBuehler/mistral.rs/issues/1218 // skip the id used for dummy run https://github.com/EricLBuehler/mistral.rs/issues/1218
let _ = engine.mistralrs.next_request_id(); let _ = engine.mistralrs.next_request_id();
// Perform warmup request
let (tx, mut rx) = channel(1);
let request_id = engine.mistralrs.next_request_id();
let warmup_request = Request::Normal(NormalRequest {
id: request_id,
messages: RequestMessage::Chat(vec![IndexMap::from([
("role".to_string(), Either::Left("user".to_string())),
("content".to_string(), Either::Left("test".to_string())),
])]),
sampling_params: SamplingParams::deterministic(),
response: tx,
return_logprobs: false,
is_streaming: false,
constraint: Constraint::None,
suffix: None,
adapters: None,
tools: None,
tool_choice: None,
logits_processors: None,
return_raw_logits: false,
});
// Send warmup request and consume response
if let Ok(sender) = engine.mistralrs.get_sender() {
if let Ok(()) = sender.send(warmup_request).await {
if let Some(response) = rx.recv().await {
match response.as_result() {
Ok(r) => {
tracing::debug!(request_id, "Warmup response: {r:?}");
}
Err(err) => {
tracing::error!(request_id, %err, "Failed converting response to result.");
}
}
}
}
}
Ok(engine) Ok(engine)
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment