Unverified commit bce74588, authored by Graham King, committed by GitHub
Browse files

chore: Rust to 1.89 and edition 2024 (#2659)

parent 268d017e
......@@ -589,7 +589,7 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> ProgressEngine
#[cfg(test)]
mod tests {
use crate::block_manager::block::{BasicMetadata, Blocks};
use crate::block_manager::layout::{tests::setup_layout, FullyContiguous, LayoutConfig};
use crate::block_manager::layout::{FullyContiguous, LayoutConfig, tests::setup_layout};
use crate::block_manager::locality::Local;
use crate::tokens::{TokenBlockSequence, Tokens};
......
......@@ -51,10 +51,10 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
// Set the parent of the block if it has one.
// This is needed to ensure the lifetime of the parent is at least as long as the child.
if let Ok(Some(parent)) = block.parent_sequence_hash() {
if let Some(parent_block) = self.match_sequence_hash(parent) {
block.set_parent(parent_block.mutable_block().clone());
}
if let Ok(Some(parent)) = block.parent_sequence_hash()
&& let Some(parent_block) = self.match_sequence_hash(parent)
{
block.set_parent(parent_block.mutable_block().clone());
}
let shared = Arc::new(block);
......@@ -78,14 +78,14 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
}
pub fn remove(&mut self, block: &mut Block<S, L, M>) {
if let Ok(sequence_hash) = block.sequence_hash() {
if let Some(weak) = self.map.get(&sequence_hash) {
if let Some(_arc) = weak.upgrade() {
block.reset();
return;
}
self.map.remove(&sequence_hash);
if let Ok(sequence_hash) = block.sequence_hash()
&& let Some(weak) = self.map.get(&sequence_hash)
{
if let Some(_arc) = weak.upgrade() {
block.reset();
return;
}
self.map.remove(&sequence_hash);
}
}
......
......@@ -15,7 +15,7 @@
use std::sync::atomic::AtomicU64;
use crate::block_manager::block::{locality::LocalityProvider, BlockState};
use crate::block_manager::block::{BlockState, locality::LocalityProvider};
use super::*;
use priority_key::PriorityKey;
......@@ -113,7 +113,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> InactiveBlockPool<S, L,
fn insert_with_sequence_hash(&mut self, block: Block<S, L, M>, sequence_hash: SequenceHash) {
let priority_key = PriorityKey::new(block.metadata().clone(), sequence_hash);
if self.priority_set.contains(&priority_key) {
tracing::trace!("multiple entries with the same sequence hash, resetting block and inserting into uninitialized set");
tracing::trace!(
"multiple entries with the same sequence hash, resetting block and inserting into uninitialized set"
);
let mut block = block;
block.reset();
self.uninitialized_set.push_back(block);
......@@ -546,8 +548,8 @@ pub(crate) mod tests {
use crate::{
block_manager::{
block::{
locality::Local, registry::BlockRegistry, state::CompleteState, Blocks,
PrivateBlockExt,
Blocks, PrivateBlockExt, locality::Local, registry::BlockRegistry,
state::CompleteState,
},
events::NullEventManager,
layout::{BlockLayout, FullyContiguous, LayoutConfigBuilder},
......
......@@ -14,7 +14,7 @@
// limitations under the License.
use crate::block_manager::{
block::{registry::BlockRegistrationError, BlockState, PrivateBlockExt},
block::{BlockState, PrivateBlockExt, registry::BlockRegistrationError},
events::Publisher,
};
......@@ -266,18 +266,16 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> State<S, L, M>
}
}
BlockRegistrationDuplicationSetting::Disabled => {
if let Some(block) = duplicate {
if let Some(raw_blocks) = block.try_take_block(private::PrivateToken) {
self.inactive.return_blocks(raw_blocks);
}
if let Some(block) = duplicate
&& let Some(raw_blocks) = block.try_take_block(private::PrivateToken)
{
self.inactive.return_blocks(raw_blocks);
}
}
}
if offload {
if let Some(priority) = immutable.metadata().offload_priority() {
immutable.enqueue_offload(priority).await.unwrap();
}
if offload && let Some(priority) = immutable.metadata().offload_priority() {
immutable.enqueue_offload(priority).await.unwrap();
}
immutable_blocks.push(immutable);
......
......@@ -17,7 +17,7 @@ mod local;
mod logical;
mod resources;
use crate::block_manager::block::{factory::IntoBlocks, MutableBlock};
use crate::block_manager::block::{MutableBlock, factory::IntoBlocks};
use crate::block_manager::locality::LogicalResources;
use crate::block_manager::offload::request::BlockResult;
......@@ -26,8 +26,8 @@ use super::*;
// use super::offload::OffloadManager;
use super::{
block::{
factory::LocalBlockDataFactory, locality::LocalityProvider, Block, GlobalRegistry,
ImmutableBlock,
Block, GlobalRegistry, ImmutableBlock, factory::LocalBlockDataFactory,
locality::LocalityProvider,
},
config::NixlOptions,
events::{EventManager, NullEventManager},
......
......@@ -88,7 +88,7 @@ pub use disk::*;
use torch::*;
use std::{
alloc::{alloc_zeroed, dealloc, Layout},
alloc::{Layout, alloc_zeroed, dealloc},
collections::HashMap,
fmt::Debug,
ptr::NonNull,
......@@ -322,7 +322,10 @@ impl std::fmt::Debug for RegistrationHandles {
impl Drop for RegistrationHandles {
fn drop(&mut self) {
if !self.handles.is_empty() {
panic!("RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called", self.handles.len());
panic!(
"RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called",
self.handles.len()
);
}
}
}
......
......@@ -207,7 +207,7 @@ mod nixl {
S: MemoryRegion,
{
unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self.storage.as_ref())
unsafe { Storage::as_ptr(self.storage.as_ref()) }
}
fn size(&self) -> usize {
......
......@@ -86,7 +86,7 @@ use std::{
sync::{Arc, Mutex, OnceLock},
};
use cudarc::driver::{sys, CudaContext};
use cudarc::driver::{CudaContext, sys};
/// Trait for [Storage] types that can be accessed by CUDA
pub trait CudaAccessible: Storage {}
......
......@@ -16,7 +16,7 @@
use super::*;
use core::ffi::c_char;
use nix::fcntl::{fallocate, FallocateFlags};
use nix::fcntl::{FallocateFlags, fallocate};
use nix::unistd::unlink;
use std::ffi::CStr;
use std::ffi::CString;
......
......@@ -342,7 +342,7 @@ impl NixlRegisterableStorage for PinnedStorage {}
impl MemoryRegion for PinnedStorage {
unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self)
unsafe { Storage::as_ptr(self) }
}
fn size(&self) -> usize {
......@@ -367,7 +367,7 @@ impl NixlRegisterableStorage for DeviceStorage {}
impl MemoryRegion for DeviceStorage {
unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self)
unsafe { Storage::as_ptr(self) }
}
fn size(&self) -> usize {
......@@ -406,7 +406,7 @@ impl NixlRegisterableStorage for DiskStorage {
impl MemoryRegion for DiskStorage {
unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self)
unsafe { Storage::as_ptr(self) }
}
fn size(&self) -> usize {
......
......@@ -17,8 +17,8 @@
//! them within Dynamo.
use cudarc::driver::{
sys::{cuCtxPopCurrent_v2, cuCtxPushCurrent_v2, cudaError_enum, CUcontext, CUstream},
CudaContext, CudaStream,
sys::{CUcontext, CUstream, cuCtxPopCurrent_v2, cuCtxPushCurrent_v2, cudaError_enum},
};
use std::pin::Pin;
use std::{marker::PhantomData, sync::Arc};
......
......@@ -18,8 +18,8 @@ use std::sync::{Arc, Mutex};
use tokio::sync::watch;
use tracing;
use dynamo_runtime::transports::etcd::WatchEvent;
use dynamo_runtime::DistributedRuntime;
use dynamo_runtime::transports::etcd::WatchEvent;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DisaggRouterConf {
......@@ -218,23 +218,23 @@ impl DisaggregatedRouter {
}
pub fn check_for_updates(&self) {
if let Some(watcher) = &self.config_watcher {
if watcher.has_changed().unwrap_or(false) {
let config = watcher.borrow().clone();
let new_value = config.max_local_prefill_length;
// Update the value using the mutex
let mut current_value = self.max_local_prefill_length.lock().unwrap();
let old_value = *current_value;
if old_value != new_value {
*current_value = new_value;
tracing::info!(
"Applied config update for model {}: max_local_prefill_length changed from {} to {}",
self.model_name,
old_value,
new_value
);
}
if let Some(watcher) = &self.config_watcher
&& watcher.has_changed().unwrap_or(false)
{
let config = watcher.borrow().clone();
let new_value = config.max_local_prefill_length;
// Update the value using the mutex
let mut current_value = self.max_local_prefill_length.lock().unwrap();
let old_value = *current_value;
if old_value != new_value {
*current_value = new_value;
tracing::info!(
"Applied config update for model {}: max_local_prefill_length changed from {} to {}",
self.model_name,
old_value,
new_value
);
}
}
}
......
......@@ -7,7 +7,7 @@ use dynamo_runtime::slug::Slug;
use crate::discovery::ModelEntry;
use crate::kv_router::{scheduler::DefaultWorkerSelector, KvRouterConfig};
use crate::kv_router::{KvRouterConfig, scheduler::DefaultWorkerSelector};
use crate::{
kv_router::KvRouter,
types::openai::{
......
......@@ -5,16 +5,16 @@ use std::sync::Arc;
use tokio::sync::mpsc::Sender;
use anyhow::Context as _;
use tokio::sync::{mpsc::Receiver, Notify};
use tokio::sync::{Notify, mpsc::Receiver};
use dynamo_runtime::{
DistributedRuntime,
pipeline::{
network::egress::push_router::PushRouter, ManyOut, Operator, RouterMode, SegmentSource,
ServiceBackend, SingleIn, Source,
ManyOut, Operator, RouterMode, SegmentSource, ServiceBackend, SingleIn, Source,
network::egress::push_router::PushRouter,
},
protocols::annotated::Annotated,
transports::etcd::{KeyValue, WatchEvent},
DistributedRuntime,
};
use crate::{
......@@ -35,7 +35,7 @@ use crate::{
},
};
use super::{ModelEntry, ModelManager, MODEL_ROOT_PATH};
use super::{MODEL_ROOT_PATH, ModelEntry, ModelManager};
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ModelUpdate {
......@@ -213,10 +213,8 @@ impl ModelWatcher {
);
update_tx = false;
}
if update_tx {
if let Some(tx) = &self.model_update_tx {
tx.send(ModelUpdate::Removed(model_type)).await.ok();
}
if update_tx && let Some(tx) = &self.model_update_tx {
tx.send(ModelUpdate::Removed(model_type)).await.ok();
}
return Ok(None);
}
......@@ -251,13 +249,12 @@ impl ModelWatcher {
);
} else {
for model_type in ALL_MODEL_TYPES {
if (chat_model_removed && *model_type == ModelType::Chat)
if ((chat_model_removed && *model_type == ModelType::Chat)
|| (completions_model_removed && *model_type == ModelType::Completion)
|| (embeddings_model_removed && *model_type == ModelType::Embedding)
|| (embeddings_model_removed && *model_type == ModelType::Embedding))
&& let Some(tx) = &self.model_update_tx
{
if let Some(tx) = &self.model_update_tx {
tx.send(ModelUpdate::Removed(*model_type)).await.ok();
}
tx.send(ModelUpdate::Removed(*model_type)).await.ok();
}
}
}
......
......@@ -18,7 +18,7 @@ use crate::preprocessor::PreprocessedRequest;
use crate::protocols::common::llm_backend::LLMEngineOutput;
use crate::protocols::openai::{
chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
completions::{prompt_to_string, NvCreateCompletionRequest, NvCreateCompletionResponse},
completions::{NvCreateCompletionRequest, NvCreateCompletionResponse, prompt_to_string},
};
use crate::types::openai::embeddings::NvCreateEmbeddingRequest;
use crate::types::openai::embeddings::NvCreateEmbeddingResponse;
......
......@@ -8,18 +8,18 @@ use crate::types::openai::chat_completions::{
};
use anyhow::Context as _;
use dynamo_async_openai::types::FinishReason;
use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime};
use dynamo_runtime::{Runtime, pipeline::Context, runtime::CancellationToken};
use futures::StreamExt;
use serde::{Deserialize, Serialize};
use std::cmp;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
use crate::entrypoint::input::common;
use crate::entrypoint::EngineConfig;
use crate::entrypoint::input::common;
/// Max tokens in each response.
/// TODO: For batch mode this should be the full context size of the model
......
......@@ -5,7 +5,7 @@ use std::pin::Pin;
use crate::{
backend::{Backend, ExecutionContext},
discovery::{ModelManager, ModelWatcher, MODEL_ROOT_PATH},
discovery::{MODEL_ROOT_PATH, ModelManager, ModelWatcher},
engines::StreamingEngineAdapter,
entrypoint::{self, EngineConfig},
kv_router::{KvPushRouter, KvRouter},
......@@ -15,15 +15,16 @@ use crate::{
protocols::common::llm_backend::{BackendOutput, LLMEngineOutput, PreprocessedRequest},
request_template::RequestTemplate,
types::{
Annotated,
openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
OpenAIChatCompletionsStreamingEngine,
},
Annotated,
},
};
use dynamo_runtime::{
DistributedRuntime, Runtime,
component::Client,
distributed::DistributedConfig,
engine::{AsyncEngineStream, Data},
......@@ -31,7 +32,6 @@ use dynamo_runtime::{
Context, ManyOut, Operator, PushRouter, RouterMode, SegmentSource, ServiceBackend,
ServiceEngine, ServiceFrontend, SingleIn, Source,
},
DistributedRuntime, Runtime,
};
use std::sync::Arc;
......@@ -191,11 +191,11 @@ where
Req: Data,
Resp: Data,
OpenAIPreprocessor: Operator<
Context<Req>,
Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
Context<PreprocessedRequest>,
Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
>,
Context<Req>,
Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
Context<PreprocessedRequest>,
Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
>,
{
let frontend = ServiceFrontend::<SingleIn<Req>, ManyOut<Annotated<Resp>>>::new();
let preprocessor = OpenAIPreprocessor::new((*card).clone())
......@@ -224,11 +224,11 @@ where
Req: Data,
Resp: Data,
OpenAIPreprocessor: Operator<
Context<Req>,
Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
Context<PreprocessedRequest>,
Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
>,
Context<Req>,
Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
Context<PreprocessedRequest>,
Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
>,
{
let frontend = SegmentSource::<SingleIn<Req>, ManyOut<Annotated<Resp>>>::new();
let preprocessor = OpenAIPreprocessor::new(card.clone()).await?.into_operator();
......
......@@ -9,18 +9,18 @@ use crate::{
model_type::ModelType,
preprocessor::{BackendOutput, PreprocessedRequest},
types::{
Annotated,
openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
},
Annotated,
},
};
use dynamo_runtime::engine::AsyncEngineStream;
use dynamo_runtime::pipeline::{
network::Ingress, Context, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source,
Context, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source, network::Ingress,
};
use dynamo_runtime::{protocols::EndpointId, DistributedRuntime};
use dynamo_runtime::{DistributedRuntime, protocols::EndpointId};
use crate::entrypoint::EngineConfig;
......@@ -125,13 +125,12 @@ pub async fn run(
result?;
// Cleanup on shutdown
if let Some(mut card) = card {
if let Err(err) = card
if let Some(mut card) = card
&& let Err(err) = card
.delete_from_nats(distributed_runtime.nats_client())
.await
{
tracing::error!(%err, "delete_from_nats error on shutdown");
}
{
tracing::error!(%err, "delete_from_nats error on shutdown");
}
Ok(())
......
......@@ -4,10 +4,10 @@
use std::sync::Arc;
use crate::{
discovery::{ModelManager, ModelUpdate, ModelWatcher, MODEL_ROOT_PATH},
discovery::{MODEL_ROOT_PATH, ModelManager, ModelUpdate, ModelWatcher},
endpoint_type::EndpointType,
engines::StreamingEngineAdapter,
entrypoint::{self, input::common, EngineConfig},
entrypoint::{self, EngineConfig, input::common},
http::service::service_v2::{self, HttpService},
kv_router::KvRouterConfig,
model_type::ModelType,
......@@ -17,8 +17,8 @@ use crate::{
},
};
use dynamo_runtime::transports::etcd;
use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};
use dynamo_runtime::{DistributedRuntime, Runtime};
use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};
/// Build and run an HTTP service
pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Result<()> {
......
......@@ -6,12 +6,12 @@ use crate::request_template::RequestTemplate;
use crate::types::openai::chat_completions::{
NvCreateChatCompletionRequest, OpenAIChatCompletionsStreamingEngine,
};
use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime};
use dynamo_runtime::{Runtime, pipeline::Context, runtime::CancellationToken};
use futures::StreamExt;
use std::io::{ErrorKind, Write};
use crate::entrypoint::input::common;
use crate::entrypoint::EngineConfig;
use crate::entrypoint::input::common;
/// Max response tokens for each single query. Must be less than model context size.
/// TODO: Cmd line flag to overwrite this
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment