chore: Rust to 1.89 and edition 2024 (#2659)

bce74588 · Graham King · GitHub · 268d017e · bce74588 · bce74588
Unverified commit bce74588, authored Aug 22, 2025 by Graham King; committed by GitHub Aug 22, 2025
20 changed files
--- a/lib/llm/src/block_manager/pool/managed.rs
+++ b/lib/llm/src/block_manager/pool/managed.rs
@@ -589,7 +589,7 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> ProgressEngine
 #[cfg(test)]
 mod tests {
    use crate::block_manager::block::{BasicMetadata, Blocks};
-    use crate::block_manager::layout::{tests::setup_layout, FullyContiguous, LayoutConfig};
+    use crate::block_manager::layout::{FullyContiguous, LayoutConfig, tests::setup_layout};

    use crate::block_manager::locality::Local;
    use crate::tokens::{TokenBlockSequence, Tokens};

--- a/lib/llm/src/block_manager/pool/managed/active.rs
+++ b/lib/llm/src/block_manager/pool/managed/active.rs
@@ -51,10 +51,10 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>

        // Set the parent of the block if it has one.
        // This is needed to ensure the lifetime of the parent is at least as long as the child.
-        if let Ok(Some(parent)) = block.parent_sequence_hash() {
-            if let Some(parent_block) = self.match_sequence_hash(parent) {
-                block.set_parent(parent_block.mutable_block().clone());
-            }
+        if let Ok(Some(parent)) = block.parent_sequence_hash()
+            && let Some(parent_block) = self.match_sequence_hash(parent)
+        {
+            block.set_parent(parent_block.mutable_block().clone());
        }

        let shared = Arc::new(block);
@@ -78,14 +78,14 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
    }

    pub fn remove(&mut self, block: &mut Block<S, L, M>) {
-        if let Ok(sequence_hash) = block.sequence_hash() {
-            if let Some(weak) = self.map.get(&sequence_hash) {
-                if let Some(_arc) = weak.upgrade() {
-                    block.reset();
-                    return;
-                }
-                self.map.remove(&sequence_hash);
+        if let Ok(sequence_hash) = block.sequence_hash()
+            && let Some(weak) = self.map.get(&sequence_hash)
+        {
+            if let Some(_arc) = weak.upgrade() {
+                block.reset();
+                return;
            }
+            self.map.remove(&sequence_hash);
        }
    }


--- a/lib/llm/src/block_manager/pool/managed/inactive.rs
+++ b/lib/llm/src/block_manager/pool/managed/inactive.rs
@@ -15,7 +15,7 @@

 use std::sync::atomic::AtomicU64;

-use crate::block_manager::block::{locality::LocalityProvider, BlockState};
+use crate::block_manager::block::{BlockState, locality::LocalityProvider};

 use super::*;
 use priority_key::PriorityKey;
@@ -113,7 +113,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> InactiveBlockPool<S, L,
    fn insert_with_sequence_hash(&mut self, block: Block<S, L, M>, sequence_hash: SequenceHash) {
        let priority_key = PriorityKey::new(block.metadata().clone(), sequence_hash);
        if self.priority_set.contains(&priority_key) {
-            tracing::trace!("multiple entries with the same sequence hash, resetting block and inserting into uninitialized set");
+            tracing::trace!(
+                "multiple entries with the same sequence hash, resetting block and inserting into uninitialized set"
+            );
            let mut block = block;
            block.reset();
            self.uninitialized_set.push_back(block);
@@ -546,8 +548,8 @@ pub(crate) mod tests {
    use crate::{
        block_manager::{
            block::{
-                locality::Local, registry::BlockRegistry, state::CompleteState, Blocks,
-                PrivateBlockExt,
+                Blocks, PrivateBlockExt, locality::Local, registry::BlockRegistry,
+                state::CompleteState,
            },
            events::NullEventManager,
            layout::{BlockLayout, FullyContiguous, LayoutConfigBuilder},

--- a/lib/llm/src/block_manager/pool/managed/state.rs
+++ b/lib/llm/src/block_manager/pool/managed/state.rs
@@ -14,7 +14,7 @@
 // limitations under the License.

 use crate::block_manager::{
-    block::{registry::BlockRegistrationError, BlockState, PrivateBlockExt},
+    block::{BlockState, PrivateBlockExt, registry::BlockRegistrationError},
    events::Publisher,
 };

@@ -266,18 +266,16 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> State<S, L, M>
                    }
                }
                BlockRegistrationDuplicationSetting::Disabled => {
-                    if let Some(block) = duplicate {
-                        if let Some(raw_blocks) = block.try_take_block(private::PrivateToken) {
-                            self.inactive.return_blocks(raw_blocks);
-                        }
+                    if let Some(block) = duplicate
+                        && let Some(raw_blocks) = block.try_take_block(private::PrivateToken)
+                    {
+                        self.inactive.return_blocks(raw_blocks);
                    }
                }
            }

-            if offload {
-                if let Some(priority) = immutable.metadata().offload_priority() {
-                    immutable.enqueue_offload(priority).await.unwrap();
-                }
+            if offload && let Some(priority) = immutable.metadata().offload_priority() {
+                immutable.enqueue_offload(priority).await.unwrap();
            }

            immutable_blocks.push(immutable);

--- a/lib/llm/src/block_manager/state.rs
+++ b/lib/llm/src/block_manager/state.rs
@@ -17,7 +17,7 @@ mod local;
 mod logical;
 mod resources;

-use crate::block_manager::block::{factory::IntoBlocks, MutableBlock};
+use crate::block_manager::block::{MutableBlock, factory::IntoBlocks};
 use crate::block_manager::locality::LogicalResources;
 use crate::block_manager::offload::request::BlockResult;

@@ -26,8 +26,8 @@ use super::*;
 // use super::offload::OffloadManager;
 use super::{
    block::{
-        factory::LocalBlockDataFactory, locality::LocalityProvider, Block, GlobalRegistry,
-        ImmutableBlock,
+        Block, GlobalRegistry, ImmutableBlock, factory::LocalBlockDataFactory,
+        locality::LocalityProvider,
    },
    config::NixlOptions,
    events::{EventManager, NullEventManager},

--- a/lib/llm/src/block_manager/storage.rs
+++ b/lib/llm/src/block_manager/storage.rs
@@ -88,7 +88,7 @@ pub use disk::*;
 use torch::*;

 use std::{
-    alloc::{alloc_zeroed, dealloc, Layout},
+    alloc::{Layout, alloc_zeroed, dealloc},
    collections::HashMap,
    fmt::Debug,
    ptr::NonNull,
@@ -322,7 +322,10 @@ impl std::fmt::Debug for RegistrationHandles {
 impl Drop for RegistrationHandles {
    fn drop(&mut self) {
        if !self.handles.is_empty() {
-            panic!("RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called", self.handles.len());
+            panic!(
+                "RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called",
+                self.handles.len()
+            );
        }
    }
 }

--- a/lib/llm/src/block_manager/storage/arena.rs
+++ b/lib/llm/src/block_manager/storage/arena.rs
@@ -207,7 +207,7 @@ mod nixl {
        S: MemoryRegion,
    {
        unsafe fn as_ptr(&self) -> *const u8 {
-            Storage::as_ptr(self.storage.as_ref())
+            unsafe { Storage::as_ptr(self.storage.as_ref()) }
        }

        fn size(&self) -> usize {

--- a/lib/llm/src/block_manager/storage/cuda.rs
+++ b/lib/llm/src/block_manager/storage/cuda.rs
@@ -86,7 +86,7 @@ use std::{
    sync::{Arc, Mutex, OnceLock},
 };

-use cudarc::driver::{sys, CudaContext};
+use cudarc::driver::{CudaContext, sys};

 /// Trait for [Storage] types that can be accessed by CUDA
 pub trait CudaAccessible: Storage {}

--- a/lib/llm/src/block_manager/storage/disk.rs
+++ b/lib/llm/src/block_manager/storage/disk.rs
@@ -16,7 +16,7 @@
 use super::*;

 use core::ffi::c_char;
-use nix::fcntl::{fallocate, FallocateFlags};
+use nix::fcntl::{FallocateFlags, fallocate};
 use nix::unistd::unlink;
 use std::ffi::CStr;
 use std::ffi::CString;

--- a/lib/llm/src/block_manager/storage/nixl.rs
+++ b/lib/llm/src/block_manager/storage/nixl.rs
@@ -342,7 +342,7 @@ impl NixlRegisterableStorage for PinnedStorage {}

 impl MemoryRegion for PinnedStorage {
    unsafe fn as_ptr(&self) -> *const u8 {
-        Storage::as_ptr(self)
+        unsafe { Storage::as_ptr(self) }
    }

    fn size(&self) -> usize {
@@ -367,7 +367,7 @@ impl NixlRegisterableStorage for DeviceStorage {}

 impl MemoryRegion for DeviceStorage {
    unsafe fn as_ptr(&self) -> *const u8 {
-        Storage::as_ptr(self)
+        unsafe { Storage::as_ptr(self) }
    }

    fn size(&self) -> usize {
@@ -406,7 +406,7 @@ impl NixlRegisterableStorage for DiskStorage {

 impl MemoryRegion for DiskStorage {
    unsafe fn as_ptr(&self) -> *const u8 {
-        Storage::as_ptr(self)
+        unsafe { Storage::as_ptr(self) }
    }

    fn size(&self) -> usize {

--- a/lib/llm/src/cuda.rs
+++ b/lib/llm/src/cuda.rs
@@ -17,8 +17,8 @@
 //! them within Dynamo.

 use cudarc::driver::{
-    sys::{cuCtxPopCurrent_v2, cuCtxPushCurrent_v2, cudaError_enum, CUcontext, CUstream},
    CudaContext, CudaStream,
+    sys::{CUcontext, CUstream, cuCtxPopCurrent_v2, cuCtxPushCurrent_v2, cudaError_enum},
 };
 use std::pin::Pin;
 use std::{marker::PhantomData, sync::Arc};

--- a/lib/llm/src/disagg_router.rs
+++ b/lib/llm/src/disagg_router.rs
@@ -18,8 +18,8 @@ use std::sync::{Arc, Mutex};
 use tokio::sync::watch;
 use tracing;

-use dynamo_runtime::transports::etcd::WatchEvent;
 use dynamo_runtime::DistributedRuntime;
+use dynamo_runtime::transports::etcd::WatchEvent;

 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct DisaggRouterConf {
@@ -218,23 +218,23 @@ impl DisaggregatedRouter {
    }

    pub fn check_for_updates(&self) {
-        if let Some(watcher) = &self.config_watcher {
-            if watcher.has_changed().unwrap_or(false) {
-                let config = watcher.borrow().clone();
-                let new_value = config.max_local_prefill_length;
-
-                // Update the value using the mutex
-                let mut current_value = self.max_local_prefill_length.lock().unwrap();
-                let old_value = *current_value;
-                if old_value != new_value {
-                    *current_value = new_value;
-                    tracing::info!(
-                        "Applied config update for model {}: max_local_prefill_length changed from {} to {}",
-                        self.model_name,
-                        old_value,
-                        new_value
-                    );
-                }
+        if let Some(watcher) = &self.config_watcher
+            && watcher.has_changed().unwrap_or(false)
+        {
+            let config = watcher.borrow().clone();
+            let new_value = config.max_local_prefill_length;
+
+            // Update the value using the mutex
+            let mut current_value = self.max_local_prefill_length.lock().unwrap();
+            let old_value = *current_value;
+            if old_value != new_value {
+                *current_value = new_value;
+                tracing::info!(
+                    "Applied config update for model {}: max_local_prefill_length changed from {} to {}",
+                    self.model_name,
+                    old_value,
+                    new_value
+                );
            }
        }
    }

--- a/lib/llm/src/discovery/model_manager.rs
+++ b/lib/llm/src/discovery/model_manager.rs
@@ -7,7 +7,7 @@ use dynamo_runtime::slug::Slug;

 use crate::discovery::ModelEntry;

-use crate::kv_router::{scheduler::DefaultWorkerSelector, KvRouterConfig};
+use crate::kv_router::{KvRouterConfig, scheduler::DefaultWorkerSelector};
 use crate::{
    kv_router::KvRouter,
    types::openai::{

--- a/lib/llm/src/discovery/watcher.rs
+++ b/lib/llm/src/discovery/watcher.rs
@@ -5,16 +5,16 @@ use std::sync::Arc;
 use tokio::sync::mpsc::Sender;

 use anyhow::Context as _;
-use tokio::sync::{mpsc::Receiver, Notify};
+use tokio::sync::{Notify, mpsc::Receiver};

 use dynamo_runtime::{
+    DistributedRuntime,
    pipeline::{
-        network::egress::push_router::PushRouter, ManyOut, Operator, RouterMode, SegmentSource,
-        ServiceBackend, SingleIn, Source,
+        ManyOut, Operator, RouterMode, SegmentSource, ServiceBackend, SingleIn, Source,
+        network::egress::push_router::PushRouter,
    },
    protocols::annotated::Annotated,
    transports::etcd::{KeyValue, WatchEvent},
-    DistributedRuntime,
 };

 use crate::{
@@ -35,7 +35,7 @@ use crate::{
    },
 };

-use super::{ModelEntry, ModelManager, MODEL_ROOT_PATH};
+use super::{MODEL_ROOT_PATH, ModelEntry, ModelManager};

 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum ModelUpdate {
@@ -213,10 +213,8 @@ impl ModelWatcher {
                );
                update_tx = false;
            }
-            if update_tx {
-                if let Some(tx) = &self.model_update_tx {
-                    tx.send(ModelUpdate::Removed(model_type)).await.ok();
-                }
+            if update_tx && let Some(tx) = &self.model_update_tx {
+                tx.send(ModelUpdate::Removed(model_type)).await.ok();
            }
            return Ok(None);
        }
@@ -251,13 +249,12 @@ impl ModelWatcher {
            );
        } else {
            for model_type in ALL_MODEL_TYPES {
-                if (chat_model_removed && *model_type == ModelType::Chat)
+                if ((chat_model_removed && *model_type == ModelType::Chat)
                    || (completions_model_removed && *model_type == ModelType::Completion)
-                    || (embeddings_model_removed && *model_type == ModelType::Embedding)
+                    || (embeddings_model_removed && *model_type == ModelType::Embedding))
+                    && let Some(tx) = &self.model_update_tx
                {
-                    if let Some(tx) = &self.model_update_tx {
-                        tx.send(ModelUpdate::Removed(*model_type)).await.ok();
-                    }
+                    tx.send(ModelUpdate::Removed(*model_type)).await.ok();
                }
            }
        }

--- a/lib/llm/src/engines.rs
+++ b/lib/llm/src/engines.rs
@@ -18,7 +18,7 @@ use crate::preprocessor::PreprocessedRequest;
 use crate::protocols::common::llm_backend::LLMEngineOutput;
 use crate::protocols::openai::{
    chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
-    completions::{prompt_to_string, NvCreateCompletionRequest, NvCreateCompletionResponse},
+    completions::{NvCreateCompletionRequest, NvCreateCompletionResponse, prompt_to_string},
 };
 use crate::types::openai::embeddings::NvCreateEmbeddingRequest;
 use crate::types::openai::embeddings::NvCreateEmbeddingResponse;

--- a/lib/llm/src/entrypoint/input/batch.rs
+++ b/lib/llm/src/entrypoint/input/batch.rs
@@ -8,18 +8,18 @@ use crate::types::openai::chat_completions::{
 };
 use anyhow::Context as _;
 use dynamo_async_openai::types::FinishReason;
-use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime};
+use dynamo_runtime::{Runtime, pipeline::Context, runtime::CancellationToken};
 use futures::StreamExt;
 use serde::{Deserialize, Serialize};
 use std::cmp;
 use std::path::{Path, PathBuf};
-use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::{Duration, Instant};
 use tokio::io::{AsyncBufReadExt, AsyncWriteExt};

-use crate::entrypoint::input::common;
 use crate::entrypoint::EngineConfig;
+use crate::entrypoint::input::common;

 /// Max tokens in each response.
 /// TODO: For batch mode this should be the full context size of the model

--- a/lib/llm/src/entrypoint/input/common.rs
+++ b/lib/llm/src/entrypoint/input/common.rs
@@ -5,7 +5,7 @@ use std::pin::Pin;

 use crate::{
    backend::{Backend, ExecutionContext},
-    discovery::{ModelManager, ModelWatcher, MODEL_ROOT_PATH},
+    discovery::{MODEL_ROOT_PATH, ModelManager, ModelWatcher},
    engines::StreamingEngineAdapter,
    entrypoint::{self, EngineConfig},
    kv_router::{KvPushRouter, KvRouter},
@@ -15,15 +15,16 @@ use crate::{
    protocols::common::llm_backend::{BackendOutput, LLMEngineOutput, PreprocessedRequest},
    request_template::RequestTemplate,
    types::{
+        Annotated,
        openai::chat_completions::{
            NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
            OpenAIChatCompletionsStreamingEngine,
        },
-        Annotated,
    },
 };

 use dynamo_runtime::{
+    DistributedRuntime, Runtime,
    component::Client,
    distributed::DistributedConfig,
    engine::{AsyncEngineStream, Data},
@@ -31,7 +32,6 @@ use dynamo_runtime::{
        Context, ManyOut, Operator, PushRouter, RouterMode, SegmentSource, ServiceBackend,
        ServiceEngine, ServiceFrontend, SingleIn, Source,
    },
-    DistributedRuntime, Runtime,
 };
 use std::sync::Arc;

@@ -191,11 +191,11 @@ where
    Req: Data,
    Resp: Data,
    OpenAIPreprocessor: Operator<
-        Context<Req>,
-        Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
-        Context<PreprocessedRequest>,
-        Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
-    >,
+            Context<Req>,
+            Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
+            Context<PreprocessedRequest>,
+            Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
+        >,
 {
    let frontend = ServiceFrontend::<SingleIn<Req>, ManyOut<Annotated<Resp>>>::new();
    let preprocessor = OpenAIPreprocessor::new((*card).clone())
@@ -224,11 +224,11 @@ where
    Req: Data,
    Resp: Data,
    OpenAIPreprocessor: Operator<
-        Context<Req>,
-        Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
-        Context<PreprocessedRequest>,
-        Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
-    >,
+            Context<Req>,
+            Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
+            Context<PreprocessedRequest>,
+            Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
+        >,
 {
    let frontend = SegmentSource::<SingleIn<Req>, ManyOut<Annotated<Resp>>>::new();
    let preprocessor = OpenAIPreprocessor::new(card.clone()).await?.into_operator();

--- a/lib/llm/src/entrypoint/input/endpoint.rs
+++ b/lib/llm/src/entrypoint/input/endpoint.rs
@@ -9,18 +9,18 @@ use crate::{
    model_type::ModelType,
    preprocessor::{BackendOutput, PreprocessedRequest},
    types::{
+        Annotated,
        openai::chat_completions::{
            NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
        },
-        Annotated,
    },
 };

 use dynamo_runtime::engine::AsyncEngineStream;
 use dynamo_runtime::pipeline::{
-    network::Ingress, Context, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source,
+    Context, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source, network::Ingress,
 };
-use dynamo_runtime::{protocols::EndpointId, DistributedRuntime};
+use dynamo_runtime::{DistributedRuntime, protocols::EndpointId};

 use crate::entrypoint::EngineConfig;

@@ -125,13 +125,12 @@ pub async fn run(
    result?;

    // Cleanup on shutdown
-    if let Some(mut card) = card {
-        if let Err(err) = card
+    if let Some(mut card) = card
+        && let Err(err) = card
            .delete_from_nats(distributed_runtime.nats_client())
            .await
-        {
-            tracing::error!(%err, "delete_from_nats error on shutdown");
-        }
+    {
+        tracing::error!(%err, "delete_from_nats error on shutdown");
    }

    Ok(())

--- a/lib/llm/src/entrypoint/input/http.rs
+++ b/lib/llm/src/entrypoint/input/http.rs
@@ -4,10 +4,10 @@
 use std::sync::Arc;

 use crate::{
-    discovery::{ModelManager, ModelUpdate, ModelWatcher, MODEL_ROOT_PATH},
+    discovery::{MODEL_ROOT_PATH, ModelManager, ModelUpdate, ModelWatcher},
    endpoint_type::EndpointType,
    engines::StreamingEngineAdapter,
-    entrypoint::{self, input::common, EngineConfig},
+    entrypoint::{self, EngineConfig, input::common},
    http::service::service_v2::{self, HttpService},
    kv_router::KvRouterConfig,
    model_type::ModelType,
@@ -17,8 +17,8 @@ use crate::{
    },
 };
 use dynamo_runtime::transports::etcd;
-use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};
 use dynamo_runtime::{DistributedRuntime, Runtime};
+use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};

 /// Build and run an HTTP service
 pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Result<()> {

--- a/lib/llm/src/entrypoint/input/text.rs
+++ b/lib/llm/src/entrypoint/input/text.rs
@@ -6,12 +6,12 @@ use crate::request_template::RequestTemplate;
 use crate::types::openai::chat_completions::{
    NvCreateChatCompletionRequest, OpenAIChatCompletionsStreamingEngine,
 };
-use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime};
+use dynamo_runtime::{Runtime, pipeline::Context, runtime::CancellationToken};
 use futures::StreamExt;
 use std::io::{ErrorKind, Write};

-use crate::entrypoint::input::common;
 use crate::entrypoint::EngineConfig;
+use crate::entrypoint::input::common;

 /// Max response tokens for each single query. Must be less than model context size.
 /// TODO: Cmd line flag to overwrite this