chore: Upgrade Rust to 1.90 (#3147)

Signed-off-by: Graham King <grahamk@nvidia.com>

chore: Upgrade Rust to 1.90 (#3147)
Signed-off-by: Graham King <grahamk@nvidia.com>
7a5a0bd6 · Graham King · GitHub · 406c4d4e · 7a5a0bd6 · 7a5a0bd6
Unverified Commit 7a5a0bd6 authored Sep 19, 2025 by Graham King Committed by GitHub Sep 19, 2025
20 changed files
--- a/Earthfile
+++ b/Earthfile
@@ -91,13 +91,13 @@ rust-base:
    ENV RUSTUP_HOME=/usr/local/rustup
    ENV CARGO_HOME=/usr/local/cargo
    ENV PATH=/usr/local/cargo/bin:$PATH
-    ENV RUST_VERSION=1.89.0
+    ENV RUST_VERSION=1.90.0
    ENV RUSTARCH=x86_64-unknown-linux-gnu

    RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/x86_64-unknown-linux-gnu/rustup-init" && \
        echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
        chmod +x rustup-init && \
-        ./rustup-init -y --no-modify-path --profile minimal --default-toolchain 1.89.0 --default-host x86_64-unknown-linux-gnu && \
+        ./rustup-init -y --no-modify-path --profile minimal --default-toolchain 1.90.0 --default-host x86_64-unknown-linux-gnu && \
        rm rustup-init && \
        chmod -R a+w $RUSTUP_HOME $CARGO_HOME


--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -93,7 +93,7 @@ ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
 ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
-    RUST_VERSION=1.89.0
+    RUST_VERSION=1.90.0

 # Define Rust target based on ARCH_ALT ARG
 ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

--- a/container/Dockerfile.sglang-wideep
+++ b/container/Dockerfile.sglang-wideep
@@ -11,7 +11,7 @@ ARG ARCH_ALT="x86_64"
 ARG NIXL_UCX_REF="v1.19.0"
 ARG NIXL_TAG="0.5.0"
 ARG CMAKE_VERSION="3.31.8"
-ARG RUST_VERSION="1.89.0"
+ARG RUST_VERSION="1.90.0"
 ARG CARGO_BUILD_JOBS="16"

 RUN apt-get update -y && \

--- a/container/Dockerfile.trtllm_prebuilt
+++ b/container/Dockerfile.trtllm_prebuilt
@@ -41,7 +41,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
 ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
-    RUST_VERSION=1.89.0
+    RUST_VERSION=1.90.0

 # Install Rust using RUSTARCH derived from ARCH_ALT
 RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \

--- a/lib/bindings/python/Cargo.toml
+++ b/lib/bindings/python/Cargo.toml
@@ -8,7 +8,7 @@
 [package]
 name = "dynamo-py3"
 version = "0.5.0"
-edition = "2021"
+edition = "2024"
 authors = ["NVIDIA"]
 license = "Apache-2.0"
 homepage = "https://github.com/ai-dynamo/dynamo"

--- a/lib/bindings/python/rust/context.rs
+++ b/lib/bindings/python/rust/context.rs
@@ -3,8 +3,8 @@

 // Context is a wrapper around the AsyncEngineContext to allow for Python bindings.

-use dynamo_runtime::pipeline::context::Controller;
 pub use dynamo_runtime::pipeline::AsyncEngineContext;
+use dynamo_runtime::pipeline::context::Controller;
 use pyo3::prelude::*;
 use std::sync::Arc;


--- a/lib/bindings/python/rust/engine.rs
+++ b/lib/bindings/python/rust/engine.rs
 // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0

-use super::context::{callable_accepts_kwarg, Context};
+use super::context::{Context, callable_accepts_kwarg};
 use pyo3::prelude::*;
 use pyo3::types::{PyDict, PyModule};
 use pyo3::{PyAny, PyErr};
@@ -9,15 +9,15 @@ use pyo3_async_runtimes::TaskLocals;
 use pythonize::{depythonize, pythonize};
 use std::sync::Arc;
 use tokio::sync::mpsc;
-use tokio_stream::{wrappers::ReceiverStream, StreamExt};
+use tokio_stream::{StreamExt, wrappers::ReceiverStream};

 pub use dynamo_runtime::{
+    CancellationToken, Error, Result,
    pipeline::{
-        async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
-        SingleIn,
+        AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream, SingleIn,
+        async_trait,
    },
    protocols::annotated::Annotated,
-    CancellationToken, Error, Result,
 };
 pub use serde::{Deserialize, Serialize};

@@ -180,7 +180,7 @@ where
                let py_request = pythonize(py, &request)?;
                let py_ctx = Py::new(py, Context::new(ctx_python.clone()))?;

-                let gen = if has_context {
+                let gen_result = if has_context {
                    // Pass context as a kwarg
                    let kwarg = PyDict::new(py);
                    kwarg.set_item("context", &py_ctx)?;
@@ -191,7 +191,10 @@ where
                }?;

                let locals = TaskLocals::new(event_loop.bind(py).clone());
-                pyo3_async_runtimes::tokio::into_stream_with_locals_v1(locals, gen.into_bound(py))
+                pyo3_async_runtimes::tokio::into_stream_with_locals_v1(
+                    locals,
+                    gen_result.into_bound(py),
+                )
            })
        })
        .await??;
@@ -234,18 +237,27 @@ where
                                // right now, this is impossible as we are not passing the context to the python async generator
                                // todo: add task-local context to the python async generator
                                ctx.stop_generating();
-                                let msg = format!("critical error: invalid response object from python async generator; application-logic-mismatch: {}", e);
+                                let msg = format!(
+                                    "critical error: invalid response object from python async generator; application-logic-mismatch: {}",
+                                    e
+                                );
                                msg
                            }
                            ResponseProcessingError::PyGeneratorExit(_) => {
                                "Stream ended before generation completed".to_string()
                            }
                            ResponseProcessingError::PythonException(e) => {
-                                let msg = format!("a python exception was caught while processing the async generator: {}", e);
+                                let msg = format!(
+                                    "a python exception was caught while processing the async generator: {}",
+                                    e
+                                );
                                msg
                            }
                            ResponseProcessingError::OffloadError(e) => {
-                                let msg = format!("critical error: failed to offload the python async generator to a new thread: {}", e);
+                                let msg = format!(
+                                    "critical error: failed to offload the python async generator to a new thread: {}",
+                                    e
+                                );
                                msg
                            }
                        };

--- a/lib/bindings/python/rust/http.rs
+++ b/lib/bindings/python/rust/http.rs
@@ -5,15 +5,14 @@ use std::sync::Arc;

 use pyo3::{exceptions::PyException, prelude::*};

-use crate::{engine::*, to_pyerr, CancellationToken};
+use crate::{CancellationToken, engine::*, to_pyerr};

 pub use dynamo_llm::endpoint_type::EndpointType;
 pub use dynamo_llm::http::service::{error as http_error, service_v2};
 pub use dynamo_runtime::{
-    error,
-    pipeline::{async_trait, AsyncEngine, Data, ManyOut, SingleIn},
+    Error, Result, error,
+    pipeline::{AsyncEngine, Data, ManyOut, SingleIn, async_trait},
    protocols::annotated::Annotated,
-    Error, Result,
 };

 #[pyclass]

--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -3,10 +3,10 @@

 use futures::StreamExt;
 use once_cell::sync::OnceCell;
+use pyo3::IntoPyObjectExt;
 use pyo3::exceptions::PyStopAsyncIteration;
 use pyo3::types::PyBytes;
 use pyo3::types::{PyDict, PyList, PyString};
-use pyo3::IntoPyObjectExt;
 use pyo3::{exceptions::PyException, prelude::*};
 use rand::seq::IteratorRandom as _;
 use rs::pipeline::network::Ingress;
@@ -19,8 +19,8 @@ use tokio::sync::Mutex;
 use dynamo_runtime::{
    self as rs, logging,
    pipeline::{
-        context::Context as RsContext, network::egress::push_router::RouterMode as RsRouterMode,
-        EngineStream, ManyOut, SingleIn,
+        EngineStream, ManyOut, SingleIn, context::Context as RsContext,
+        network::egress::push_router::RouterMode as RsRouterMode,
    },
    protocols::annotated::Annotated as RsAnnotated,
    traits::DistributedRuntimeProvider,
@@ -504,7 +504,8 @@ impl DistributedRuntime {
            }

            Err(PyErr::new::<PyException, _>(format!(
-            "Failed to allocate and reserve a port block of size {block_size} from range {min}-{max} after {candidate_count} attempts")))
+                "Failed to allocate and reserve a port block of size {block_size} from range {min}-{max} after {candidate_count} attempts"
+            )))
        })
    }

@@ -735,13 +736,13 @@ impl Endpoint {
            })?;

        // Require an object/dict
-        if let Some(ref payload) = health_payload_json {
-            if !payload.is_object() {
+        if let Some(ref payload) = health_payload_json
+            && !payload.is_object()
+        {
            return Err(pyo3::exceptions::PyTypeError::new_err(
                "health_check_payload must be a JSON object (dict)",
            ));
        }
-        }

        let mut builder = self
            .inner
@@ -1087,11 +1088,10 @@ async fn process_stream(
        // Convert the response to a PyObject using Python's GIL
        let annotated: RsAnnotated<serde_json::Value> = response;
        let annotated: RsAnnotated<PyObject> = annotated.map_data(|data| {
-            let result = Python::with_gil(|py| match pythonize::pythonize(py, &data) {
+            Python::with_gil(|py| match pythonize::pythonize(py, &data) {
                Ok(pyobj) => Ok(pyobj.into()),
                Err(e) => Err(e.to_string()),
-            });
-            result
+            })
        });

        let is_error = annotated.is_error();

--- a/lib/bindings/python/rust/llm/block_manager/vllm.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm.rs
@@ -11,13 +11,13 @@ use pyo3::{prelude::*, wrap_pymodule};

 use dynamo_llm::{
    block_manager::{
+        BasicMetadata, DeviceStorage, Storage,
        block::{
+            BlockId, ImmutableBlock, MutableBlock,
            data::logical::distributed_leader_worker::DistributedLeaderWorkerResources,
            locality::{LocalityProvider, Logical},
-            BlockId, ImmutableBlock, MutableBlock,
        },
        pool::{BlockPool, BlockPoolError},
-        BasicMetadata, DeviceStorage, Storage,
    },
    tokens::{SaltHash, SequenceHash, TokenBlockSequence, Tokens},
 };
@@ -314,7 +314,19 @@ impl std::fmt::Debug for GenericSlotUpdate<String> {
            format!("{:?}", self.tokens_to_append)
        };

-        write!(f, "GenericSlotUpdate(request_id: {}, request_num_tokens: {}, request_num_computed_tokens: {}, tokens_to_append: {}, num_new_tokens: {}, num_new_computed_tokens: {:?}, new_computed_blocks: {:?}, num_lookahead_blocks: {:?}, delay_cache_blocks: {:?})", self.request_id, self.request_num_tokens, self.request_num_computed_tokens, tokens_display, self.num_new_tokens, self.num_new_computed_tokens, self.new_computed_blocks, self.num_lookahead_blocks, self.delay_cache_blocks)
+        write!(
+            f,
+            "GenericSlotUpdate(request_id: {}, request_num_tokens: {}, request_num_computed_tokens: {}, tokens_to_append: {}, num_new_tokens: {}, num_new_computed_tokens: {:?}, new_computed_blocks: {:?}, num_lookahead_blocks: {:?}, delay_cache_blocks: {:?})",
+            self.request_id,
+            self.request_num_tokens,
+            self.request_num_computed_tokens,
+            tokens_display,
+            self.num_new_tokens,
+            self.num_new_computed_tokens,
+            self.new_computed_blocks,
+            self.num_lookahead_blocks,
+            self.delay_cache_blocks
+        )
    }
 }

@@ -558,7 +570,8 @@ impl<R: RequestKey> SlotManager<R> {
                let isl_host = slot.num_blocks_cached_from_host() * self.block_size;
                let isl_disk = slot.num_blocks_cached_from_disk() * self.block_size;
                tracing::info!(
-                    request_id, "request complete isl: {} - cache hits: device: {}, host: {}, disk: {} - prefilled: {}",
+                    request_id,
+                    "request complete isl: {} - cache hits: device: {}, host: {}, disk: {} - prefilled: {}",
                    isl,
                    isl_device,
                    isl_host,

--- a/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs
@@ -9,21 +9,21 @@ use dynamo_llm::block_manager::metrics_kvbm::KvbmMetrics;
 use dynamo_runtime::DistributedRuntime;
 use slot::{ConnectorSlotManager, SlotError, SlotManager, SlotState};

+use crate::DistributedRuntime as PyDistributedRuntime;
 use crate::llm::block_manager::BlockManagerBuilder;
 use crate::llm::block_manager::{
-    distributed::KvbmLeader as PyKvbmLeader, vllm::connector::leader::slot::VllmConnectorSlot,
-    vllm::KvbmRequest, VllmBlockManager,
+    VllmBlockManager, distributed::KvbmLeader as PyKvbmLeader, vllm::KvbmRequest,
+    vllm::connector::leader::slot::VllmConnectorSlot,
 };
-use crate::DistributedRuntime as PyDistributedRuntime;
 use dynamo_runtime::metrics::prometheus_names::kvbm_connector;

 use dynamo_llm::block_manager::{
+    BasicMetadata, DiskStorage, ImmutableBlock, PinnedStorage,
    block::{
        data::logical::distributed_leader_worker::DistributedLeaderWorkerResources,
        locality::Logical,
    },
    connector::*,
-    BasicMetadata, DiskStorage, ImmutableBlock, PinnedStorage,
 };
 use dynamo_llm::tokens::{SaltHash, TokenBlockSequence, Tokens};
 use std::sync::{Arc, OnceLock};
@@ -218,7 +218,9 @@ impl Leader for KvConnectorLeader {
        );

        if slot.state() == SlotState::SkippedPrefill || slot.state() == SlotState::SkippedDecode {
-            tracing::debug!("slot is in the SkippedPrefill or SkippedDecode state; will resume from skipped and return early");
+            tracing::debug!(
+                "slot is in the SkippedPrefill or SkippedDecode state; will resume from skipped and return early"
+            );
            match slot.state() {
                SlotState::SkippedPrefill => {
                    slot.mark_as_prefilling(self.iteration_counter)?;

--- a/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs
@@ -5,10 +5,10 @@ use std::{any::Any, sync::Arc};

 use dynamo_llm::{
    block_manager::{
-        block::{locality::LocalityProvider, BlockMetadata},
+        Storage,
+        block::{BlockMetadata, locality::LocalityProvider},
        connector::protocol::{LeaderTransferRequest, RequestType, TransferType},
        distributed::{BlockTransferPool, BlockTransferRequest, KvbmLeader},
-        Storage,
    },
    tokens::TokenBlock,
 };
@@ -398,7 +398,11 @@ impl VllmConnectorSlot {
            SlotState::SkippedPrefill => Ok(()), // already skipped
            SlotState::SkippedDecode => Ok(()),  // already skipped
            _ => {
-                tracing::debug!("slot is in the {:?} state; will not explicitly mark as skipped, request_id: {}", self.state, self.request_id);
+                tracing::debug!(
+                    "slot is in the {:?} state; will not explicitly mark as skipped, request_id: {}",
+                    self.state,
+                    self.request_id
+                );
                Ok(())
            }
        }
@@ -594,7 +598,8 @@ impl Slot for VllmConnectorSlot {
        if computed_position < self.current_position {
            tracing::debug!(
                "computed_position={} < current_position={}, so we are onboarding during prefilling phase",
-                computed_position, self.current_position
+                computed_position,
+                self.current_position
            );
            return Ok(());
        }
@@ -916,7 +921,8 @@ impl ExternallyManagedDeviceSlot for VllmConnectorSlot {
        if self.current_position + num_tokens > self.sequence().total_tokens() {
            return Err(SlotError::InvalidOperation(format!(
                "cannot advance computed position from {} by {num_tokens} tokens, total tokens is {}",
-                self.current_position, self.sequence().total_tokens()
+                self.current_position,
+                self.sequence().total_tokens()
            )));
        }


--- a/lib/bindings/python/rust/llm/block_manager/vllm/connector/trtllm_leader.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm/connector/trtllm_leader.rs
@@ -3,12 +3,12 @@

 use super::*;

+use crate::DistributedRuntime as PyDistributedRuntime;
+use crate::llm::block_manager::BlockManagerBuilder;
 use crate::llm::block_manager::vllm::connector::leader::slot::{
    ConnectorSlotManager, SlotManager, SlotState,
 };
-use crate::llm::block_manager::BlockManagerBuilder;
 use crate::llm::block_manager::{distributed::KvbmLeader as PyKvbmLeader, vllm::KvbmRequest};
-use crate::DistributedRuntime as PyDistributedRuntime;
 use anyhow;
 use dynamo_llm::block_manager::metrics_kvbm::KvbmMetrics;
 use dynamo_runtime::metrics::prometheus_names::kvbm_connector;

--- a/lib/bindings/python/rust/llm/block_manager/vllm/connector/trtllm_worker.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm/connector/trtllm_worker.rs
@@ -13,15 +13,15 @@ use super::*;
 use crate::llm::block_manager::distributed::get_barrier_id_prefix;
 use crate::llm::block_manager::vllm::connector::worker::event_sync_blocking;
 use crate::{
-    llm::block_manager::distributed::VllmTensor, to_pyerr,
-    DistributedRuntime as PyDistributedRuntime,
+    DistributedRuntime as PyDistributedRuntime, llm::block_manager::distributed::VllmTensor,
+    to_pyerr,
 };

 use anyhow;
 use dynamo_llm::block_manager::distributed::{KvbmWorker, KvbmWorkerConfig};
 use dynamo_llm::block_manager::storage::torch::TorchTensor;
-use dynamo_runtime::utils::task::CriticalTaskExecutionHandle;
 use dynamo_runtime::DistributedRuntime;
+use dynamo_runtime::utils::task::CriticalTaskExecutionHandle;

 pub trait Worker: Send + Sync {
    fn register_kv_caches(
@@ -274,7 +274,10 @@ impl Worker for KvConnectorWorker {
                .maybe_finished_offloading
                .contains(&request_id.to_string())
            {
-                tracing::warn!(request_id, "possibly got a duplicate finished request; request_id already in the maybe_finished_offloading set");
+                tracing::warn!(
+                    request_id,
+                    "possibly got a duplicate finished request; request_id already in the maybe_finished_offloading set"
+                );
            } else {
                tracing::debug!(
                    request_id,
@@ -300,7 +303,10 @@ impl Worker for KvConnectorWorker {
                .maybe_finished_onboarding
                .contains(&request_id.to_string())
            {
-                tracing::warn!(request_id, "possibly got a duplicate finished request; request_id already in the maybe_finished_onboarding set");
+                tracing::warn!(
+                    request_id,
+                    "possibly got a duplicate finished request; request_id already in the maybe_finished_onboarding set"
+                );
            }
        }

@@ -316,7 +322,9 @@ impl Worker for KvConnectorWorker {
            } else {
                // made this condition more strict slot existence checks were added as a prerequesite
                // to be added to the maybe_finished_offloading set.
-                panic!("request slot missing for {request_id}; however, it was present when added to the maybe finished offloading set");
+                panic!(
+                    "request slot missing for {request_id}; however, it was present when added to the maybe finished offloading set"
+                );
            }
        }

@@ -329,7 +337,10 @@ impl Worker for KvConnectorWorker {
            if self.connector.has_slot(request_id) {
                self.connector.remove_slot(request_id);
            } else {
-                tracing::debug!(request_id, "is_finished_offloading: request slot is not found - likely aborted, removing from is finished offloading set");
+                tracing::debug!(
+                    request_id,
+                    "is_finished_offloading: request slot is not found - likely aborted, removing from is finished offloading set"
+                );
            }
        }

@@ -343,7 +354,9 @@ impl Worker for KvConnectorWorker {
                    tracing::debug!(request_id, "request slot is not finished onboarding");
                }
            } else {
-                panic!("request slot missing for {request_id}; however, it was present when added to the maybe finished onboarding set");
+                panic!(
+                    "request slot missing for {request_id}; however, it was present when added to the maybe finished onboarding set"
+                );
            }
        }


--- a/lib/bindings/python/rust/llm/block_manager/vllm/connector/worker.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm/connector/worker.rs
@@ -13,16 +13,16 @@ use std::sync::{Arc, OnceLock};
 use super::*;
 use crate::llm::block_manager::distributed::get_barrier_id_prefix;
 use crate::{
-    llm::block_manager::distributed::VllmTensor, to_pyerr,
-    DistributedRuntime as PyDistributedRuntime,
+    DistributedRuntime as PyDistributedRuntime, llm::block_manager::distributed::VllmTensor,
+    to_pyerr,
 };
 use dynamo_runtime::metrics::prometheus_names::kvbm_connector;

 use anyhow;
 use dynamo_llm::block_manager::distributed::{KvbmWorker, KvbmWorkerConfig};
 use dynamo_llm::block_manager::storage::torch::TorchTensor;
-use dynamo_runtime::utils::task::CriticalTaskExecutionHandle;
 use dynamo_runtime::DistributedRuntime;
+use dynamo_runtime::utils::task::CriticalTaskExecutionHandle;

 pub trait Worker: Send + Sync {
    fn register_kv_caches(
@@ -326,7 +326,10 @@ impl Worker for KvConnectorWorker {
                    "got a finished warning for a request that is onboarding"
                );
            } else if self.maybe_finished_offloading.contains(&request_id) {
-                tracing::warn!(request_id, "possibly got a duplicate finished request; request_id already in the maybe_finished_offloading set");
+                tracing::warn!(
+                    request_id,
+                    "possibly got a duplicate finished request; request_id already in the maybe_finished_offloading set"
+                );
            } else {
                tracing::debug!(
                    request_id,
@@ -348,7 +351,9 @@ impl Worker for KvConnectorWorker {
            } else {
                // made this condition more strict slot existence checks were added as a prerequesite
                // to be added to the maybe_finished_offloading set.
-                panic!("request slot missing for {request_id}; however, it was present when added to the maybe finished offloading set");
+                panic!(
+                    "request slot missing for {request_id}; however, it was present when added to the maybe finished offloading set"
+                );
            }
        }

@@ -361,7 +366,10 @@ impl Worker for KvConnectorWorker {
            if self.connector.has_slot(request_id) {
                self.connector.remove_slot(request_id);
            } else {
-                tracing::debug!(request_id, "is_finished_offloading: request slot is not found - likely aborted, removing from is finished offloading set");
+                tracing::debug!(
+                    request_id,
+                    "is_finished_offloading: request slot is not found - likely aborted, removing from is finished offloading set"
+                );
            }
        }

@@ -375,7 +383,9 @@ impl Worker for KvConnectorWorker {
                    tracing::debug!(request_id, "request slot is not finished");
                }
            } else {
-                panic!("request slot missing for {request_id}; however, it was present when added to the maybe finished onboarding set");
+                panic!(
+                    "request slot missing for {request_id}; however, it was present when added to the maybe finished onboarding set"
+                );
            }
        }

@@ -460,7 +470,7 @@ impl PyKvConnectorWorker {
 }

 use cudarc::driver::sys::{
-    cuCtxGetCurrent, cuEventSynchronize, cudaError_enum, CUcontext, CUevent,
+    CUcontext, CUevent, cuCtxGetCurrent, cuEventSynchronize, cudaError_enum,
 };
 use std::ptr;


--- a/lib/bindings/python/rust/llm/block_manager/vllm/slot.rs
+++ b/lib/bindings/python/rust/llm/block_manager/vllm/slot.rs
@@ -67,7 +67,11 @@ impl<S: Storage, L: LocalityProvider> std::fmt::Debug for Slot<S, L> {
            .iter()
            .map(|b| b.block_id())
            .collect::<Vec<_>>();
-        write!(f, "Slot(computed_position: {}, prefill_position: {}, immutable_block_ids: {:?}, mutable_block_ids: {:?})", self.computed_position, self.prefill_position, immutable_block_ids, mutable_block_ids)
+        write!(
+            f,
+            "Slot(computed_position: {}, prefill_position: {}, immutable_block_ids: {:?}, mutable_block_ids: {:?})",
+            self.computed_position, self.prefill_position, immutable_block_ids, mutable_block_ids
+        )
    }
 }


--- a/lib/bindings/python/rust/llm/entrypoint.rs
+++ b/lib/bindings/python/rust/llm/entrypoint.rs
@@ -6,9 +6,9 @@ use std::path::PathBuf;

 use pyo3::{exceptions::PyException, prelude::*};

-use dynamo_llm::entrypoint::input::Input;
 use dynamo_llm::entrypoint::EngineConfig as RsEngineConfig;
 use dynamo_llm::entrypoint::RouterConfig as RsRouterConfig;
+use dynamo_llm::entrypoint::input::Input;
 use dynamo_llm::kv_router::KvRouterConfig as RsKvRouterConfig;
 use dynamo_llm::local_model::DEFAULT_HTTP_PORT;
 use dynamo_llm::local_model::{LocalModel, LocalModelBuilder};

--- a/lib/bindings/python/rust/llm/kv.rs
+++ b/lib/bindings/python/rust/llm/kv.rs
@@ -8,8 +8,8 @@ use tokio_stream::StreamExt;

 use super::*;
 use crate::Component;
-use llm_rs::kv_router::indexer::compute_block_hash_for_seq;
 use llm_rs::kv_router::indexer::KvIndexerInterface;
+use llm_rs::kv_router::indexer::compute_block_hash_for_seq;
 use llm_rs::kv_router::protocols::ForwardPassMetrics as RsForwardPassMetrics;
 use llm_rs::kv_router::protocols::KvStats as RsKvStats;
 use llm_rs::kv_router::protocols::SpecDecodeStats as RsSpecDecodeStats;
@@ -18,7 +18,7 @@ use rs::traits::events::EventSubscriber;
 use tracing;

 use llm_rs::kv_router::protocols::*;
-use llm_rs::kv_router::publisher::{create_stored_blocks, KvEventSourceConfig};
+use llm_rs::kv_router::publisher::{KvEventSourceConfig, create_stored_blocks};
 use llm_rs::protocols::common::{OutputOptions, SamplingOptions, StopConditions};

 #[pyfunction]

--- a/lib/bindings/python/rust/llm/preprocessor.rs
+++ b/lib/bindings/python/rust/llm/preprocessor.rs
@@ -8,10 +8,10 @@ use llm_rs::{
    preprocessor::OpenAIPreprocessor,
    protocols::common::llm_backend::{BackendOutput, PreprocessedRequest},
    types::{
+        Annotated,
        openai::chat_completions::{
            NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
        },
-        Annotated,
    },
 };


--- a/lib/llm/src/mocker/sequence.rs
+++ b/lib/llm/src/mocker/sequence.rs
@@ -28,7 +28,7 @@ fn create_unique_blocks_from_sequence(
        .collect();

    // Only push the partial block if tokens count isn't a multiple of block_size
-    if tokens.total_tokens() % block_size != 0 {
+    if !tokens.total_tokens().is_multiple_of(block_size) {
        unique_blocks.push(match uuid {
            Some(uuid) => UniqueBlock::PartialBlock(uuid),
            None => UniqueBlock::default(),
@@ -258,7 +258,7 @@ impl ActiveSequence {
        self.generated_tokens = self.generated_tokens.saturating_sub(1);

        // Reverts to the last full block
-        if self.tokens.total_tokens() % self.block_size == 0 {
+        if self.tokens.total_tokens().is_multiple_of(self.block_size) {
            self.unique_blocks.pop();
        }
    }