Unverified commit bce74588, authored by Graham King, committed by GitHub
Browse files

chore: Rust to 1.89 and edition 2024 (#2659)

parent 268d017e
......@@ -18,7 +18,7 @@ resolver = "3"
[workspace.package]
version = "0.4.1"
edition = "2021"
edition = "2024"
description = "Dynamo Inference Framework"
authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"]
license = "Apache-2.0"
......
......@@ -91,13 +91,13 @@ rust-base:
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=/usr/local/cargo/bin:$PATH
ENV RUST_VERSION=1.87.0
ENV RUST_VERSION=1.89.0
ENV RUSTARCH=x86_64-unknown-linux-gnu
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/x86_64-unknown-linux-gnu/rustup-init" && \
echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain 1.87.0 --default-host x86_64-unknown-linux-gnu && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain 1.89.0 --default-host x86_64-unknown-linux-gnu && \
rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME
......
......@@ -14,25 +14,25 @@
// limitations under the License.
use dynamo_llm::kv_router::{
KV_HIT_RATE_SUBJECT,
protocols::{ForwardPassMetrics, KvStats, WorkerStats},
scheduler::KVHitRateEvent,
KV_HIT_RATE_SUBJECT,
};
use dynamo_runtime::{
component::{service::EndpointStats, Namespace},
DistributedRuntime, Result, Runtime, Worker,
component::{Namespace, service::EndpointStats},
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
ResponseStream, SingleIn,
AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, ResponseStream, SingleIn,
async_trait, network::Ingress,
},
protocols::annotated::Annotated,
stream,
traits::events::EventPublisher,
DistributedRuntime, Result, Runtime, Worker,
};
use rand::Rng;
use std::sync::Arc;
use tokio::time::{interval, Duration};
use tokio::time::{Duration, interval};
fn main() -> Result<()> {
logging::init();
......
......@@ -76,8 +76,8 @@
//! Ok(())
//! }
use axum::{routing::get, Router};
use prometheus::{register_counter_vec, register_gauge_vec, Encoder, TextEncoder};
use axum::{Router, routing::get};
use prometheus::{Encoder, TextEncoder, register_counter_vec, register_gauge_vec};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
......@@ -88,7 +88,7 @@ use dynamo_llm::kv_router::scoring::Endpoint;
use dynamo_llm::kv_router::scoring::ProcessedEndpoints;
use dynamo_runtime::{
distributed::Component, error, service::EndpointInfo, utils::Duration, Result,
Result, distributed::Component, error, service::EndpointInfo, utils::Duration,
};
/// Configuration for metrics collection mode
......
......@@ -27,21 +27,20 @@
//! - ISL Blocks: Cumulative count of total blocks in all KV hit rate events
//! - Overlap Blocks: Cumulative count of blocks that were already in the KV cache
use clap::Parser;
use dynamo_llm::kv_router::scheduler::KVHitRateEvent;
use dynamo_llm::kv_router::KV_HIT_RATE_SUBJECT;
use dynamo_llm::kv_router::scheduler::KVHitRateEvent;
use dynamo_runtime::{
error, logging,
DistributedRuntime, ErrorContext, Result, Runtime, Worker, error, logging,
traits::events::{EventPublisher, EventSubscriber},
utils::{Duration, Instant},
DistributedRuntime, ErrorContext, Result, Runtime, Worker,
};
use futures::stream::StreamExt;
use std::sync::Arc;
// Import from our library
use metrics::{
collect_endpoints, extract_metrics, postprocess_metrics, LLMWorkerLoadCapacityConfig,
MetricsMode, PrometheusMetricsCollector,
LLMWorkerLoadCapacityConfig, MetricsMode, PrometheusMetricsCollector, collect_endpoints,
extract_metrics, postprocess_metrics,
};
/// CLI arguments for the metrics application
......@@ -274,7 +273,7 @@ mod tests {
#[test]
fn test_namespace_from_env() {
env::set_var("DYN_NAMESPACE", "test-namespace");
unsafe { env::set_var("DYN_NAMESPACE", "test-namespace") };
let args = Args::parse_from(["count", "--component", "comp", "--endpoint", "end"]);
assert_eq!(args.namespace, "test-namespace");
}
......
......@@ -26,13 +26,13 @@ use std::sync::Arc;
use clap::Parser;
use dynamo_llm::kv_router::{
KvRouter, WorkerSelector,
protocols::WorkerSelectionResult,
scheduler::{DefaultWorkerSelector, KvSchedulerError, SchedulingRequest},
KvRouter, WorkerSelector,
};
use dynamo_llm::local_model::runtime_config::ModelRuntimeConfig;
use dynamo_runtime::{
logging, pipeline::network::Ingress, DistributedRuntime, Result, Runtime, Worker,
DistributedRuntime, Result, Runtime, Worker, logging, pipeline::network::Ingress,
};
#[derive(Parser)]
......
......@@ -53,7 +53,7 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
RUST_VERSION=1.89.0
WORKDIR /opt/dynamo
......
......@@ -220,7 +220,7 @@ RUN apt update -y && \
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
RUST_VERSION=1.89.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
......
......@@ -212,7 +212,7 @@ RUN apt update -y && \
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
RUST_VERSION=1.89.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG SGLANG_IMAGE_TAG="v0.5.0rc2-cu126"
......@@ -23,7 +11,7 @@ ARG ARCH_ALT="x86_64"
ARG NIXL_UCX_REF="v1.19.0"
ARG NIXL_TAG="0.5.0"
ARG CMAKE_VERSION="3.31.8"
ARG RUST_VERSION="1.87.0"
ARG RUST_VERSION="1.89.0"
ARG CARGO_BUILD_JOBS="16"
RUN apt-get update -y && \
......
......@@ -188,7 +188,7 @@ RUN apt-get update && \
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
RUST_VERSION=1.89.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
......
......@@ -41,7 +41,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
RUST_VERSION=1.89.0
# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
......
......@@ -238,7 +238,7 @@ RUN ARCH=$(dpkg --print-architecture) && \
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
RUST_VERSION=1.89.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
......
......@@ -17,8 +17,8 @@ use std::collections::HashMap;
use std::path::PathBuf;
use clap::ValueEnum;
use dynamo_llm::entrypoint::input::Input;
use dynamo_llm::entrypoint::RouterConfig;
use dynamo_llm::entrypoint::input::Input;
use dynamo_llm::kv_router::KvRouterConfig;
use dynamo_llm::local_model::LocalModel;
use dynamo_llm::mocker::protocols::MockEngineArgs;
......@@ -176,13 +176,19 @@ impl Flags {
match out_opt {
Output::Auto => {
if self.context_length.is_some() {
anyhow::bail!("'--context-length' flag should only be used on the worker node, not on the ingress");
anyhow::bail!(
"'--context-length' flag should only be used on the worker node, not on the ingress"
);
}
if self.kv_cache_block_size.is_some() {
anyhow::bail!("'--kv-cache-block-size' flag should only be used on the worker node, not on the ingress");
anyhow::bail!(
"'--kv-cache-block-size' flag should only be used on the worker node, not on the ingress"
);
}
if self.migration_limit.is_some() {
anyhow::bail!("'--migration-limit' flag should only be used on the worker node, not on the ingress");
anyhow::bail!(
"'--migration-limit' flag should only be used on the worker node, not on the ingress"
);
}
}
Output::Static(_) => {
......@@ -211,7 +217,9 @@ impl Flags {
#[cfg(feature = "llamacpp")]
Output::LlamaCpp => {
if !local_model.path().is_file() {
anyhow::bail!("--model-path should refer to a GGUF file. llama_cpp does not support safetensors.");
anyhow::bail!(
"--model-path should refer to a GGUF file. llama_cpp does not support safetensors."
);
}
}
Output::Mocker => {
......
......@@ -2,11 +2,11 @@
// SPDX-License-Identifier: Apache-2.0
use anyhow::Context as _;
use dynamo_llm::entrypoint::input::Input;
use dynamo_llm::entrypoint::EngineConfig;
use dynamo_llm::entrypoint::input::Input;
use dynamo_llm::local_model::{LocalModel, LocalModelBuilder};
use dynamo_runtime::distributed::DistributedConfig;
use dynamo_runtime::CancellationToken;
use dynamo_runtime::distributed::DistributedConfig;
use dynamo_runtime::{DistributedRuntime, Runtime};
mod flags;
......
......@@ -38,14 +38,14 @@ fn main() -> anyhow::Result<()> {
_ => {
return Err(anyhow::anyhow!(
"Invalid verbosity level. Valid values are v (debug) or vv (trace)"
))
));
}
},
Err(_) => "info",
};
if log_level != "info" {
std::env::set_var("DYN_LOG", log_level);
unsafe { std::env::set_var("DYN_LOG", log_level) };
}
logging::init();
......@@ -94,7 +94,9 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
}
"out" => {
if val == "sglang" || val == "trtllm" || val == "vllm" {
tracing::error!("To run the {val} engine please use the Python interface, see root README or look in directory `components/backends/`.");
tracing::error!(
"To run the {val} engine please use the Python interface, see root README or look in directory `components/backends/`."
);
std::process::exit(1);
}
......
......@@ -11,13 +11,13 @@
use serde::Serialize;
use crate::{
Client,
config::Config,
error::OpenAIError,
types::{
AssistantObject, CreateAssistantRequest, DeleteAssistantResponse, ListAssistantsResponse,
ModifyAssistantRequest,
},
Client,
};
/// Build assistants that can call models and use tools to perform tasks.
......
......@@ -11,6 +11,7 @@
use bytes::Bytes;
use crate::{
Client,
config::Config,
error::OpenAIError,
types::{
......@@ -19,7 +20,6 @@ use crate::{
CreateTranslationRequest, CreateTranslationResponseJson,
CreateTranslationResponseVerboseJson,
},
Client,
};
/// Turn audio into text or text into audio.
......
......@@ -10,7 +10,7 @@
use serde::Serialize;
use crate::{config::Config, error::OpenAIError, types::ListAuditLogsResponse, Client};
use crate::{Client, config::Config, error::OpenAIError, types::ListAuditLogsResponse};
/// Logs of user actions and configuration changes within this organization.
/// To log events, you must activate logging in the [Organization Settings](https://platform.openai.com/settings/organization/general).
......
......@@ -11,10 +11,10 @@
use serde::Serialize;
use crate::{
Client,
config::Config,
error::OpenAIError,
types::{Batch, BatchRequest, ListBatchesResponse},
Client,
};
/// Create large batches of API requests for asynchronous processing. The Batch API returns completions within 24 hours for a 50% discount.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment