Unverified commit bce74588, authored by Graham King, committed by GitHub
Browse files

chore: Rust to 1.89 and edition 2024 (#2659)

parent 268d017e
...@@ -589,7 +589,7 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> ProgressEngine ...@@ -589,7 +589,7 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> ProgressEngine
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::block_manager::block::{BasicMetadata, Blocks}; use crate::block_manager::block::{BasicMetadata, Blocks};
use crate::block_manager::layout::{tests::setup_layout, FullyContiguous, LayoutConfig}; use crate::block_manager::layout::{FullyContiguous, LayoutConfig, tests::setup_layout};
use crate::block_manager::locality::Local; use crate::block_manager::locality::Local;
use crate::tokens::{TokenBlockSequence, Tokens}; use crate::tokens::{TokenBlockSequence, Tokens};
......
...@@ -51,11 +51,11 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M> ...@@ -51,11 +51,11 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
// Set the parent of the block if it has one. // Set the parent of the block if it has one.
// This is needed to ensure the lifetime of the parent is at least as long as the child. // This is needed to ensure the lifetime of the parent is at least as long as the child.
if let Ok(Some(parent)) = block.parent_sequence_hash() { if let Ok(Some(parent)) = block.parent_sequence_hash()
if let Some(parent_block) = self.match_sequence_hash(parent) { && let Some(parent_block) = self.match_sequence_hash(parent)
{
block.set_parent(parent_block.mutable_block().clone()); block.set_parent(parent_block.mutable_block().clone());
} }
}
let shared = Arc::new(block); let shared = Arc::new(block);
...@@ -78,8 +78,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M> ...@@ -78,8 +78,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
} }
pub fn remove(&mut self, block: &mut Block<S, L, M>) { pub fn remove(&mut self, block: &mut Block<S, L, M>) {
if let Ok(sequence_hash) = block.sequence_hash() { if let Ok(sequence_hash) = block.sequence_hash()
if let Some(weak) = self.map.get(&sequence_hash) { && let Some(weak) = self.map.get(&sequence_hash)
{
if let Some(_arc) = weak.upgrade() { if let Some(_arc) = weak.upgrade() {
block.reset(); block.reset();
return; return;
...@@ -87,7 +88,6 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M> ...@@ -87,7 +88,6 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
self.map.remove(&sequence_hash); self.map.remove(&sequence_hash);
} }
} }
}
pub fn match_sequence_hash( pub fn match_sequence_hash(
&mut self, &mut self,
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
use std::sync::atomic::AtomicU64; use std::sync::atomic::AtomicU64;
use crate::block_manager::block::{locality::LocalityProvider, BlockState}; use crate::block_manager::block::{BlockState, locality::LocalityProvider};
use super::*; use super::*;
use priority_key::PriorityKey; use priority_key::PriorityKey;
...@@ -113,7 +113,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> InactiveBlockPool<S, L, ...@@ -113,7 +113,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> InactiveBlockPool<S, L,
fn insert_with_sequence_hash(&mut self, block: Block<S, L, M>, sequence_hash: SequenceHash) { fn insert_with_sequence_hash(&mut self, block: Block<S, L, M>, sequence_hash: SequenceHash) {
let priority_key = PriorityKey::new(block.metadata().clone(), sequence_hash); let priority_key = PriorityKey::new(block.metadata().clone(), sequence_hash);
if self.priority_set.contains(&priority_key) { if self.priority_set.contains(&priority_key) {
tracing::trace!("multiple entries with the same sequence hash, resetting block and inserting into uninitialized set"); tracing::trace!(
"multiple entries with the same sequence hash, resetting block and inserting into uninitialized set"
);
let mut block = block; let mut block = block;
block.reset(); block.reset();
self.uninitialized_set.push_back(block); self.uninitialized_set.push_back(block);
...@@ -546,8 +548,8 @@ pub(crate) mod tests { ...@@ -546,8 +548,8 @@ pub(crate) mod tests {
use crate::{ use crate::{
block_manager::{ block_manager::{
block::{ block::{
locality::Local, registry::BlockRegistry, state::CompleteState, Blocks, Blocks, PrivateBlockExt, locality::Local, registry::BlockRegistry,
PrivateBlockExt, state::CompleteState,
}, },
events::NullEventManager, events::NullEventManager,
layout::{BlockLayout, FullyContiguous, LayoutConfigBuilder}, layout::{BlockLayout, FullyContiguous, LayoutConfigBuilder},
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
// limitations under the License. // limitations under the License.
use crate::block_manager::{ use crate::block_manager::{
block::{registry::BlockRegistrationError, BlockState, PrivateBlockExt}, block::{BlockState, PrivateBlockExt, registry::BlockRegistrationError},
events::Publisher, events::Publisher,
}; };
...@@ -266,19 +266,17 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> State<S, L, M> ...@@ -266,19 +266,17 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> State<S, L, M>
} }
} }
BlockRegistrationDuplicationSetting::Disabled => { BlockRegistrationDuplicationSetting::Disabled => {
if let Some(block) = duplicate { if let Some(block) = duplicate
if let Some(raw_blocks) = block.try_take_block(private::PrivateToken) { && let Some(raw_blocks) = block.try_take_block(private::PrivateToken)
{
self.inactive.return_blocks(raw_blocks); self.inactive.return_blocks(raw_blocks);
} }
} }
} }
}
if offload { if offload && let Some(priority) = immutable.metadata().offload_priority() {
if let Some(priority) = immutable.metadata().offload_priority() {
immutable.enqueue_offload(priority).await.unwrap(); immutable.enqueue_offload(priority).await.unwrap();
} }
}
immutable_blocks.push(immutable); immutable_blocks.push(immutable);
} }
......
...@@ -17,7 +17,7 @@ mod local; ...@@ -17,7 +17,7 @@ mod local;
mod logical; mod logical;
mod resources; mod resources;
use crate::block_manager::block::{factory::IntoBlocks, MutableBlock}; use crate::block_manager::block::{MutableBlock, factory::IntoBlocks};
use crate::block_manager::locality::LogicalResources; use crate::block_manager::locality::LogicalResources;
use crate::block_manager::offload::request::BlockResult; use crate::block_manager::offload::request::BlockResult;
...@@ -26,8 +26,8 @@ use super::*; ...@@ -26,8 +26,8 @@ use super::*;
// use super::offload::OffloadManager; // use super::offload::OffloadManager;
use super::{ use super::{
block::{ block::{
factory::LocalBlockDataFactory, locality::LocalityProvider, Block, GlobalRegistry, Block, GlobalRegistry, ImmutableBlock, factory::LocalBlockDataFactory,
ImmutableBlock, locality::LocalityProvider,
}, },
config::NixlOptions, config::NixlOptions,
events::{EventManager, NullEventManager}, events::{EventManager, NullEventManager},
......
...@@ -88,7 +88,7 @@ pub use disk::*; ...@@ -88,7 +88,7 @@ pub use disk::*;
use torch::*; use torch::*;
use std::{ use std::{
alloc::{alloc_zeroed, dealloc, Layout}, alloc::{Layout, alloc_zeroed, dealloc},
collections::HashMap, collections::HashMap,
fmt::Debug, fmt::Debug,
ptr::NonNull, ptr::NonNull,
...@@ -322,7 +322,10 @@ impl std::fmt::Debug for RegistrationHandles { ...@@ -322,7 +322,10 @@ impl std::fmt::Debug for RegistrationHandles {
impl Drop for RegistrationHandles { impl Drop for RegistrationHandles {
fn drop(&mut self) { fn drop(&mut self) {
if !self.handles.is_empty() { if !self.handles.is_empty() {
panic!("RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called", self.handles.len()); panic!(
"RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called",
self.handles.len()
);
} }
} }
} }
......
...@@ -207,7 +207,7 @@ mod nixl { ...@@ -207,7 +207,7 @@ mod nixl {
S: MemoryRegion, S: MemoryRegion,
{ {
unsafe fn as_ptr(&self) -> *const u8 { unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self.storage.as_ref()) unsafe { Storage::as_ptr(self.storage.as_ref()) }
} }
fn size(&self) -> usize { fn size(&self) -> usize {
......
...@@ -86,7 +86,7 @@ use std::{ ...@@ -86,7 +86,7 @@ use std::{
sync::{Arc, Mutex, OnceLock}, sync::{Arc, Mutex, OnceLock},
}; };
use cudarc::driver::{sys, CudaContext}; use cudarc::driver::{CudaContext, sys};
/// Trait for [Storage] types that can be accessed by CUDA /// Trait for [Storage] types that can be accessed by CUDA
pub trait CudaAccessible: Storage {} pub trait CudaAccessible: Storage {}
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
use super::*; use super::*;
use core::ffi::c_char; use core::ffi::c_char;
use nix::fcntl::{fallocate, FallocateFlags}; use nix::fcntl::{FallocateFlags, fallocate};
use nix::unistd::unlink; use nix::unistd::unlink;
use std::ffi::CStr; use std::ffi::CStr;
use std::ffi::CString; use std::ffi::CString;
......
...@@ -342,7 +342,7 @@ impl NixlRegisterableStorage for PinnedStorage {} ...@@ -342,7 +342,7 @@ impl NixlRegisterableStorage for PinnedStorage {}
impl MemoryRegion for PinnedStorage { impl MemoryRegion for PinnedStorage {
unsafe fn as_ptr(&self) -> *const u8 { unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self) unsafe { Storage::as_ptr(self) }
} }
fn size(&self) -> usize { fn size(&self) -> usize {
...@@ -367,7 +367,7 @@ impl NixlRegisterableStorage for DeviceStorage {} ...@@ -367,7 +367,7 @@ impl NixlRegisterableStorage for DeviceStorage {}
impl MemoryRegion for DeviceStorage { impl MemoryRegion for DeviceStorage {
unsafe fn as_ptr(&self) -> *const u8 { unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self) unsafe { Storage::as_ptr(self) }
} }
fn size(&self) -> usize { fn size(&self) -> usize {
...@@ -406,7 +406,7 @@ impl NixlRegisterableStorage for DiskStorage { ...@@ -406,7 +406,7 @@ impl NixlRegisterableStorage for DiskStorage {
impl MemoryRegion for DiskStorage { impl MemoryRegion for DiskStorage {
unsafe fn as_ptr(&self) -> *const u8 { unsafe fn as_ptr(&self) -> *const u8 {
Storage::as_ptr(self) unsafe { Storage::as_ptr(self) }
} }
fn size(&self) -> usize { fn size(&self) -> usize {
......
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
//! them within Dynamo. //! them within Dynamo.
use cudarc::driver::{ use cudarc::driver::{
sys::{cuCtxPopCurrent_v2, cuCtxPushCurrent_v2, cudaError_enum, CUcontext, CUstream},
CudaContext, CudaStream, CudaContext, CudaStream,
sys::{CUcontext, CUstream, cuCtxPopCurrent_v2, cuCtxPushCurrent_v2, cudaError_enum},
}; };
use std::pin::Pin; use std::pin::Pin;
use std::{marker::PhantomData, sync::Arc}; use std::{marker::PhantomData, sync::Arc};
......
...@@ -18,8 +18,8 @@ use std::sync::{Arc, Mutex}; ...@@ -18,8 +18,8 @@ use std::sync::{Arc, Mutex};
use tokio::sync::watch; use tokio::sync::watch;
use tracing; use tracing;
use dynamo_runtime::transports::etcd::WatchEvent;
use dynamo_runtime::DistributedRuntime; use dynamo_runtime::DistributedRuntime;
use dynamo_runtime::transports::etcd::WatchEvent;
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DisaggRouterConf { pub struct DisaggRouterConf {
...@@ -218,8 +218,9 @@ impl DisaggregatedRouter { ...@@ -218,8 +218,9 @@ impl DisaggregatedRouter {
} }
pub fn check_for_updates(&self) { pub fn check_for_updates(&self) {
if let Some(watcher) = &self.config_watcher { if let Some(watcher) = &self.config_watcher
if watcher.has_changed().unwrap_or(false) { && watcher.has_changed().unwrap_or(false)
{
let config = watcher.borrow().clone(); let config = watcher.borrow().clone();
let new_value = config.max_local_prefill_length; let new_value = config.max_local_prefill_length;
...@@ -237,7 +238,6 @@ impl DisaggregatedRouter { ...@@ -237,7 +238,6 @@ impl DisaggregatedRouter {
} }
} }
} }
}
pub fn prefill_remote(&self, prefill_length: i32, prefix_hit_length: i32) -> bool { pub fn prefill_remote(&self, prefill_length: i32, prefix_hit_length: i32) -> bool {
// Check for updates before making the decision // Check for updates before making the decision
......
...@@ -7,7 +7,7 @@ use dynamo_runtime::slug::Slug; ...@@ -7,7 +7,7 @@ use dynamo_runtime::slug::Slug;
use crate::discovery::ModelEntry; use crate::discovery::ModelEntry;
use crate::kv_router::{scheduler::DefaultWorkerSelector, KvRouterConfig}; use crate::kv_router::{KvRouterConfig, scheduler::DefaultWorkerSelector};
use crate::{ use crate::{
kv_router::KvRouter, kv_router::KvRouter,
types::openai::{ types::openai::{
......
...@@ -5,16 +5,16 @@ use std::sync::Arc; ...@@ -5,16 +5,16 @@ use std::sync::Arc;
use tokio::sync::mpsc::Sender; use tokio::sync::mpsc::Sender;
use anyhow::Context as _; use anyhow::Context as _;
use tokio::sync::{mpsc::Receiver, Notify}; use tokio::sync::{Notify, mpsc::Receiver};
use dynamo_runtime::{ use dynamo_runtime::{
DistributedRuntime,
pipeline::{ pipeline::{
network::egress::push_router::PushRouter, ManyOut, Operator, RouterMode, SegmentSource, ManyOut, Operator, RouterMode, SegmentSource, ServiceBackend, SingleIn, Source,
ServiceBackend, SingleIn, Source, network::egress::push_router::PushRouter,
}, },
protocols::annotated::Annotated, protocols::annotated::Annotated,
transports::etcd::{KeyValue, WatchEvent}, transports::etcd::{KeyValue, WatchEvent},
DistributedRuntime,
}; };
use crate::{ use crate::{
...@@ -35,7 +35,7 @@ use crate::{ ...@@ -35,7 +35,7 @@ use crate::{
}, },
}; };
use super::{ModelEntry, ModelManager, MODEL_ROOT_PATH}; use super::{MODEL_ROOT_PATH, ModelEntry, ModelManager};
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
pub enum ModelUpdate { pub enum ModelUpdate {
...@@ -213,11 +213,9 @@ impl ModelWatcher { ...@@ -213,11 +213,9 @@ impl ModelWatcher {
); );
update_tx = false; update_tx = false;
} }
if update_tx { if update_tx && let Some(tx) = &self.model_update_tx {
if let Some(tx) = &self.model_update_tx {
tx.send(ModelUpdate::Removed(model_type)).await.ok(); tx.send(ModelUpdate::Removed(model_type)).await.ok();
} }
}
return Ok(None); return Ok(None);
} }
...@@ -251,16 +249,15 @@ impl ModelWatcher { ...@@ -251,16 +249,15 @@ impl ModelWatcher {
); );
} else { } else {
for model_type in ALL_MODEL_TYPES { for model_type in ALL_MODEL_TYPES {
if (chat_model_removed && *model_type == ModelType::Chat) if ((chat_model_removed && *model_type == ModelType::Chat)
|| (completions_model_removed && *model_type == ModelType::Completion) || (completions_model_removed && *model_type == ModelType::Completion)
|| (embeddings_model_removed && *model_type == ModelType::Embedding) || (embeddings_model_removed && *model_type == ModelType::Embedding))
&& let Some(tx) = &self.model_update_tx
{ {
if let Some(tx) = &self.model_update_tx {
tx.send(ModelUpdate::Removed(*model_type)).await.ok(); tx.send(ModelUpdate::Removed(*model_type)).await.ok();
} }
} }
} }
}
Ok(Some(model_name)) Ok(Some(model_name))
} }
......
...@@ -18,7 +18,7 @@ use crate::preprocessor::PreprocessedRequest; ...@@ -18,7 +18,7 @@ use crate::preprocessor::PreprocessedRequest;
use crate::protocols::common::llm_backend::LLMEngineOutput; use crate::protocols::common::llm_backend::LLMEngineOutput;
use crate::protocols::openai::{ use crate::protocols::openai::{
chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse}, chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
completions::{prompt_to_string, NvCreateCompletionRequest, NvCreateCompletionResponse}, completions::{NvCreateCompletionRequest, NvCreateCompletionResponse, prompt_to_string},
}; };
use crate::types::openai::embeddings::NvCreateEmbeddingRequest; use crate::types::openai::embeddings::NvCreateEmbeddingRequest;
use crate::types::openai::embeddings::NvCreateEmbeddingResponse; use crate::types::openai::embeddings::NvCreateEmbeddingResponse;
......
...@@ -8,18 +8,18 @@ use crate::types::openai::chat_completions::{ ...@@ -8,18 +8,18 @@ use crate::types::openai::chat_completions::{
}; };
use anyhow::Context as _; use anyhow::Context as _;
use dynamo_async_openai::types::FinishReason; use dynamo_async_openai::types::FinishReason;
use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime}; use dynamo_runtime::{Runtime, pipeline::Context, runtime::CancellationToken};
use futures::StreamExt; use futures::StreamExt;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::cmp; use std::cmp;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt}; use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
use crate::entrypoint::input::common;
use crate::entrypoint::EngineConfig; use crate::entrypoint::EngineConfig;
use crate::entrypoint::input::common;
/// Max tokens in each response. /// Max tokens in each response.
/// TODO: For batch mode this should be the full context size of the model /// TODO: For batch mode this should be the full context size of the model
......
...@@ -5,7 +5,7 @@ use std::pin::Pin; ...@@ -5,7 +5,7 @@ use std::pin::Pin;
use crate::{ use crate::{
backend::{Backend, ExecutionContext}, backend::{Backend, ExecutionContext},
discovery::{ModelManager, ModelWatcher, MODEL_ROOT_PATH}, discovery::{MODEL_ROOT_PATH, ModelManager, ModelWatcher},
engines::StreamingEngineAdapter, engines::StreamingEngineAdapter,
entrypoint::{self, EngineConfig}, entrypoint::{self, EngineConfig},
kv_router::{KvPushRouter, KvRouter}, kv_router::{KvPushRouter, KvRouter},
...@@ -15,15 +15,16 @@ use crate::{ ...@@ -15,15 +15,16 @@ use crate::{
protocols::common::llm_backend::{BackendOutput, LLMEngineOutput, PreprocessedRequest}, protocols::common::llm_backend::{BackendOutput, LLMEngineOutput, PreprocessedRequest},
request_template::RequestTemplate, request_template::RequestTemplate,
types::{ types::{
Annotated,
openai::chat_completions::{ openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse, NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
OpenAIChatCompletionsStreamingEngine, OpenAIChatCompletionsStreamingEngine,
}, },
Annotated,
}, },
}; };
use dynamo_runtime::{ use dynamo_runtime::{
DistributedRuntime, Runtime,
component::Client, component::Client,
distributed::DistributedConfig, distributed::DistributedConfig,
engine::{AsyncEngineStream, Data}, engine::{AsyncEngineStream, Data},
...@@ -31,7 +32,6 @@ use dynamo_runtime::{ ...@@ -31,7 +32,6 @@ use dynamo_runtime::{
Context, ManyOut, Operator, PushRouter, RouterMode, SegmentSource, ServiceBackend, Context, ManyOut, Operator, PushRouter, RouterMode, SegmentSource, ServiceBackend,
ServiceEngine, ServiceFrontend, SingleIn, Source, ServiceEngine, ServiceFrontend, SingleIn, Source,
}, },
DistributedRuntime, Runtime,
}; };
use std::sync::Arc; use std::sync::Arc;
......
...@@ -9,18 +9,18 @@ use crate::{ ...@@ -9,18 +9,18 @@ use crate::{
model_type::ModelType, model_type::ModelType,
preprocessor::{BackendOutput, PreprocessedRequest}, preprocessor::{BackendOutput, PreprocessedRequest},
types::{ types::{
Annotated,
openai::chat_completions::{ openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse, NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
}, },
Annotated,
}, },
}; };
use dynamo_runtime::engine::AsyncEngineStream; use dynamo_runtime::engine::AsyncEngineStream;
use dynamo_runtime::pipeline::{ use dynamo_runtime::pipeline::{
network::Ingress, Context, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source, Context, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source, network::Ingress,
}; };
use dynamo_runtime::{protocols::EndpointId, DistributedRuntime}; use dynamo_runtime::{DistributedRuntime, protocols::EndpointId};
use crate::entrypoint::EngineConfig; use crate::entrypoint::EngineConfig;
...@@ -125,14 +125,13 @@ pub async fn run( ...@@ -125,14 +125,13 @@ pub async fn run(
result?; result?;
// Cleanup on shutdown // Cleanup on shutdown
if let Some(mut card) = card { if let Some(mut card) = card
if let Err(err) = card && let Err(err) = card
.delete_from_nats(distributed_runtime.nats_client()) .delete_from_nats(distributed_runtime.nats_client())
.await .await
{ {
tracing::error!(%err, "delete_from_nats error on shutdown"); tracing::error!(%err, "delete_from_nats error on shutdown");
} }
}
Ok(()) Ok(())
} }
......
...@@ -4,10 +4,10 @@ ...@@ -4,10 +4,10 @@
use std::sync::Arc; use std::sync::Arc;
use crate::{ use crate::{
discovery::{ModelManager, ModelUpdate, ModelWatcher, MODEL_ROOT_PATH}, discovery::{MODEL_ROOT_PATH, ModelManager, ModelUpdate, ModelWatcher},
endpoint_type::EndpointType, endpoint_type::EndpointType,
engines::StreamingEngineAdapter, engines::StreamingEngineAdapter,
entrypoint::{self, input::common, EngineConfig}, entrypoint::{self, EngineConfig, input::common},
http::service::service_v2::{self, HttpService}, http::service::service_v2::{self, HttpService},
kv_router::KvRouterConfig, kv_router::KvRouterConfig,
model_type::ModelType, model_type::ModelType,
...@@ -17,8 +17,8 @@ use crate::{ ...@@ -17,8 +17,8 @@ use crate::{
}, },
}; };
use dynamo_runtime::transports::etcd; use dynamo_runtime::transports::etcd;
use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};
use dynamo_runtime::{DistributedRuntime, Runtime}; use dynamo_runtime::{DistributedRuntime, Runtime};
use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode};
/// Build and run an HTTP service /// Build and run an HTTP service
pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Result<()> { pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Result<()> {
......
...@@ -6,12 +6,12 @@ use crate::request_template::RequestTemplate; ...@@ -6,12 +6,12 @@ use crate::request_template::RequestTemplate;
use crate::types::openai::chat_completions::{ use crate::types::openai::chat_completions::{
NvCreateChatCompletionRequest, OpenAIChatCompletionsStreamingEngine, NvCreateChatCompletionRequest, OpenAIChatCompletionsStreamingEngine,
}; };
use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime}; use dynamo_runtime::{Runtime, pipeline::Context, runtime::CancellationToken};
use futures::StreamExt; use futures::StreamExt;
use std::io::{ErrorKind, Write}; use std::io::{ErrorKind, Write};
use crate::entrypoint::input::common;
use crate::entrypoint::EngineConfig; use crate::entrypoint::EngineConfig;
use crate::entrypoint::input::common;
/// Max response tokens for each single query. Must be less than model context size. /// Max response tokens for each single query. Must be less than model context size.
/// TODO: Cmd line flag to overwrite this /// TODO: Cmd line flag to overwrite this
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment