Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bce74588
"docs/vscode:/vscode.git/clone" did not exist on "3c500ae7a9e8b8bc9dae8b558342eec79dc86106"
Unverified
Commit
bce74588
authored
Aug 22, 2025
by
Graham King
Committed by
GitHub
Aug 22, 2025
Browse files
chore: Rust to 1.89 and edition 2024 (#2659)
parent
268d017e
Changes
199
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
99 additions
and
100 deletions
+99
-100
lib/llm/src/block_manager/pool/managed.rs
lib/llm/src/block_manager/pool/managed.rs
+1
-1
lib/llm/src/block_manager/pool/managed/active.rs
lib/llm/src/block_manager/pool/managed/active.rs
+11
-11
lib/llm/src/block_manager/pool/managed/inactive.rs
lib/llm/src/block_manager/pool/managed/inactive.rs
+6
-4
lib/llm/src/block_manager/pool/managed/state.rs
lib/llm/src/block_manager/pool/managed/state.rs
+7
-9
lib/llm/src/block_manager/state.rs
lib/llm/src/block_manager/state.rs
+3
-3
lib/llm/src/block_manager/storage.rs
lib/llm/src/block_manager/storage.rs
+5
-2
lib/llm/src/block_manager/storage/arena.rs
lib/llm/src/block_manager/storage/arena.rs
+1
-1
lib/llm/src/block_manager/storage/cuda.rs
lib/llm/src/block_manager/storage/cuda.rs
+1
-1
lib/llm/src/block_manager/storage/disk.rs
lib/llm/src/block_manager/storage/disk.rs
+1
-1
lib/llm/src/block_manager/storage/nixl.rs
lib/llm/src/block_manager/storage/nixl.rs
+3
-3
lib/llm/src/cuda.rs
lib/llm/src/cuda.rs
+1
-1
lib/llm/src/disagg_router.rs
lib/llm/src/disagg_router.rs
+18
-18
lib/llm/src/discovery/model_manager.rs
lib/llm/src/discovery/model_manager.rs
+1
-1
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+11
-14
lib/llm/src/engines.rs
lib/llm/src/engines.rs
+1
-1
lib/llm/src/entrypoint/input/batch.rs
lib/llm/src/entrypoint/input/batch.rs
+3
-3
lib/llm/src/entrypoint/input/common.rs
lib/llm/src/entrypoint/input/common.rs
+13
-13
lib/llm/src/entrypoint/input/endpoint.rs
lib/llm/src/entrypoint/input/endpoint.rs
+7
-8
lib/llm/src/entrypoint/input/http.rs
lib/llm/src/entrypoint/input/http.rs
+3
-3
lib/llm/src/entrypoint/input/text.rs
lib/llm/src/entrypoint/input/text.rs
+2
-2
No files found.
lib/llm/src/block_manager/pool/managed.rs
View file @
bce74588
...
...
@@ -589,7 +589,7 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> ProgressEngine
#[cfg(test)]
mod
tests
{
use
crate
::
block_manager
::
block
::{
BasicMetadata
,
Blocks
};
use
crate
::
block_manager
::
layout
::{
tests
::
setup_layout
,
FullyContiguous
,
LayoutConfig
};
use
crate
::
block_manager
::
layout
::{
FullyContiguous
,
LayoutConfig
,
tests
::
setup_layout
};
use
crate
::
block_manager
::
locality
::
Local
;
use
crate
::
tokens
::{
TokenBlockSequence
,
Tokens
};
...
...
lib/llm/src/block_manager/pool/managed/active.rs
View file @
bce74588
...
...
@@ -51,10 +51,10 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
// Set the parent of the block if it has one.
// This is needed to ensure the lifetime of the parent is at least as long as the child.
if
let
Ok
(
Some
(
parent
))
=
block
.parent_sequence_hash
()
{
if
let
Some
(
parent_block
)
=
self
.match_sequence_hash
(
parent
)
{
block
.set_parent
(
parent_block
.mutable_block
()
.clone
());
}
if
let
Ok
(
Some
(
parent
))
=
block
.parent_sequence_hash
()
&&
let
Some
(
parent_block
)
=
self
.match_sequence_hash
(
parent
)
{
block
.set_parent
(
parent_block
.mutable_block
()
.clone
());
}
let
shared
=
Arc
::
new
(
block
);
...
...
@@ -78,14 +78,14 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> ActiveBlockPool<S, L, M>
}
pub
fn
remove
(
&
mut
self
,
block
:
&
mut
Block
<
S
,
L
,
M
>
)
{
if
let
Ok
(
sequence_hash
)
=
block
.sequence_hash
()
{
if
let
Some
(
weak
)
=
self
.map
.get
(
&
sequence_hash
)
{
if
let
Some
(
_
arc
)
=
weak
.upgrade
()
{
block
.reset
();
return
;
}
self
.map
.remove
(
&
sequence_hash
);
if
let
Ok
(
sequence_hash
)
=
block
.sequence_hash
()
&&
let
Some
(
weak
)
=
self
.map
.get
(
&
sequence_hash
)
{
if
let
Some
(
_
arc
)
=
weak
.upgrade
()
{
block
.reset
();
return
;
}
self
.map
.remove
(
&
sequence_hash
);
}
}
...
...
lib/llm/src/block_manager/pool/managed/inactive.rs
View file @
bce74588
...
...
@@ -15,7 +15,7 @@
use
std
::
sync
::
atomic
::
AtomicU64
;
use
crate
::
block_manager
::
block
::{
locality
::
LocalityProvider
,
BlockState
};
use
crate
::
block_manager
::
block
::{
BlockState
,
locality
::
LocalityProvider
};
use
super
::
*
;
use
priority_key
::
PriorityKey
;
...
...
@@ -113,7 +113,9 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> InactiveBlockPool<S, L,
fn
insert_with_sequence_hash
(
&
mut
self
,
block
:
Block
<
S
,
L
,
M
>
,
sequence_hash
:
SequenceHash
)
{
let
priority_key
=
PriorityKey
::
new
(
block
.metadata
()
.clone
(),
sequence_hash
);
if
self
.priority_set
.contains
(
&
priority_key
)
{
tracing
::
trace!
(
"multiple entries with the same sequence hash, resetting block and inserting into uninitialized set"
);
tracing
::
trace!
(
"multiple entries with the same sequence hash, resetting block and inserting into uninitialized set"
);
let
mut
block
=
block
;
block
.reset
();
self
.uninitialized_set
.push_back
(
block
);
...
...
@@ -546,8 +548,8 @@ pub(crate) mod tests {
use
crate
::{
block_manager
::{
block
::{
locality
::
Local
,
registry
::
BlockRegistry
,
state
::
CompleteState
,
Blocks
,
PrivateBlockExt
,
Blocks
,
PrivateBlockExt
,
locality
::
Local
,
registry
::
BlockRegistry
,
state
::
CompleteState
,
},
events
::
NullEventManager
,
layout
::{
BlockLayout
,
FullyContiguous
,
LayoutConfigBuilder
},
...
...
lib/llm/src/block_manager/pool/managed/state.rs
View file @
bce74588
...
...
@@ -14,7 +14,7 @@
// limitations under the License.
use
crate
::
block_manager
::{
block
::{
registry
::
BlockRegistrationError
,
BlockState
,
PrivateBlockExt
},
block
::{
BlockState
,
PrivateBlockExt
,
registry
::
BlockRegistrationError
},
events
::
Publisher
,
};
...
...
@@ -266,18 +266,16 @@ impl<S: Storage, L: LocalityProvider + 'static, M: BlockMetadata> State<S, L, M>
}
}
BlockRegistrationDuplicationSetting
::
Disabled
=>
{
if
let
Some
(
block
)
=
duplicate
{
if
let
Some
(
raw_blocks
)
=
block
.try_take_block
(
private
::
PrivateToken
)
{
self
.inactive
.return_blocks
(
raw_blocks
);
}
if
let
Some
(
block
)
=
duplicate
&&
let
Some
(
raw_blocks
)
=
block
.try_take_block
(
private
::
PrivateToken
)
{
self
.inactive
.return_blocks
(
raw_blocks
);
}
}
}
if
offload
{
if
let
Some
(
priority
)
=
immutable
.metadata
()
.offload_priority
()
{
immutable
.enqueue_offload
(
priority
)
.await
.unwrap
();
}
if
offload
&&
let
Some
(
priority
)
=
immutable
.metadata
()
.offload_priority
()
{
immutable
.enqueue_offload
(
priority
)
.await
.unwrap
();
}
immutable_blocks
.push
(
immutable
);
...
...
lib/llm/src/block_manager/state.rs
View file @
bce74588
...
...
@@ -17,7 +17,7 @@ mod local;
mod
logical
;
mod
resources
;
use
crate
::
block_manager
::
block
::{
factory
::
IntoBlocks
,
MutableBlock
};
use
crate
::
block_manager
::
block
::{
MutableBlock
,
factory
::
IntoBlocks
};
use
crate
::
block_manager
::
locality
::
LogicalResources
;
use
crate
::
block_manager
::
offload
::
request
::
BlockResult
;
...
...
@@ -26,8 +26,8 @@ use super::*;
// use super::offload::OffloadManager;
use
super
::{
block
::{
factory
::
LocalBlockDataFactory
,
locality
::
LocalityProvider
,
Block
,
GlobalRegistry
,
ImmutableBlock
,
Block
,
GlobalRegistry
,
ImmutableBlock
,
factory
::
LocalBlockDataFactory
,
locality
::
LocalityProvider
,
},
config
::
NixlOptions
,
events
::{
EventManager
,
NullEventManager
},
...
...
lib/llm/src/block_manager/storage.rs
View file @
bce74588
...
...
@@ -88,7 +88,7 @@ pub use disk::*;
use
torch
::
*
;
use
std
::{
alloc
::{
alloc_zeroed
,
dealloc
,
Layout
},
alloc
::{
Layout
,
alloc_zeroed
,
dealloc
},
collections
::
HashMap
,
fmt
::
Debug
,
ptr
::
NonNull
,
...
...
@@ -322,7 +322,10 @@ impl std::fmt::Debug for RegistrationHandles {
impl
Drop
for
RegistrationHandles
{
fn
drop
(
&
mut
self
)
{
if
!
self
.handles
.is_empty
()
{
panic!
(
"RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called"
,
self
.handles
.len
());
panic!
(
"RegistrationHandles dropped with {} handles remaining; RegistrationHandles::release() needs to be explicitly called"
,
self
.handles
.len
()
);
}
}
}
...
...
lib/llm/src/block_manager/storage/arena.rs
View file @
bce74588
...
...
@@ -207,7 +207,7 @@ mod nixl {
S
:
MemoryRegion
,
{
unsafe
fn
as_ptr
(
&
self
)
->
*
const
u8
{
Storage
::
as_ptr
(
self
.storage
.as_ref
())
unsafe
{
Storage
::
as_ptr
(
self
.storage
.as_ref
())
}
}
fn
size
(
&
self
)
->
usize
{
...
...
lib/llm/src/block_manager/storage/cuda.rs
View file @
bce74588
...
...
@@ -86,7 +86,7 @@ use std::{
sync
::{
Arc
,
Mutex
,
OnceLock
},
};
use
cudarc
::
driver
::{
sys
,
CudaContext
};
use
cudarc
::
driver
::{
CudaContext
,
sys
};
/// Trait for [Storage] types that can be accessed by CUDA
pub
trait
CudaAccessible
:
Storage
{}
...
...
lib/llm/src/block_manager/storage/disk.rs
View file @
bce74588
...
...
@@ -16,7 +16,7 @@
use
super
::
*
;
use
core
::
ffi
::
c_char
;
use
nix
::
fcntl
::{
fallocate
,
FallocateFlags
};
use
nix
::
fcntl
::{
FallocateFlags
,
fallocate
};
use
nix
::
unistd
::
unlink
;
use
std
::
ffi
::
CStr
;
use
std
::
ffi
::
CString
;
...
...
lib/llm/src/block_manager/storage/nixl.rs
View file @
bce74588
...
...
@@ -342,7 +342,7 @@ impl NixlRegisterableStorage for PinnedStorage {}
impl
MemoryRegion
for
PinnedStorage
{
unsafe
fn
as_ptr
(
&
self
)
->
*
const
u8
{
Storage
::
as_ptr
(
self
)
unsafe
{
Storage
::
as_ptr
(
self
)
}
}
fn
size
(
&
self
)
->
usize
{
...
...
@@ -367,7 +367,7 @@ impl NixlRegisterableStorage for DeviceStorage {}
impl
MemoryRegion
for
DeviceStorage
{
unsafe
fn
as_ptr
(
&
self
)
->
*
const
u8
{
Storage
::
as_ptr
(
self
)
unsafe
{
Storage
::
as_ptr
(
self
)
}
}
fn
size
(
&
self
)
->
usize
{
...
...
@@ -406,7 +406,7 @@ impl NixlRegisterableStorage for DiskStorage {
impl
MemoryRegion
for
DiskStorage
{
unsafe
fn
as_ptr
(
&
self
)
->
*
const
u8
{
Storage
::
as_ptr
(
self
)
unsafe
{
Storage
::
as_ptr
(
self
)
}
}
fn
size
(
&
self
)
->
usize
{
...
...
lib/llm/src/cuda.rs
View file @
bce74588
...
...
@@ -17,8 +17,8 @@
//! them within Dynamo.
use
cudarc
::
driver
::{
sys
::{
cuCtxPopCurrent_v2
,
cuCtxPushCurrent_v2
,
cudaError_enum
,
CUcontext
,
CUstream
},
CudaContext
,
CudaStream
,
sys
::{
CUcontext
,
CUstream
,
cuCtxPopCurrent_v2
,
cuCtxPushCurrent_v2
,
cudaError_enum
},
};
use
std
::
pin
::
Pin
;
use
std
::{
marker
::
PhantomData
,
sync
::
Arc
};
...
...
lib/llm/src/disagg_router.rs
View file @
bce74588
...
...
@@ -18,8 +18,8 @@ use std::sync::{Arc, Mutex};
use
tokio
::
sync
::
watch
;
use
tracing
;
use
dynamo_runtime
::
transports
::
etcd
::
WatchEvent
;
use
dynamo_runtime
::
DistributedRuntime
;
use
dynamo_runtime
::
transports
::
etcd
::
WatchEvent
;
#[derive(Clone,
Debug,
Serialize,
Deserialize)]
pub
struct
DisaggRouterConf
{
...
...
@@ -218,23 +218,23 @@ impl DisaggregatedRouter {
}
pub
fn
check_for_updates
(
&
self
)
{
if
let
Some
(
watcher
)
=
&
self
.config_watcher
{
if
watcher
.has_changed
()
.unwrap_or
(
false
)
{
let
config
=
watcher
.borrow
()
.clone
();
let
new_value
=
config
.max_local_prefill_length
;
// Update the value using the mutex
let
mut
current_value
=
self
.max_local_prefill_length
.lock
()
.unwrap
();
let
old_value
=
*
current_value
;
if
old_value
!=
new_value
{
*
current_value
=
new_value
;
tracing
::
info!
(
"Applied config update for model {}: max_local_prefill_length changed from {} to {}"
,
self
.model_name
,
old_value
,
new_value
);
}
if
let
Some
(
watcher
)
=
&
self
.config_watcher
&&
watcher
.has_changed
()
.unwrap_or
(
false
)
{
let
config
=
watcher
.borrow
()
.clone
();
let
new_value
=
config
.max_local_prefill_length
;
// Update the value using the mutex
let
mut
current_value
=
self
.max_local_prefill_length
.lock
()
.unwrap
();
let
old_value
=
*
current_value
;
if
old_value
!=
new_value
{
*
current_value
=
new_value
;
tracing
::
info!
(
"Applied config update for model {}: max_local_prefill_length changed from {} to {}"
,
self
.model_name
,
old_value
,
new_value
);
}
}
}
...
...
lib/llm/src/discovery/model_manager.rs
View file @
bce74588
...
...
@@ -7,7 +7,7 @@ use dynamo_runtime::slug::Slug;
use
crate
::
discovery
::
ModelEntry
;
use
crate
::
kv_router
::{
scheduler
::
DefaultWorkerSelector
,
KvRouterConfig
};
use
crate
::
kv_router
::{
KvRouterConfig
,
scheduler
::
DefaultWorkerSelector
};
use
crate
::{
kv_router
::
KvRouter
,
types
::
openai
::{
...
...
lib/llm/src/discovery/watcher.rs
View file @
bce74588
...
...
@@ -5,16 +5,16 @@ use std::sync::Arc;
use
tokio
::
sync
::
mpsc
::
Sender
;
use
anyhow
::
Context
as
_
;
use
tokio
::
sync
::{
mpsc
::
Receiver
,
Notify
};
use
tokio
::
sync
::{
Notify
,
mpsc
::
Receiver
};
use
dynamo_runtime
::{
DistributedRuntime
,
pipeline
::{
network
::
egress
::
push_router
::
PushRouter
,
ManyOut
,
Operator
,
RouterMode
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
ManyOut
,
Operator
,
RouterMode
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
network
::
egress
::
push_router
::
PushRouter
,
},
protocols
::
annotated
::
Annotated
,
transports
::
etcd
::{
KeyValue
,
WatchEvent
},
DistributedRuntime
,
};
use
crate
::{
...
...
@@ -35,7 +35,7 @@ use crate::{
},
};
use
super
::{
ModelEntry
,
ModelManager
,
MODEL_ROOT_PATH
};
use
super
::{
MODEL_ROOT_PATH
,
ModelEntry
,
ModelManager
};
#[derive(Debug,
Clone,
Copy,
PartialEq)]
pub
enum
ModelUpdate
{
...
...
@@ -213,10 +213,8 @@ impl ModelWatcher {
);
update_tx
=
false
;
}
if
update_tx
{
if
let
Some
(
tx
)
=
&
self
.model_update_tx
{
tx
.send
(
ModelUpdate
::
Removed
(
model_type
))
.await
.ok
();
}
if
update_tx
&&
let
Some
(
tx
)
=
&
self
.model_update_tx
{
tx
.send
(
ModelUpdate
::
Removed
(
model_type
))
.await
.ok
();
}
return
Ok
(
None
);
}
...
...
@@ -251,13 +249,12 @@ impl ModelWatcher {
);
}
else
{
for
model_type
in
ALL_MODEL_TYPES
{
if
(
chat_model_removed
&&
*
model_type
==
ModelType
::
Chat
)
if
(
(
chat_model_removed
&&
*
model_type
==
ModelType
::
Chat
)
||
(
completions_model_removed
&&
*
model_type
==
ModelType
::
Completion
)
||
(
embeddings_model_removed
&&
*
model_type
==
ModelType
::
Embedding
)
||
(
embeddings_model_removed
&&
*
model_type
==
ModelType
::
Embedding
))
&&
let
Some
(
tx
)
=
&
self
.model_update_tx
{
if
let
Some
(
tx
)
=
&
self
.model_update_tx
{
tx
.send
(
ModelUpdate
::
Removed
(
*
model_type
))
.await
.ok
();
}
tx
.send
(
ModelUpdate
::
Removed
(
*
model_type
))
.await
.ok
();
}
}
}
...
...
lib/llm/src/engines.rs
View file @
bce74588
...
...
@@ -18,7 +18,7 @@ use crate::preprocessor::PreprocessedRequest;
use
crate
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
use
crate
::
protocols
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
prompt_to_string
,
NvCreateCompletionRequest
,
NvCreateCompletionResponse
},
completions
::{
NvCreateCompletionRequest
,
NvCreateCompletionResponse
,
prompt_to_string
},
};
use
crate
::
types
::
openai
::
embeddings
::
NvCreateEmbeddingRequest
;
use
crate
::
types
::
openai
::
embeddings
::
NvCreateEmbeddingResponse
;
...
...
lib/llm/src/entrypoint/input/batch.rs
View file @
bce74588
...
...
@@ -8,18 +8,18 @@ use crate::types::openai::chat_completions::{
};
use
anyhow
::
Context
as
_
;
use
dynamo_async_openai
::
types
::
FinishReason
;
use
dynamo_runtime
::{
pipeline
::
Context
,
runtime
::
CancellationToken
,
Runtime
};
use
dynamo_runtime
::{
Runtime
,
pipeline
::
Context
,
runtime
::
CancellationToken
};
use
futures
::
StreamExt
;
use
serde
::{
Deserialize
,
Serialize
};
use
std
::
cmp
;
use
std
::
path
::{
Path
,
PathBuf
};
use
std
::
sync
::
atomic
::{
AtomicU64
,
Ordering
};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
atomic
::{
AtomicU64
,
Ordering
};
use
std
::
time
::{
Duration
,
Instant
};
use
tokio
::
io
::{
AsyncBufReadExt
,
AsyncWriteExt
};
use
crate
::
entrypoint
::
input
::
common
;
use
crate
::
entrypoint
::
EngineConfig
;
use
crate
::
entrypoint
::
input
::
common
;
/// Max tokens in each response.
/// TODO: For batch mode this should be the full context size of the model
...
...
lib/llm/src/entrypoint/input/common.rs
View file @
bce74588
...
...
@@ -5,7 +5,7 @@ use std::pin::Pin;
use
crate
::{
backend
::{
Backend
,
ExecutionContext
},
discovery
::{
ModelManager
,
ModelWatcher
,
MODEL_ROOT_PATH
},
discovery
::{
MODEL_ROOT_PATH
,
ModelManager
,
ModelWatcher
},
engines
::
StreamingEngineAdapter
,
entrypoint
::{
self
,
EngineConfig
},
kv_router
::{
KvPushRouter
,
KvRouter
},
...
...
@@ -15,15 +15,16 @@ use crate::{
protocols
::
common
::
llm_backend
::{
BackendOutput
,
LLMEngineOutput
,
PreprocessedRequest
},
request_template
::
RequestTemplate
,
types
::{
Annotated
,
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
OpenAIChatCompletionsStreamingEngine
,
},
Annotated
,
},
};
use
dynamo_runtime
::{
DistributedRuntime
,
Runtime
,
component
::
Client
,
distributed
::
DistributedConfig
,
engine
::{
AsyncEngineStream
,
Data
},
...
...
@@ -31,7 +32,6 @@ use dynamo_runtime::{
Context
,
ManyOut
,
Operator
,
PushRouter
,
RouterMode
,
SegmentSource
,
ServiceBackend
,
ServiceEngine
,
ServiceFrontend
,
SingleIn
,
Source
,
},
DistributedRuntime
,
Runtime
,
};
use
std
::
sync
::
Arc
;
...
...
@@ -191,11 +191,11 @@ where
Req
:
Data
,
Resp
:
Data
,
OpenAIPreprocessor
:
Operator
<
Context
<
Req
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
Resp
>>>>
,
Context
<
PreprocessedRequest
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
BackendOutput
>>>>
,
>
,
Context
<
Req
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
Resp
>>>>
,
Context
<
PreprocessedRequest
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
BackendOutput
>>>>
,
>
,
{
let
frontend
=
ServiceFrontend
::
<
SingleIn
<
Req
>
,
ManyOut
<
Annotated
<
Resp
>>>
::
new
();
let
preprocessor
=
OpenAIPreprocessor
::
new
((
*
card
)
.clone
())
...
...
@@ -224,11 +224,11 @@ where
Req
:
Data
,
Resp
:
Data
,
OpenAIPreprocessor
:
Operator
<
Context
<
Req
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
Resp
>>>>
,
Context
<
PreprocessedRequest
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
BackendOutput
>>>>
,
>
,
Context
<
Req
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
Resp
>>>>
,
Context
<
PreprocessedRequest
>
,
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
BackendOutput
>>>>
,
>
,
{
let
frontend
=
SegmentSource
::
<
SingleIn
<
Req
>
,
ManyOut
<
Annotated
<
Resp
>>>
::
new
();
let
preprocessor
=
OpenAIPreprocessor
::
new
(
card
.clone
())
.await
?
.into_operator
();
...
...
lib/llm/src/entrypoint/input/endpoint.rs
View file @
bce74588
...
...
@@ -9,18 +9,18 @@ use crate::{
model_type
::
ModelType
,
preprocessor
::{
BackendOutput
,
PreprocessedRequest
},
types
::{
Annotated
,
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
},
Annotated
,
},
};
use
dynamo_runtime
::
engine
::
AsyncEngineStream
;
use
dynamo_runtime
::
pipeline
::{
network
::
Ingress
,
Context
,
ManyOut
,
Operator
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
Context
,
ManyOut
,
Operator
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
network
::
Ingress
,
};
use
dynamo_runtime
::{
protocols
::
EndpointId
,
DistributedRuntime
};
use
dynamo_runtime
::{
DistributedRuntime
,
protocols
::
EndpointId
};
use
crate
::
entrypoint
::
EngineConfig
;
...
...
@@ -125,13 +125,12 @@ pub async fn run(
result
?
;
// Cleanup on shutdown
if
let
Some
(
mut
card
)
=
card
{
if
let
Err
(
err
)
=
card
if
let
Some
(
mut
card
)
=
card
&&
let
Err
(
err
)
=
card
.delete_from_nats
(
distributed_runtime
.nats_client
())
.await
{
tracing
::
error!
(
%
err
,
"delete_from_nats error on shutdown"
);
}
{
tracing
::
error!
(
%
err
,
"delete_from_nats error on shutdown"
);
}
Ok
(())
...
...
lib/llm/src/entrypoint/input/http.rs
View file @
bce74588
...
...
@@ -4,10 +4,10 @@
use
std
::
sync
::
Arc
;
use
crate
::{
discovery
::{
ModelManager
,
ModelUpdate
,
ModelWatcher
,
MODEL_ROOT_PATH
},
discovery
::{
MODEL_ROOT_PATH
,
ModelManager
,
ModelUpdate
,
ModelWatcher
},
endpoint_type
::
EndpointType
,
engines
::
StreamingEngineAdapter
,
entrypoint
::{
self
,
input
::
common
,
EngineConfig
},
entrypoint
::{
self
,
EngineConfig
,
input
::
common
},
http
::
service
::
service_v2
::{
self
,
HttpService
},
kv_router
::
KvRouterConfig
,
model_type
::
ModelType
,
...
...
@@ -17,8 +17,8 @@ use crate::{
},
};
use
dynamo_runtime
::
transports
::
etcd
;
use
dynamo_runtime
::{
distributed
::
DistributedConfig
,
pipeline
::
RouterMode
};
use
dynamo_runtime
::{
DistributedRuntime
,
Runtime
};
use
dynamo_runtime
::{
distributed
::
DistributedConfig
,
pipeline
::
RouterMode
};
/// Build and run an HTTP service
pub
async
fn
run
(
runtime
:
Runtime
,
engine_config
:
EngineConfig
)
->
anyhow
::
Result
<
()
>
{
...
...
lib/llm/src/entrypoint/input/text.rs
View file @
bce74588
...
...
@@ -6,12 +6,12 @@ use crate::request_template::RequestTemplate;
use
crate
::
types
::
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
OpenAIChatCompletionsStreamingEngine
,
};
use
dynamo_runtime
::{
pipeline
::
Context
,
runtime
::
CancellationToken
,
Runtime
};
use
dynamo_runtime
::{
Runtime
,
pipeline
::
Context
,
runtime
::
CancellationToken
};
use
futures
::
StreamExt
;
use
std
::
io
::{
ErrorKind
,
Write
};
use
crate
::
entrypoint
::
input
::
common
;
use
crate
::
entrypoint
::
EngineConfig
;
use
crate
::
entrypoint
::
input
::
common
;
/// Max response tokens for each single query. Must be less than model context size.
/// TODO: Cmd line flag to overwrite this
...
...
Prev
1
2
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment