Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bce74588
"docs/kubernetes/deployment/multinode-deployment.md" did not exist on "9e6972a548c44e78361ca1296d36f862bbe4dbae"
Unverified
Commit
bce74588
authored
Aug 22, 2025
by
Graham King
Committed by
GitHub
Aug 22, 2025
Browse files
chore: Rust to 1.89 and edition 2024 (#2659)
parent
268d017e
Changes
199
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
110 additions
and
105 deletions
+110
-105
lib/llm/src/block_manager/block/data/logical.rs
lib/llm/src/block_manager/block/data/logical.rs
+1
-1
lib/llm/src/block_manager/block/locality.rs
lib/llm/src/block_manager/block/locality.rs
+1
-1
lib/llm/src/block_manager/block/registry.rs
lib/llm/src/block_manager/block/registry.rs
+22
-22
lib/llm/src/block_manager/block/state.rs
lib/llm/src/block_manager/block/state.rs
+1
-1
lib/llm/src/block_manager/block/transfer.rs
lib/llm/src/block_manager/block/transfer.rs
+1
-1
lib/llm/src/block_manager/block/transfer/context.rs
lib/llm/src/block_manager/block/transfer/context.rs
+5
-5
lib/llm/src/block_manager/block/transfer/cuda.rs
lib/llm/src/block_manager/block/transfer/cuda.rs
+14
-8
lib/llm/src/block_manager/block/transfer/memcpy.rs
lib/llm/src/block_manager/block/transfer/memcpy.rs
+1
-1
lib/llm/src/block_manager/connector/protocol.rs
lib/llm/src/block_manager/connector/protocol.rs
+11
-12
lib/llm/src/block_manager/controller.rs
lib/llm/src/block_manager/controller.rs
+2
-2
lib/llm/src/block_manager/distributed.rs
lib/llm/src/block_manager/distributed.rs
+4
-4
lib/llm/src/block_manager/distributed/transfer.rs
lib/llm/src/block_manager/distributed/transfer.rs
+4
-6
lib/llm/src/block_manager/distributed/worker.rs
lib/llm/src/block_manager/distributed/worker.rs
+4
-4
lib/llm/src/block_manager/distributed/zmq.rs
lib/llm/src/block_manager/distributed/zmq.rs
+5
-5
lib/llm/src/block_manager/layout/nixl.rs
lib/llm/src/block_manager/layout/nixl.rs
+1
-1
lib/llm/src/block_manager/metrics.rs
lib/llm/src/block_manager/metrics.rs
+2
-2
lib/llm/src/block_manager/offload.rs
lib/llm/src/block_manager/offload.rs
+24
-22
lib/llm/src/block_manager/offload/pending.rs
lib/llm/src/block_manager/offload/pending.rs
+3
-3
lib/llm/src/block_manager/offload/request.rs
lib/llm/src/block_manager/offload/request.rs
+1
-1
lib/llm/src/block_manager/pool.rs
lib/llm/src/block_manager/pool.rs
+3
-3
No files found.
lib/llm/src/block_manager/block/data/logical.rs
View file @
bce74588
...
@@ -7,8 +7,8 @@ pub mod distributed_leader_worker;
...
@@ -7,8 +7,8 @@ pub mod distributed_leader_worker;
pub
mod
null
;
pub
mod
null
;
use
crate
::
block_manager
::
block
::{
use
crate
::
block_manager
::
block
::{
transfer
::{
TransferContext
,
TransferError
,
WriteToStrategy
},
BlockDataProvider
,
ReadableBlock
,
WritableBlock
,
BlockDataProvider
,
ReadableBlock
,
WritableBlock
,
transfer
::{
TransferContext
,
TransferError
,
WriteToStrategy
},
};
};
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
...
...
lib/llm/src/block_manager/block/locality.rs
View file @
bce74588
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::
block
::
transfer
::{
use
crate
::
block_manager
::
block
::
transfer
::{
handle_local_transfer
,
TransferContext
,
TransferError
,
WriteToStrategy
,
TransferContext
,
TransferError
,
WriteToStrategy
,
handle_local_transfer
,
};
};
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
...
...
lib/llm/src/block_manager/block/registry.rs
View file @
bce74588
...
@@ -109,21 +109,21 @@ impl BlockRegistry {
...
@@ -109,21 +109,21 @@ impl BlockRegistry {
{
{
let
mut
blocks
=
blocks
.lock
()
.unwrap
();
let
mut
blocks
=
blocks
.lock
()
.unwrap
();
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
if
handle
.upgrade
()
.is_none
()
{
&&
handle
.upgrade
()
.is_none
()
{
blocks
.remove
(
&
sequence_hash
);
blocks
.remove
(
&
sequence_hash
);
}
}
}
}
}
let
mut
global_registry
=
global_registry
.lock
()
.unwrap
();
let
mut
global_registry
=
global_registry
.lock
()
.unwrap
();
if
let
Some
(
entry
)
=
global_registry
.get
(
&
sequence_hash
)
{
if
let
Some
(
entry
)
=
global_registry
.get
(
&
sequence_hash
)
if
entry
.upgrade
()
.is_none
()
{
&&
entry
.upgrade
()
.is_none
()
{
global_registry
.remove
(
&
sequence_hash
);
global_registry
.remove
(
&
sequence_hash
);
}
}
}
}
}
});
});
Self
{
Self
{
...
@@ -136,11 +136,11 @@ impl BlockRegistry {
...
@@ -136,11 +136,11 @@ impl BlockRegistry {
pub
fn
is_registered
(
&
self
,
sequence_hash
:
SequenceHash
)
->
bool
{
pub
fn
is_registered
(
&
self
,
sequence_hash
:
SequenceHash
)
->
bool
{
let
blocks
=
self
.blocks
.lock
()
.unwrap
();
let
blocks
=
self
.blocks
.lock
()
.unwrap
();
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
if
let
Some
(
_
handle
)
=
handle
.upgrade
()
{
&&
let
Some
(
_
handle
)
=
handle
.upgrade
()
{
return
true
;
return
true
;
}
}
}
false
false
}
}
...
@@ -161,13 +161,13 @@ impl BlockRegistry {
...
@@ -161,13 +161,13 @@ impl BlockRegistry {
let
mut
blocks
=
self
.blocks
.lock
()
.unwrap
();
let
mut
blocks
=
self
.blocks
.lock
()
.unwrap
();
// If an identical block already exists in this pool, return an error.
// If an identical block already exists in this pool, return an error.
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
if
let
Some
(
_
handle
)
=
handle
.upgrade
()
{
&&
let
Some
(
_
handle
)
=
handle
.upgrade
()
{
return
Err
(
BlockRegistrationError
::
BlockAlreadyRegistered
(
return
Err
(
BlockRegistrationError
::
BlockAlreadyRegistered
(
sequence_hash
,
sequence_hash
,
));
));
}
}
}
let
mut
publish_handle
=
None
;
let
mut
publish_handle
=
None
;
...
@@ -179,11 +179,11 @@ impl BlockRegistry {
...
@@ -179,11 +179,11 @@ impl BlockRegistry {
let
mut
global_registry
=
self
.global_registry
.lock
()
.unwrap
();
let
mut
global_registry
=
self
.global_registry
.lock
()
.unwrap
();
// If an identical block exists in other pool, use the same registration handle.
// If an identical block exists in other pool, use the same registration handle.
if
let
Some
(
handle
)
=
global_registry
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
global_registry
.get
(
&
sequence_hash
)
if
let
Some
(
handle
)
=
handle
.upgrade
()
{
&&
let
Some
(
handle
)
=
handle
.upgrade
()
{
break
'reg_block
handle
;
break
'reg_block
handle
;
}
}
}
// Otherwise, create a new registration handle.
// Otherwise, create a new registration handle.
publish_handle
=
Some
(
Self
::
create_publish_handle
(
publish_handle
=
Some
(
Self
::
create_publish_handle
(
...
...
lib/llm/src/block_manager/block/state.rs
View file @
bce74588
...
@@ -17,8 +17,8 @@ use std::sync::Arc;
...
@@ -17,8 +17,8 @@ use std::sync::Arc;
use
derive_getters
::
Getters
;
use
derive_getters
::
Getters
;
use
super
::
registry
::{
BlockHandle
,
RegistrationHandle
};
use
super
::
Result
;
use
super
::
Result
;
use
super
::
registry
::{
BlockHandle
,
RegistrationHandle
};
use
crate
::
tokens
::{
PartialTokenBlock
,
SaltHash
,
Token
,
TokenBlock
,
Tokens
};
use
crate
::
tokens
::{
PartialTokenBlock
,
SaltHash
,
Token
,
TokenBlock
,
Tokens
};
#[derive(Debug,
thiserror::Error)]
#[derive(Debug,
thiserror::Error)]
...
...
lib/llm/src/block_manager/block/transfer.rs
View file @
bce74588
...
@@ -22,8 +22,8 @@ mod strategy;
...
@@ -22,8 +22,8 @@ mod strategy;
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::
storage
::{
use
crate
::
block_manager
::
storage
::{
nixl
::{
NixlRegisterableStorage
,
NixlStorage
},
DeviceStorage
,
DiskStorage
,
PinnedStorage
,
SystemStorage
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
,
SystemStorage
,
nixl
::{
NixlRegisterableStorage
,
NixlStorage
},
};
};
use
cudarc
::
driver
::
CudaStream
;
use
cudarc
::
driver
::
CudaStream
;
...
...
lib/llm/src/block_manager/block/transfer/context.rs
View file @
bce74588
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
use
super
::
*
;
use
super
::
*
;
use
cudarc
::
driver
::{
sys
::
CUevent_flags
,
CudaEvent
,
CudaStream
};
use
cudarc
::
driver
::{
CudaEvent
,
CudaStream
,
sys
::
CUevent_flags
};
use
nixl_sys
::
Agent
as
NixlAgent
;
use
nixl_sys
::
Agent
as
NixlAgent
;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
...
@@ -107,10 +107,10 @@ impl TransferContext {
...
@@ -107,10 +107,10 @@ impl TransferContext {
impl
Drop
for
TransferContext
{
impl
Drop
for
TransferContext
{
fn
drop
(
&
mut
self
)
{
fn
drop
(
&
mut
self
)
{
self
.cancel_token
.cancel
();
self
.cancel_token
.cancel
();
if
let
Some
(
handle
)
=
self
.cuda_event_worker
.take
()
{
if
let
Some
(
handle
)
=
self
.cuda_event_worker
.take
()
if
let
Err
(
e
)
=
handle
.join
()
{
&&
let
Err
(
e
)
=
handle
.join
()
{
tracing
::
error!
(
"Error joining CUDA event worker: {:?}"
,
e
);
tracing
::
error!
(
"Error joining CUDA event worker: {:?}"
,
e
);
}
}
}
}
}
}
}
lib/llm/src/block_manager/block/transfer/cuda.rs
View file @
bce74588
...
@@ -177,9 +177,11 @@ unsafe fn cuda_memcpy_h2d(
...
@@ -177,9 +177,11 @@ unsafe fn cuda_memcpy_h2d(
"Source and destination device memory regions must not overlap for D2D copy"
"Source and destination device memory regions must not overlap for D2D copy"
);
);
unsafe
{
let
src_slice
=
std
::
slice
::
from_raw_parts
(
src_ptr
,
size
);
let
src_slice
=
std
::
slice
::
from_raw_parts
(
src_ptr
,
size
);
cuda_result
::
memcpy_htod_async
(
dst_ptr
as
u64
,
src_slice
,
stream
.cu_stream
())
cuda_result
::
memcpy_htod_async
(
dst_ptr
as
u64
,
src_slice
,
stream
.cu_stream
())
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA H2D memcpy failed: {}"
,
e
)))
?
;
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA H2D memcpy failed: {}"
,
e
)))
?
};
Ok
(())
Ok
(())
}
}
...
@@ -199,9 +201,11 @@ unsafe fn cuda_memcpy_d2h(
...
@@ -199,9 +201,11 @@ unsafe fn cuda_memcpy_d2h(
"Source and destination device memory regions must not overlap for D2D copy"
"Source and destination device memory regions must not overlap for D2D copy"
);
);
unsafe
{
let
dst_slice
=
std
::
slice
::
from_raw_parts_mut
(
dst_ptr
,
size
);
let
dst_slice
=
std
::
slice
::
from_raw_parts_mut
(
dst_ptr
,
size
);
cuda_result
::
memcpy_dtoh_async
(
dst_slice
,
src_ptr
as
u64
,
stream
.cu_stream
())
cuda_result
::
memcpy_dtoh_async
(
dst_slice
,
src_ptr
as
u64
,
stream
.cu_stream
())
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2H memcpy failed: {}"
,
e
)))
?
;
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2H memcpy failed: {}"
,
e
)))
?
;
}
Ok
(())
Ok
(())
}
}
...
@@ -221,8 +225,10 @@ unsafe fn cuda_memcpy_d2d(
...
@@ -221,8 +225,10 @@ unsafe fn cuda_memcpy_d2d(
"Source and destination device memory regions must not overlap for D2D copy"
"Source and destination device memory regions must not overlap for D2D copy"
);
);
unsafe
{
cuda_result
::
memcpy_dtod_async
(
dst_ptr
as
u64
,
src_ptr
as
u64
,
size
,
stream
.cu_stream
())
cuda_result
::
memcpy_dtod_async
(
dst_ptr
as
u64
,
src_ptr
as
u64
,
size
,
stream
.cu_stream
())
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2D memcpy failed: {}"
,
e
)))
?
;
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2D memcpy failed: {}"
,
e
)))
?
};
Ok
(())
Ok
(())
}
}
...
...
lib/llm/src/block_manager/block/transfer/memcpy.rs
View file @
bce74588
...
@@ -78,5 +78,5 @@ unsafe fn memcpy(src_ptr: *const u8, dst_ptr: *mut u8, size: usize) {
...
@@ -78,5 +78,5 @@ unsafe fn memcpy(src_ptr: *const u8, dst_ptr: *mut u8, size: usize) {
"Source and destination memory regions must not overlap for copy_nonoverlapping"
"Source and destination memory regions must not overlap for copy_nonoverlapping"
);
);
std
::
ptr
::
copy_nonoverlapping
(
src_ptr
,
dst_ptr
,
size
);
unsafe
{
std
::
ptr
::
copy_nonoverlapping
(
src_ptr
,
dst_ptr
,
size
)
}
;
}
}
lib/llm/src/block_manager/connector/protocol.rs
View file @
bce74588
...
@@ -53,7 +53,7 @@
...
@@ -53,7 +53,7 @@
//!
//!
//! [`SchedulerOutput`] is transform
//! [`SchedulerOutput`] is transform
use
super
::
scheduler
::{
SchedulingDecision
,
DISCONNECTED_WARNING
};
use
super
::
scheduler
::{
DISCONNECTED_WARNING
,
SchedulingDecision
};
use
super
::
*
;
use
super
::
*
;
use
tokio
::
sync
::
oneshot
;
use
tokio
::
sync
::
oneshot
;
...
@@ -194,14 +194,14 @@ impl TransferCompletionHandle for ScheduledTransferCompletionHandle {
...
@@ -194,14 +194,14 @@ impl TransferCompletionHandle for ScheduledTransferCompletionHandle {
}
}
async
fn
mark_complete
(
&
self
,
result
:
anyhow
::
Result
<
()
>
)
{
async
fn
mark_complete
(
&
self
,
result
:
anyhow
::
Result
<
()
>
)
{
if
let
Some
(
completion_tx
)
=
self
.completion_tx
.lock
()
.unwrap
()
.take
()
{
if
let
Some
(
completion_tx
)
=
self
.completion_tx
.lock
()
.unwrap
()
.take
()
if
completion_tx
.send
(
result
)
.is_err
()
{
&&
completion_tx
.send
(
result
)
.is_err
()
{
tracing
::
error!
(
tracing
::
error!
(
"failed to send completion status; this could lead to silent data corruption"
"failed to send completion status; this could lead to silent data corruption"
);
);
}
}
}
}
}
}
}
impl
Drop
for
ScheduledTransferCompletionHandle
{
impl
Drop
for
ScheduledTransferCompletionHandle
{
...
@@ -256,8 +256,8 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
...
@@ -256,8 +256,8 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
let
mut
guard
=
self
.completion_tx
.lock
()
.unwrap
();
let
mut
guard
=
self
.completion_tx
.lock
()
.unwrap
();
guard
.take
()
guard
.take
()
};
};
if
let
Some
(
completion_tx
)
=
completion_tx
{
if
let
Some
(
completion_tx
)
=
completion_tx
if
completion_tx
&&
completion_tx
.send
(
TransferToSchedulerMessage
::
ImmediateResult
(
.send
(
TransferToSchedulerMessage
::
ImmediateResult
(
ImmediateTransferResult
{
ImmediateTransferResult
{
request_id
:
self
.request_id
.clone
(),
request_id
:
self
.request_id
.clone
(),
...
@@ -271,7 +271,6 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
...
@@ -271,7 +271,6 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
tracing
::
error!
(
DISCONNECTED_WARNING
);
tracing
::
error!
(
DISCONNECTED_WARNING
);
}
}
}
}
}
}
}
impl
Drop
for
ImmediateTransferCompletionHandle
{
impl
Drop
for
ImmediateTransferCompletionHandle
{
...
...
lib/llm/src/block_manager/controller.rs
View file @
bce74588
...
@@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
...
@@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
use
dynamo_runtime
::{
use
dynamo_runtime
::{
pipeline
::{
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
ResponseStream
,
SingleIn
,
ResponseStream
,
SingleIn
,
async_trait
,
network
::
Ingress
,
},
},
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
traits
::
DistributedRuntimeProvider
,
traits
::
DistributedRuntimeProvider
,
...
...
lib/llm/src/block_manager/distributed.rs
View file @
bce74588
...
@@ -43,22 +43,22 @@ pub struct SchedulerRequest<T> {
...
@@ -43,22 +43,22 @@ pub struct SchedulerRequest<T> {
mod
tests
{
mod
tests
{
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::
b
lock
::
data
::
logical
::
distributed_leader_worker
::
DistributedLeaderWorkerResources
;
use
crate
::
block_manager
::
KvB
lock
Manager
;
use
crate
::
block_manager
::
block
::
BasicMetadata
;
use
crate
::
block_manager
::
block
::
BasicMetadata
;
use
crate
::
block_manager
::
block
::
data
::
logical
::
distributed_leader_worker
::
DistributedLeaderWorkerResources
;
use
crate
::
block_manager
::
config
::
*
;
use
crate
::
block_manager
::
config
::
*
;
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
storage
::{
use
crate
::
block_manager
::
storage
::{
torch
::{
TorchDevice
,
TorchTensor
},
DeviceAllocator
,
Storage
,
StorageAllocator
,
DeviceAllocator
,
Storage
,
StorageAllocator
,
torch
::{
TorchDevice
,
TorchTensor
},
};
};
use
crate
::
block_manager
::
KvBlockManager
;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
rstest
::
*
;
use
rstest
::
*
;
use
std
::
sync
::{
use
std
::
sync
::{
atomic
::{
AtomicUsize
,
Ordering
},
Arc
,
Arc
,
atomic
::{
AtomicUsize
,
Ordering
},
};
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
...
...
lib/llm/src/block_manager/distributed/transfer.rs
View file @
bce74588
...
@@ -10,15 +10,15 @@ use zmq::*;
...
@@ -10,15 +10,15 @@ use zmq::*;
use
BlockTransferPool
::
*
;
use
BlockTransferPool
::
*
;
use
crate
::
block_manager
::{
use
crate
::
block_manager
::{
BasicMetadata
,
Storage
,
block
::{
block
::{
Block
,
BlockDataProvider
,
BlockDataProviderMut
,
ReadableBlock
,
WritableBlock
,
data
::
local
::
LocalBlockData
,
data
::
local
::
LocalBlockData
,
locality
,
locality
,
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
Block
,
BlockDataProvider
,
BlockDataProviderMut
,
ReadableBlock
,
WritableBlock
,
},
},
connector
::
scheduler
::{
SchedulingDecision
,
TransferSchedulerClient
},
connector
::
scheduler
::{
SchedulingDecision
,
TransferSchedulerClient
},
storage
::{
DeviceStorage
,
DiskStorage
,
Local
,
PinnedStorage
},
storage
::{
DeviceStorage
,
DiskStorage
,
Local
,
PinnedStorage
},
BasicMetadata
,
Storage
,
};
};
use
anyhow
::
Result
;
use
anyhow
::
Result
;
...
@@ -113,15 +113,13 @@ impl BlockTransferHandler {
...
@@ -113,15 +113,13 @@ impl BlockTransferHandler {
.collect
();
.collect
();
// Perform the transfer, and return the notifying channel.
// Perform the transfer, and return the notifying channel.
let
channel
=
match
sources
.write_to
(
&
mut
targets
,
self
.context
.clone
())
{
match
sources
.write_to
(
&
mut
targets
,
self
.context
.clone
())
{
Ok
(
channel
)
=>
Ok
(
channel
),
Ok
(
channel
)
=>
Ok
(
channel
),
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
error!
(
"Failed to write to blocks: {:?}"
,
e
);
tracing
::
error!
(
"Failed to write to blocks: {:?}"
,
e
);
Err
(
e
.into
())
Err
(
e
.into
())
}
}
};
}
channel
}
}
pub
async
fn
execute_transfer
(
&
self
,
request
:
BlockTransferRequest
)
->
Result
<
()
>
{
pub
async
fn
execute_transfer
(
&
self
,
request
:
BlockTransferRequest
)
->
Result
<
()
>
{
...
...
lib/llm/src/block_manager/distributed/worker.rs
View file @
bce74588
...
@@ -10,11 +10,11 @@ use utils::*;
...
@@ -10,11 +10,11 @@ use utils::*;
use
zmq
::
*
;
use
zmq
::
*
;
use
crate
::
block_manager
::{
use
crate
::
block_manager
::{
block
::{
layout_to_blocks
,
locality
,
transfer
::
TransferContext
,
Block
},
BasicMetadata
,
BlockMetadata
,
LayoutConfigBuilder
,
NixlLayout
,
Storage
,
block
::{
Block
,
layout_to_blocks
,
locality
,
transfer
::
TransferContext
},
connector
::
scheduler
::
TransferSchedulerClient
,
connector
::
scheduler
::
TransferSchedulerClient
,
layout
::
LayoutType
,
layout
::
LayoutType
,
storage
::{
torch
::
TorchTensor
,
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
PinnedAllocator
},
storage
::{
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
PinnedAllocator
,
torch
::
TorchTensor
},
BasicMetadata
,
BlockMetadata
,
LayoutConfigBuilder
,
NixlLayout
,
Storage
,
};
};
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
...
@@ -28,8 +28,8 @@ use tokio::sync::oneshot;
...
@@ -28,8 +28,8 @@ use tokio::sync::oneshot;
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
dynamo_runtime
::{
use
dynamo_runtime
::{
utils
::{
leader_worker_barrier
::
WorkerBarrier
,
task
::
CriticalTaskExecutionHandle
},
DistributedRuntime
,
DistributedRuntime
,
utils
::{
leader_worker_barrier
::
WorkerBarrier
,
task
::
CriticalTaskExecutionHandle
},
};
};
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
...
...
lib/llm/src/block_manager/distributed/zmq.rs
View file @
bce74588
...
@@ -13,13 +13,13 @@ use std::collections::{HashMap, VecDeque};
...
@@ -13,13 +13,13 @@ use std::collections::{HashMap, VecDeque};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
std
::
time
::{
Duration
,
Instant
};
use
std
::
time
::{
Duration
,
Instant
};
use
tmq
::{
use
tmq
::{
publish
::{
publish
,
Publish
},
pull
::{
pull
,
Pull
},
push
::{
push
,
Push
},
subscribe
::{
subscribe
,
Subscribe
},
Context
,
Message
,
Multipart
,
Context
,
Message
,
Multipart
,
publish
::{
Publish
,
publish
},
pull
::{
Pull
,
pull
},
push
::{
Push
,
push
},
subscribe
::{
Subscribe
,
subscribe
},
};
};
use
tokio
::
sync
::{
oneshot
,
Mutex
};
use
tokio
::
sync
::{
Mutex
,
oneshot
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
futures_util
::{
SinkExt
,
StreamExt
};
use
futures_util
::{
SinkExt
,
StreamExt
};
...
...
lib/llm/src/block_manager/layout/nixl.rs
View file @
bce74588
...
@@ -110,8 +110,8 @@ use super::{
...
@@ -110,8 +110,8 @@ use super::{
};
};
use
super
::
super
::
storage
::{
use
super
::
super
::
storage
::{
nixl
::{
NixlAgent
,
NixlRegisterableStorage
,
NixlStorage
,
OptArgs
},
Storage
,
StorageAllocator
,
Storage
,
StorageAllocator
,
nixl
::{
NixlAgent
,
NixlRegisterableStorage
,
NixlStorage
,
OptArgs
},
};
};
use
super
::{
FullyContiguous
,
FullyContiguousConfig
,
LayerSeparate
,
LayerSeparateConfig
};
use
super
::{
FullyContiguous
,
FullyContiguousConfig
,
LayerSeparate
,
LayerSeparateConfig
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
...
...
lib/llm/src/block_manager/metrics.rs
View file @
bce74588
...
@@ -15,9 +15,9 @@
...
@@ -15,9 +15,9 @@
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
prometheus
::{
use
prometheus
::{
IntCounterVec
,
IntGaugeVec
,
Opts
,
Registry
,
core
::{
AtomicI64
,
AtomicU64
,
GenericCounter
,
GenericGauge
},
core
::{
AtomicI64
,
AtomicU64
,
GenericCounter
,
GenericGauge
},
register_int_counter_vec_with_registry
,
register_int_gauge_vec_with_registry
,
IntCounterVec
,
register_int_counter_vec_with_registry
,
register_int_gauge_vec_with_registry
,
IntGaugeVec
,
Opts
,
Registry
,
};
};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
pub
struct
BlockManagerMetrics
{
pub
struct
BlockManagerMetrics
{
...
...
lib/llm/src/block_manager/offload.rs
View file @
bce74588
...
@@ -45,8 +45,8 @@
...
@@ -45,8 +45,8 @@
//! of the [`OffloadManager::offload_worker`] and [`OffloadManager::onboard_worker`] methods.
//! of the [`OffloadManager::offload_worker`] and [`OffloadManager::onboard_worker`] methods.
use
super
::
block
::{
use
super
::
block
::{
locality
::
LocalityProvider
,
transfer
::
TransferContext
,
BlockError
,
BlockMetadata
,
BlockState
,
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
MutableBlock
,
ImmutableBlock
,
MutableBlock
,
locality
::
LocalityProvider
,
transfer
::
TransferContext
,
};
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
pool
::{
BlockPool
,
BlockPoolError
};
use
super
::
pool
::{
BlockPool
,
BlockPoolError
};
...
@@ -56,8 +56,9 @@ use nixl_sys::Agent as NixlAgent;
...
@@ -56,8 +56,9 @@ use nixl_sys::Agent as NixlAgent;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
tokio
::
runtime
::
Handle
;
use
tokio
::
runtime
::
Handle
;
use
tokio
::
sync
::{
use
tokio
::
sync
::{
Mutex
,
mpsc
::{
self
,
error
::
TryRecvError
},
mpsc
::{
self
,
error
::
TryRecvError
},
oneshot
,
Mutex
,
oneshot
,
};
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
...
@@ -320,20 +321,21 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
...
@@ -320,20 +321,21 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
if
let
Ok
(
blocks
)
=
target_pool
if
let
Ok
(
blocks
)
=
target_pool
.match_sequence_hashes
(
vec!
[
request
.sequence_hash
]
.as_slice
())
.match_sequence_hashes
(
vec!
[
request
.sequence_hash
]
.as_slice
())
.await
.await
&&
!
blocks
.is_empty
()
{
{
if
!
blocks
.is_empty
()
{
continue
;
continue
;
}
}
}
let
target_block
=
'target_block
:
{
let
target_block
=
'target_block
:
{
if
let
Ok
(
blocks
)
=
target_pool
.allocate_blocks
(
1
)
.await
{
if
let
Ok
(
blocks
)
=
target_pool
.allocate_blocks
(
1
)
.await
if
let
Some
(
block
)
=
blocks
.into_iter
()
.next
()
{
&&
let
Some
(
block
)
=
blocks
.into_iter
()
.next
()
{
break
'target_block
Some
(
block
);
break
'target_block
Some
(
block
);
}
}
}
tracing
::
warn!
(
"Target pool full. Skipping offload. This should only ever happen with very small pool sizes."
);
tracing
::
warn!
(
"Target pool full. Skipping offload. This should only ever happen with very small pool sizes."
);
None
None
};
};
...
@@ -504,15 +506,15 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
...
@@ -504,15 +506,15 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
}
}
}
}
if
let
Some
(
targets
)
=
targets
.as_ref
()
{
if
let
Some
(
targets
)
=
targets
.as_ref
()
if
targets
.len
()
!=
blocks
.len
()
{
&&
targets
.len
()
!=
blocks
.len
()
{
tx
.send
(
Err
(
BlockPoolError
::
BlockError
(
BlockError
::
Other
(
tx
.send
(
Err
(
BlockPoolError
::
BlockError
(
BlockError
::
Other
(
anyhow
::
anyhow!
(
"Number of targets does not match number of blocks."
),
anyhow
::
anyhow!
(
"Number of targets does not match number of blocks."
),
))))
))))
.unwrap
();
.unwrap
();
return
rx
;
return
rx
;
}
}
}
if
blocks
.is_empty
()
{
if
blocks
.is_empty
()
{
tx
.send
(
Ok
(
vec!
[]))
.unwrap
();
tx
.send
(
Ok
(
vec!
[]))
.unwrap
();
...
@@ -582,16 +584,16 @@ mod tests {
...
@@ -582,16 +584,16 @@ mod tests {
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::{
use
crate
::
block_manager
::{
LayoutConfig
,
NixlRegisterableStorage
,
block
::{
block
::{
locality
::
Local
,
BasicMetadata
,
BlockDataExt
,
BlockDataProvider
,
Blocks
,
MutableBlock
,
BasicMetadata
,
BlockDataExt
,
BlockDataProvider
,
Blocks
,
MutableBlock
,
locality
::
Local
,
},
},
layout
::{
nixl
::
NixlLayout
,
FullyContiguous
,
LayerSeparate
,
LayoutType
},
layout
::{
FullyContiguous
,
LayerSeparate
,
LayoutType
,
nixl
::
NixlLayout
},
pool
::{
BlockRegistrationDuplicationSetting
,
ManagedBlockPool
},
pool
::{
BlockRegistrationDuplicationSetting
,
ManagedBlockPool
},
storage
::{
storage
::{
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
DiskStorage
,
PinnedAllocator
,
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
DiskStorage
,
PinnedAllocator
,
PinnedStorage
,
StorageAllocator
,
StorageType
,
PinnedStorage
,
StorageAllocator
,
StorageType
,
},
},
LayoutConfig
,
NixlRegisterableStorage
,
};
};
use
crate
::
tokens
::{
TokenBlockSequence
,
Tokens
};
use
crate
::
tokens
::{
TokenBlockSequence
,
Tokens
};
use
nixl_sys
::{
MemoryRegion
,
NixlDescriptor
};
use
nixl_sys
::{
MemoryRegion
,
NixlDescriptor
};
...
...
lib/llm/src/block_manager/offload/pending.rs
View file @
bce74588
...
@@ -48,10 +48,10 @@ use tokio::sync::{mpsc, oneshot};
...
@@ -48,10 +48,10 @@ use tokio::sync::{mpsc, oneshot};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
crate
::
block_manager
::
block
::{
use
crate
::
block_manager
::
block
::{
locality
::
LocalityProvider
,
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
BlockDataProvider
,
BlockDataProviderMut
,
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
BlockDataProvider
,
BlockDataProviderMut
,
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
MutableBlock
,
ReadableBlock
,
WritableBlock
,
MutableBlock
,
ReadableBlock
,
WritableBlock
,
locality
::
LocalityProvider
,
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
};
};
use
crate
::
block_manager
::
metrics
::
PoolMetrics
;
use
crate
::
block_manager
::
metrics
::
PoolMetrics
;
use
crate
::
block_manager
::
pool
::{
BlockPool
,
BlockPoolError
};
use
crate
::
block_manager
::
pool
::{
BlockPool
,
BlockPoolError
};
...
@@ -59,7 +59,7 @@ use crate::block_manager::storage::{Local, Storage};
...
@@ -59,7 +59,7 @@ use crate::block_manager::storage::{Local, Storage};
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
futures
::{
stream
::
FuturesUnordered
,
StreamExt
};
use
futures
::{
StreamExt
,
stream
::
FuturesUnordered
};
use
super
::
BlockResult
;
use
super
::
BlockResult
;
...
...
lib/llm/src/block_manager/offload/request.rs
View file @
bce74588
...
@@ -18,7 +18,7 @@ use std::sync::Weak;
...
@@ -18,7 +18,7 @@ use std::sync::Weak;
use
tokio
::
sync
::
oneshot
;
use
tokio
::
sync
::
oneshot
;
use
crate
::
block_manager
::
block
::{
use
crate
::
block_manager
::
block
::{
locality
::
LocalityProvider
,
BlockMetadata
,
ImmutableBlock
,
MutableBlock
,
BlockMetadata
,
ImmutableBlock
,
MutableBlock
,
locality
::
LocalityProvider
,
};
};
use
crate
::
block_manager
::
pool
::
BlockPoolError
;
use
crate
::
block_manager
::
pool
::
BlockPoolError
;
use
crate
::
block_manager
::
storage
::
Storage
;
use
crate
::
block_manager
::
storage
::
Storage
;
...
...
lib/llm/src/block_manager/pool.rs
View file @
bce74588
...
@@ -23,15 +23,15 @@ use serde::{Deserialize, Serialize};
...
@@ -23,15 +23,15 @@ use serde::{Deserialize, Serialize};
pub
use
super
::
block
::{
ImmutableBlock
,
MutableBlock
};
pub
use
super
::
block
::{
ImmutableBlock
,
MutableBlock
};
use
super
::
block
::{
use
super
::
block
::{
nixl
::
short_type_name
,
private
,
registry
::
BlockRegistry
,
Block
,
BlockError
,
BlockMetadata
,
Block
,
BlockError
,
BlockMetadata
,
GlobalRegistry
,
MaybeReturnableBlock
,
nixl
::
short_type_name
,
GlobalRegistry
,
MaybeReturnableBlock
,
private
,
registry
::
BlockRegistry
,
};
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
storage
::
Storage
;
use
super
::
storage
::
Storage
;
use
crate
::
block_manager
::
block
::
locality
::
LocalityProvider
;
use
crate
::
block_manager
::
CacheLevel
;
use
crate
::
block_manager
::
CacheLevel
;
use
crate
::
block_manager
::
block
::
locality
::
LocalityProvider
;
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
...
...
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment