Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bce74588
Unverified
Commit
bce74588
authored
Aug 22, 2025
by
Graham King
Committed by
GitHub
Aug 22, 2025
Browse files
chore: Rust to 1.89 and edition 2024 (#2659)
parent
268d017e
Changes
199
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
110 additions
and
105 deletions
+110
-105
lib/llm/src/block_manager/block/data/logical.rs
lib/llm/src/block_manager/block/data/logical.rs
+1
-1
lib/llm/src/block_manager/block/locality.rs
lib/llm/src/block_manager/block/locality.rs
+1
-1
lib/llm/src/block_manager/block/registry.rs
lib/llm/src/block_manager/block/registry.rs
+22
-22
lib/llm/src/block_manager/block/state.rs
lib/llm/src/block_manager/block/state.rs
+1
-1
lib/llm/src/block_manager/block/transfer.rs
lib/llm/src/block_manager/block/transfer.rs
+1
-1
lib/llm/src/block_manager/block/transfer/context.rs
lib/llm/src/block_manager/block/transfer/context.rs
+5
-5
lib/llm/src/block_manager/block/transfer/cuda.rs
lib/llm/src/block_manager/block/transfer/cuda.rs
+14
-8
lib/llm/src/block_manager/block/transfer/memcpy.rs
lib/llm/src/block_manager/block/transfer/memcpy.rs
+1
-1
lib/llm/src/block_manager/connector/protocol.rs
lib/llm/src/block_manager/connector/protocol.rs
+11
-12
lib/llm/src/block_manager/controller.rs
lib/llm/src/block_manager/controller.rs
+2
-2
lib/llm/src/block_manager/distributed.rs
lib/llm/src/block_manager/distributed.rs
+4
-4
lib/llm/src/block_manager/distributed/transfer.rs
lib/llm/src/block_manager/distributed/transfer.rs
+4
-6
lib/llm/src/block_manager/distributed/worker.rs
lib/llm/src/block_manager/distributed/worker.rs
+4
-4
lib/llm/src/block_manager/distributed/zmq.rs
lib/llm/src/block_manager/distributed/zmq.rs
+5
-5
lib/llm/src/block_manager/layout/nixl.rs
lib/llm/src/block_manager/layout/nixl.rs
+1
-1
lib/llm/src/block_manager/metrics.rs
lib/llm/src/block_manager/metrics.rs
+2
-2
lib/llm/src/block_manager/offload.rs
lib/llm/src/block_manager/offload.rs
+24
-22
lib/llm/src/block_manager/offload/pending.rs
lib/llm/src/block_manager/offload/pending.rs
+3
-3
lib/llm/src/block_manager/offload/request.rs
lib/llm/src/block_manager/offload/request.rs
+1
-1
lib/llm/src/block_manager/pool.rs
lib/llm/src/block_manager/pool.rs
+3
-3
No files found.
lib/llm/src/block_manager/block/data/logical.rs
View file @
bce74588
...
@@ -7,8 +7,8 @@ pub mod distributed_leader_worker;
...
@@ -7,8 +7,8 @@ pub mod distributed_leader_worker;
pub
mod
null
;
pub
mod
null
;
use
crate
::
block_manager
::
block
::{
use
crate
::
block_manager
::
block
::{
transfer
::{
TransferContext
,
TransferError
,
WriteToStrategy
},
BlockDataProvider
,
ReadableBlock
,
WritableBlock
,
BlockDataProvider
,
ReadableBlock
,
WritableBlock
,
transfer
::{
TransferContext
,
TransferError
,
WriteToStrategy
},
};
};
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
...
...
lib/llm/src/block_manager/block/locality.rs
View file @
bce74588
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::
block
::
transfer
::{
use
crate
::
block_manager
::
block
::
transfer
::{
handle_local_transfer
,
TransferContext
,
TransferError
,
WriteToStrategy
,
TransferContext
,
TransferError
,
WriteToStrategy
,
handle_local_transfer
,
};
};
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
use
crate
::
block_manager
::
storage
::{
self
,
nixl
::
NixlDescriptor
};
...
...
lib/llm/src/block_manager/block/registry.rs
View file @
bce74588
...
@@ -109,19 +109,19 @@ impl BlockRegistry {
...
@@ -109,19 +109,19 @@ impl BlockRegistry {
{
{
let
mut
blocks
=
blocks
.lock
()
.unwrap
();
let
mut
blocks
=
blocks
.lock
()
.unwrap
();
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
if
handle
.upgrade
()
.is_none
()
{
&&
handle
.upgrade
()
.is_none
()
blocks
.remove
(
&
sequence_hash
);
{
}
blocks
.remove
(
&
sequence_hash
);
}
}
}
}
let
mut
global_registry
=
global_registry
.lock
()
.unwrap
();
let
mut
global_registry
=
global_registry
.lock
()
.unwrap
();
if
let
Some
(
entry
)
=
global_registry
.get
(
&
sequence_hash
)
{
if
let
Some
(
entry
)
=
global_registry
.get
(
&
sequence_hash
)
if
entry
.upgrade
()
.is_none
()
{
&&
entry
.upgrade
()
.is_none
()
global_registry
.remove
(
&
sequence_hash
);
{
}
global_registry
.remove
(
&
sequence_hash
);
}
}
}
}
});
});
...
@@ -136,10 +136,10 @@ impl BlockRegistry {
...
@@ -136,10 +136,10 @@ impl BlockRegistry {
pub
fn
is_registered
(
&
self
,
sequence_hash
:
SequenceHash
)
->
bool
{
pub
fn
is_registered
(
&
self
,
sequence_hash
:
SequenceHash
)
->
bool
{
let
blocks
=
self
.blocks
.lock
()
.unwrap
();
let
blocks
=
self
.blocks
.lock
()
.unwrap
();
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
if
let
Some
(
_
handle
)
=
handle
.upgrade
()
{
&&
let
Some
(
_
handle
)
=
handle
.upgrade
()
return
true
;
{
}
return
true
;
}
}
false
false
}
}
...
@@ -161,12 +161,12 @@ impl BlockRegistry {
...
@@ -161,12 +161,12 @@ impl BlockRegistry {
let
mut
blocks
=
self
.blocks
.lock
()
.unwrap
();
let
mut
blocks
=
self
.blocks
.lock
()
.unwrap
();
// If an identical block already exists in this pool, return an error.
// If an identical block already exists in this pool, return an error.
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
blocks
.get
(
&
sequence_hash
)
if
let
Some
(
_
handle
)
=
handle
.upgrade
()
{
&&
let
Some
(
_
handle
)
=
handle
.upgrade
()
return
Err
(
BlockRegistrationError
::
BlockAlreadyRegistered
(
{
sequence_hash
,
return
Err
(
BlockRegistrationError
::
BlockAlreadyRegistered
(
));
sequence_hash
,
}
));
}
}
let
mut
publish_handle
=
None
;
let
mut
publish_handle
=
None
;
...
@@ -179,10 +179,10 @@ impl BlockRegistry {
...
@@ -179,10 +179,10 @@ impl BlockRegistry {
let
mut
global_registry
=
self
.global_registry
.lock
()
.unwrap
();
let
mut
global_registry
=
self
.global_registry
.lock
()
.unwrap
();
// If an identical block exists in other pool, use the same registration handle.
// If an identical block exists in other pool, use the same registration handle.
if
let
Some
(
handle
)
=
global_registry
.get
(
&
sequence_hash
)
{
if
let
Some
(
handle
)
=
global_registry
.get
(
&
sequence_hash
)
if
let
Some
(
handle
)
=
handle
.upgrade
()
{
&&
let
Some
(
handle
)
=
handle
.upgrade
()
break
'reg_block
handle
;
{
}
break
'reg_block
handle
;
}
}
// Otherwise, create a new registration handle.
// Otherwise, create a new registration handle.
...
...
lib/llm/src/block_manager/block/state.rs
View file @
bce74588
...
@@ -17,8 +17,8 @@ use std::sync::Arc;
...
@@ -17,8 +17,8 @@ use std::sync::Arc;
use
derive_getters
::
Getters
;
use
derive_getters
::
Getters
;
use
super
::
registry
::{
BlockHandle
,
RegistrationHandle
};
use
super
::
Result
;
use
super
::
Result
;
use
super
::
registry
::{
BlockHandle
,
RegistrationHandle
};
use
crate
::
tokens
::{
PartialTokenBlock
,
SaltHash
,
Token
,
TokenBlock
,
Tokens
};
use
crate
::
tokens
::{
PartialTokenBlock
,
SaltHash
,
Token
,
TokenBlock
,
Tokens
};
#[derive(Debug,
thiserror::Error)]
#[derive(Debug,
thiserror::Error)]
...
...
lib/llm/src/block_manager/block/transfer.rs
View file @
bce74588
...
@@ -22,8 +22,8 @@ mod strategy;
...
@@ -22,8 +22,8 @@ mod strategy;
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::
storage
::{
use
crate
::
block_manager
::
storage
::{
nixl
::{
NixlRegisterableStorage
,
NixlStorage
},
DeviceStorage
,
DiskStorage
,
PinnedStorage
,
SystemStorage
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
,
SystemStorage
,
nixl
::{
NixlRegisterableStorage
,
NixlStorage
},
};
};
use
cudarc
::
driver
::
CudaStream
;
use
cudarc
::
driver
::
CudaStream
;
...
...
lib/llm/src/block_manager/block/transfer/context.rs
View file @
bce74588
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
use
super
::
*
;
use
super
::
*
;
use
cudarc
::
driver
::{
sys
::
CUevent_flags
,
CudaEvent
,
CudaStream
};
use
cudarc
::
driver
::{
CudaEvent
,
CudaStream
,
sys
::
CUevent_flags
};
use
nixl_sys
::
Agent
as
NixlAgent
;
use
nixl_sys
::
Agent
as
NixlAgent
;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
...
@@ -107,10 +107,10 @@ impl TransferContext {
...
@@ -107,10 +107,10 @@ impl TransferContext {
impl
Drop
for
TransferContext
{
impl
Drop
for
TransferContext
{
fn
drop
(
&
mut
self
)
{
fn
drop
(
&
mut
self
)
{
self
.cancel_token
.cancel
();
self
.cancel_token
.cancel
();
if
let
Some
(
handle
)
=
self
.cuda_event_worker
.take
()
{
if
let
Some
(
handle
)
=
self
.cuda_event_worker
.take
()
if
let
Err
(
e
)
=
handle
.join
()
{
&&
let
Err
(
e
)
=
handle
.join
()
tracing
::
error!
(
"Error joining CUDA event worker: {:?}"
,
e
);
{
}
tracing
::
error!
(
"Error joining CUDA event worker: {:?}"
,
e
);
}
}
}
}
}
}
lib/llm/src/block_manager/block/transfer/cuda.rs
View file @
bce74588
...
@@ -177,9 +177,11 @@ unsafe fn cuda_memcpy_h2d(
...
@@ -177,9 +177,11 @@ unsafe fn cuda_memcpy_h2d(
"Source and destination device memory regions must not overlap for D2D copy"
"Source and destination device memory regions must not overlap for D2D copy"
);
);
let
src_slice
=
std
::
slice
::
from_raw_parts
(
src_ptr
,
size
);
unsafe
{
cuda_result
::
memcpy_htod_async
(
dst_ptr
as
u64
,
src_slice
,
stream
.cu_stream
())
let
src_slice
=
std
::
slice
::
from_raw_parts
(
src_ptr
,
size
);
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA H2D memcpy failed: {}"
,
e
)))
?
;
cuda_result
::
memcpy_htod_async
(
dst_ptr
as
u64
,
src_slice
,
stream
.cu_stream
())
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA H2D memcpy failed: {}"
,
e
)))
?
};
Ok
(())
Ok
(())
}
}
...
@@ -199,9 +201,11 @@ unsafe fn cuda_memcpy_d2h(
...
@@ -199,9 +201,11 @@ unsafe fn cuda_memcpy_d2h(
"Source and destination device memory regions must not overlap for D2D copy"
"Source and destination device memory regions must not overlap for D2D copy"
);
);
let
dst_slice
=
std
::
slice
::
from_raw_parts_mut
(
dst_ptr
,
size
);
unsafe
{
cuda_result
::
memcpy_dtoh_async
(
dst_slice
,
src_ptr
as
u64
,
stream
.cu_stream
())
let
dst_slice
=
std
::
slice
::
from_raw_parts_mut
(
dst_ptr
,
size
);
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2H memcpy failed: {}"
,
e
)))
?
;
cuda_result
::
memcpy_dtoh_async
(
dst_slice
,
src_ptr
as
u64
,
stream
.cu_stream
())
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2H memcpy failed: {}"
,
e
)))
?
;
}
Ok
(())
Ok
(())
}
}
...
@@ -221,8 +225,10 @@ unsafe fn cuda_memcpy_d2d(
...
@@ -221,8 +225,10 @@ unsafe fn cuda_memcpy_d2d(
"Source and destination device memory regions must not overlap for D2D copy"
"Source and destination device memory regions must not overlap for D2D copy"
);
);
cuda_result
::
memcpy_dtod_async
(
dst_ptr
as
u64
,
src_ptr
as
u64
,
size
,
stream
.cu_stream
())
unsafe
{
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2D memcpy failed: {}"
,
e
)))
?
;
cuda_result
::
memcpy_dtod_async
(
dst_ptr
as
u64
,
src_ptr
as
u64
,
size
,
stream
.cu_stream
())
.map_err
(|
e
|
TransferError
::
ExecutionError
(
format!
(
"CUDA D2D memcpy failed: {}"
,
e
)))
?
};
Ok
(())
Ok
(())
}
}
...
...
lib/llm/src/block_manager/block/transfer/memcpy.rs
View file @
bce74588
...
@@ -78,5 +78,5 @@ unsafe fn memcpy(src_ptr: *const u8, dst_ptr: *mut u8, size: usize) {
...
@@ -78,5 +78,5 @@ unsafe fn memcpy(src_ptr: *const u8, dst_ptr: *mut u8, size: usize) {
"Source and destination memory regions must not overlap for copy_nonoverlapping"
"Source and destination memory regions must not overlap for copy_nonoverlapping"
);
);
std
::
ptr
::
copy_nonoverlapping
(
src_ptr
,
dst_ptr
,
size
);
unsafe
{
std
::
ptr
::
copy_nonoverlapping
(
src_ptr
,
dst_ptr
,
size
)
}
;
}
}
lib/llm/src/block_manager/connector/protocol.rs
View file @
bce74588
...
@@ -53,7 +53,7 @@
...
@@ -53,7 +53,7 @@
//!
//!
//! [`SchedulerOutput`] is transform
//! [`SchedulerOutput`] is transform
use
super
::
scheduler
::{
SchedulingDecision
,
DISCONNECTED_WARNING
};
use
super
::
scheduler
::{
DISCONNECTED_WARNING
,
SchedulingDecision
};
use
super
::
*
;
use
super
::
*
;
use
tokio
::
sync
::
oneshot
;
use
tokio
::
sync
::
oneshot
;
...
@@ -194,12 +194,12 @@ impl TransferCompletionHandle for ScheduledTransferCompletionHandle {
...
@@ -194,12 +194,12 @@ impl TransferCompletionHandle for ScheduledTransferCompletionHandle {
}
}
async
fn
mark_complete
(
&
self
,
result
:
anyhow
::
Result
<
()
>
)
{
async
fn
mark_complete
(
&
self
,
result
:
anyhow
::
Result
<
()
>
)
{
if
let
Some
(
completion_tx
)
=
self
.completion_tx
.lock
()
.unwrap
()
.take
()
{
if
let
Some
(
completion_tx
)
=
self
.completion_tx
.lock
()
.unwrap
()
.take
()
if
completion_tx
.send
(
result
)
.is_err
()
{
&&
completion_tx
.send
(
result
)
.is_err
()
tracing
::
error!
(
{
"failed to send completion status; this could lead to silent data corruption"
tracing
::
error!
(
);
"failed to send completion status; this could lead to silent data corruption"
}
);
}
}
}
}
}
}
...
@@ -256,8 +256,8 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
...
@@ -256,8 +256,8 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
let
mut
guard
=
self
.completion_tx
.lock
()
.unwrap
();
let
mut
guard
=
self
.completion_tx
.lock
()
.unwrap
();
guard
.take
()
guard
.take
()
};
};
if
let
Some
(
completion_tx
)
=
completion_tx
{
if
let
Some
(
completion_tx
)
=
completion_tx
if
completion_tx
&&
completion_tx
.send
(
TransferToSchedulerMessage
::
ImmediateResult
(
.send
(
TransferToSchedulerMessage
::
ImmediateResult
(
ImmediateTransferResult
{
ImmediateTransferResult
{
request_id
:
self
.request_id
.clone
(),
request_id
:
self
.request_id
.clone
(),
...
@@ -267,9 +267,8 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
...
@@ -267,9 +267,8 @@ impl TransferCompletionHandle for ImmediateTransferCompletionHandle {
))
))
.await
.await
.is_err
()
.is_err
()
{
{
tracing
::
error!
(
DISCONNECTED_WARNING
);
tracing
::
error!
(
DISCONNECTED_WARNING
);
}
}
}
}
}
}
}
...
...
lib/llm/src/block_manager/controller.rs
View file @
bce74588
...
@@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
...
@@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
use
dynamo_runtime
::{
use
dynamo_runtime
::{
pipeline
::{
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
ResponseStream
,
SingleIn
,
ResponseStream
,
SingleIn
,
async_trait
,
network
::
Ingress
,
},
},
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
traits
::
DistributedRuntimeProvider
,
traits
::
DistributedRuntimeProvider
,
...
...
lib/llm/src/block_manager/distributed.rs
View file @
bce74588
...
@@ -43,22 +43,22 @@ pub struct SchedulerRequest<T> {
...
@@ -43,22 +43,22 @@ pub struct SchedulerRequest<T> {
mod
tests
{
mod
tests
{
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::
block
::
data
::
logical
::
distributed_leader_worker
::
DistributedLeaderWorkerResources
;
use
crate
::
block_manager
::
KvBlockManager
;
use
crate
::
block_manager
::
block
::
BasicMetadata
;
use
crate
::
block_manager
::
block
::
BasicMetadata
;
use
crate
::
block_manager
::
block
::
data
::
logical
::
distributed_leader_worker
::
DistributedLeaderWorkerResources
;
use
crate
::
block_manager
::
config
::
*
;
use
crate
::
block_manager
::
config
::
*
;
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
locality
::
Logical
;
use
crate
::
block_manager
::
storage
::{
use
crate
::
block_manager
::
storage
::{
torch
::{
TorchDevice
,
TorchTensor
},
DeviceAllocator
,
Storage
,
StorageAllocator
,
DeviceAllocator
,
Storage
,
StorageAllocator
,
torch
::{
TorchDevice
,
TorchTensor
},
};
};
use
crate
::
block_manager
::
KvBlockManager
;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
rstest
::
*
;
use
rstest
::
*
;
use
std
::
sync
::{
use
std
::
sync
::{
atomic
::{
AtomicUsize
,
Ordering
},
Arc
,
Arc
,
atomic
::{
AtomicUsize
,
Ordering
},
};
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
...
...
lib/llm/src/block_manager/distributed/transfer.rs
View file @
bce74588
...
@@ -10,15 +10,15 @@ use zmq::*;
...
@@ -10,15 +10,15 @@ use zmq::*;
use
BlockTransferPool
::
*
;
use
BlockTransferPool
::
*
;
use
crate
::
block_manager
::{
use
crate
::
block_manager
::{
BasicMetadata
,
Storage
,
block
::{
block
::{
Block
,
BlockDataProvider
,
BlockDataProviderMut
,
ReadableBlock
,
WritableBlock
,
data
::
local
::
LocalBlockData
,
data
::
local
::
LocalBlockData
,
locality
,
locality
,
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
Block
,
BlockDataProvider
,
BlockDataProviderMut
,
ReadableBlock
,
WritableBlock
,
},
},
connector
::
scheduler
::{
SchedulingDecision
,
TransferSchedulerClient
},
connector
::
scheduler
::{
SchedulingDecision
,
TransferSchedulerClient
},
storage
::{
DeviceStorage
,
DiskStorage
,
Local
,
PinnedStorage
},
storage
::{
DeviceStorage
,
DiskStorage
,
Local
,
PinnedStorage
},
BasicMetadata
,
Storage
,
};
};
use
anyhow
::
Result
;
use
anyhow
::
Result
;
...
@@ -113,15 +113,13 @@ impl BlockTransferHandler {
...
@@ -113,15 +113,13 @@ impl BlockTransferHandler {
.collect
();
.collect
();
// Perform the transfer, and return the notifying channel.
// Perform the transfer, and return the notifying channel.
let
channel
=
match
sources
.write_to
(
&
mut
targets
,
self
.context
.clone
())
{
match
sources
.write_to
(
&
mut
targets
,
self
.context
.clone
())
{
Ok
(
channel
)
=>
Ok
(
channel
),
Ok
(
channel
)
=>
Ok
(
channel
),
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
error!
(
"Failed to write to blocks: {:?}"
,
e
);
tracing
::
error!
(
"Failed to write to blocks: {:?}"
,
e
);
Err
(
e
.into
())
Err
(
e
.into
())
}
}
};
}
channel
}
}
pub
async
fn
execute_transfer
(
&
self
,
request
:
BlockTransferRequest
)
->
Result
<
()
>
{
pub
async
fn
execute_transfer
(
&
self
,
request
:
BlockTransferRequest
)
->
Result
<
()
>
{
...
...
lib/llm/src/block_manager/distributed/worker.rs
View file @
bce74588
...
@@ -10,11 +10,11 @@ use utils::*;
...
@@ -10,11 +10,11 @@ use utils::*;
use
zmq
::
*
;
use
zmq
::
*
;
use
crate
::
block_manager
::{
use
crate
::
block_manager
::{
block
::{
layout_to_blocks
,
locality
,
transfer
::
TransferContext
,
Block
},
BasicMetadata
,
BlockMetadata
,
LayoutConfigBuilder
,
NixlLayout
,
Storage
,
block
::{
Block
,
layout_to_blocks
,
locality
,
transfer
::
TransferContext
},
connector
::
scheduler
::
TransferSchedulerClient
,
connector
::
scheduler
::
TransferSchedulerClient
,
layout
::
LayoutType
,
layout
::
LayoutType
,
storage
::{
torch
::
TorchTensor
,
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
PinnedAllocator
},
storage
::{
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
PinnedAllocator
,
torch
::
TorchTensor
},
BasicMetadata
,
BlockMetadata
,
LayoutConfigBuilder
,
NixlLayout
,
Storage
,
};
};
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
...
@@ -28,8 +28,8 @@ use tokio::sync::oneshot;
...
@@ -28,8 +28,8 @@ use tokio::sync::oneshot;
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
dynamo_runtime
::{
use
dynamo_runtime
::{
utils
::{
leader_worker_barrier
::
WorkerBarrier
,
task
::
CriticalTaskExecutionHandle
},
DistributedRuntime
,
DistributedRuntime
,
utils
::{
leader_worker_barrier
::
WorkerBarrier
,
task
::
CriticalTaskExecutionHandle
},
};
};
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
...
...
lib/llm/src/block_manager/distributed/zmq.rs
View file @
bce74588
...
@@ -13,13 +13,13 @@ use std::collections::{HashMap, VecDeque};
...
@@ -13,13 +13,13 @@ use std::collections::{HashMap, VecDeque};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
std
::
time
::{
Duration
,
Instant
};
use
std
::
time
::{
Duration
,
Instant
};
use
tmq
::{
use
tmq
::{
publish
::{
publish
,
Publish
},
pull
::{
pull
,
Pull
},
push
::{
push
,
Push
},
subscribe
::{
subscribe
,
Subscribe
},
Context
,
Message
,
Multipart
,
Context
,
Message
,
Multipart
,
publish
::{
Publish
,
publish
},
pull
::{
Pull
,
pull
},
push
::{
Push
,
push
},
subscribe
::{
Subscribe
,
subscribe
},
};
};
use
tokio
::
sync
::{
oneshot
,
Mutex
};
use
tokio
::
sync
::{
Mutex
,
oneshot
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
futures_util
::{
SinkExt
,
StreamExt
};
use
futures_util
::{
SinkExt
,
StreamExt
};
...
...
lib/llm/src/block_manager/layout/nixl.rs
View file @
bce74588
...
@@ -110,8 +110,8 @@ use super::{
...
@@ -110,8 +110,8 @@ use super::{
};
};
use
super
::
super
::
storage
::{
use
super
::
super
::
storage
::{
nixl
::{
NixlAgent
,
NixlRegisterableStorage
,
NixlStorage
,
OptArgs
},
Storage
,
StorageAllocator
,
Storage
,
StorageAllocator
,
nixl
::{
NixlAgent
,
NixlRegisterableStorage
,
NixlStorage
,
OptArgs
},
};
};
use
super
::{
FullyContiguous
,
FullyContiguousConfig
,
LayerSeparate
,
LayerSeparateConfig
};
use
super
::{
FullyContiguous
,
FullyContiguousConfig
,
LayerSeparate
,
LayerSeparateConfig
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
...
...
lib/llm/src/block_manager/metrics.rs
View file @
bce74588
...
@@ -15,9 +15,9 @@
...
@@ -15,9 +15,9 @@
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
prometheus
::{
use
prometheus
::{
IntCounterVec
,
IntGaugeVec
,
Opts
,
Registry
,
core
::{
AtomicI64
,
AtomicU64
,
GenericCounter
,
GenericGauge
},
core
::{
AtomicI64
,
AtomicU64
,
GenericCounter
,
GenericGauge
},
register_int_counter_vec_with_registry
,
register_int_gauge_vec_with_registry
,
IntCounterVec
,
register_int_counter_vec_with_registry
,
register_int_gauge_vec_with_registry
,
IntGaugeVec
,
Opts
,
Registry
,
};
};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
pub
struct
BlockManagerMetrics
{
pub
struct
BlockManagerMetrics
{
...
...
lib/llm/src/block_manager/offload.rs
View file @
bce74588
...
@@ -45,8 +45,8 @@
...
@@ -45,8 +45,8 @@
//! of the [`OffloadManager::offload_worker`] and [`OffloadManager::onboard_worker`] methods.
//! of the [`OffloadManager::offload_worker`] and [`OffloadManager::onboard_worker`] methods.
use
super
::
block
::{
use
super
::
block
::{
locality
::
LocalityProvider
,
transfer
::
TransferContext
,
BlockError
,
BlockMetadata
,
BlockState
,
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
MutableBlock
,
ImmutableBlock
,
MutableBlock
,
locality
::
LocalityProvider
,
transfer
::
TransferContext
,
};
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
pool
::{
BlockPool
,
BlockPoolError
};
use
super
::
pool
::{
BlockPool
,
BlockPoolError
};
...
@@ -56,8 +56,9 @@ use nixl_sys::Agent as NixlAgent;
...
@@ -56,8 +56,9 @@ use nixl_sys::Agent as NixlAgent;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
tokio
::
runtime
::
Handle
;
use
tokio
::
runtime
::
Handle
;
use
tokio
::
sync
::{
use
tokio
::
sync
::{
Mutex
,
mpsc
::{
self
,
error
::
TryRecvError
},
mpsc
::{
self
,
error
::
TryRecvError
},
oneshot
,
Mutex
,
oneshot
,
};
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
...
@@ -320,20 +321,21 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
...
@@ -320,20 +321,21 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
if
let
Ok
(
blocks
)
=
target_pool
if
let
Ok
(
blocks
)
=
target_pool
.match_sequence_hashes
(
vec!
[
request
.sequence_hash
]
.as_slice
())
.match_sequence_hashes
(
vec!
[
request
.sequence_hash
]
.as_slice
())
.await
.await
&&
!
blocks
.is_empty
()
{
{
if
!
blocks
.is_empty
()
{
continue
;
continue
;
}
}
}
let
target_block
=
'target_block
:
{
let
target_block
=
'target_block
:
{
if
let
Ok
(
blocks
)
=
target_pool
.allocate_blocks
(
1
)
.await
{
if
let
Ok
(
blocks
)
=
target_pool
.allocate_blocks
(
1
)
.await
if
let
Some
(
block
)
=
blocks
.into_iter
()
.next
()
{
&&
let
Some
(
block
)
=
blocks
.into_iter
()
.next
()
break
'target_block
Some
(
block
);
{
}
break
'target_block
Some
(
block
);
}
}
tracing
::
warn!
(
"Target pool full. Skipping offload. This should only ever happen with very small pool sizes."
);
tracing
::
warn!
(
"Target pool full. Skipping offload. This should only ever happen with very small pool sizes."
);
None
None
};
};
...
@@ -504,14 +506,14 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
...
@@ -504,14 +506,14 @@ impl<Locality: LocalityProvider + 'static, Metadata: BlockMetadata>
}
}
}
}
if
let
Some
(
targets
)
=
targets
.as_ref
()
{
if
let
Some
(
targets
)
=
targets
.as_ref
()
if
targets
.len
()
!=
blocks
.len
()
{
&&
targets
.len
()
!=
blocks
.len
()
tx
.send
(
Err
(
BlockPoolError
::
BlockError
(
BlockError
::
Other
(
{
anyhow
::
anyhow!
(
"Number of targets does not match number of blocks."
),
tx
.send
(
Err
(
BlockPoolError
::
BlockError
(
BlockError
::
Other
(
))))
anyhow
::
anyhow!
(
"Number of targets does not match number of blocks."
),
.unwrap
();
))))
return
rx
;
.unwrap
()
;
}
return
rx
;
}
}
if
blocks
.is_empty
()
{
if
blocks
.is_empty
()
{
...
@@ -582,16 +584,16 @@ mod tests {
...
@@ -582,16 +584,16 @@ mod tests {
use
super
::
*
;
use
super
::
*
;
use
crate
::
block_manager
::{
use
crate
::
block_manager
::{
LayoutConfig
,
NixlRegisterableStorage
,
block
::{
block
::{
locality
::
Local
,
BasicMetadata
,
BlockDataExt
,
BlockDataProvider
,
Blocks
,
MutableBlock
,
BasicMetadata
,
BlockDataExt
,
BlockDataProvider
,
Blocks
,
MutableBlock
,
locality
::
Local
,
},
},
layout
::{
nixl
::
NixlLayout
,
FullyContiguous
,
LayerSeparate
,
LayoutType
},
layout
::{
FullyContiguous
,
LayerSeparate
,
LayoutType
,
nixl
::
NixlLayout
},
pool
::{
BlockRegistrationDuplicationSetting
,
ManagedBlockPool
},
pool
::{
BlockRegistrationDuplicationSetting
,
ManagedBlockPool
},
storage
::{
storage
::{
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
DiskStorage
,
PinnedAllocator
,
DeviceAllocator
,
DeviceStorage
,
DiskAllocator
,
DiskStorage
,
PinnedAllocator
,
PinnedStorage
,
StorageAllocator
,
StorageType
,
PinnedStorage
,
StorageAllocator
,
StorageType
,
},
},
LayoutConfig
,
NixlRegisterableStorage
,
};
};
use
crate
::
tokens
::{
TokenBlockSequence
,
Tokens
};
use
crate
::
tokens
::{
TokenBlockSequence
,
Tokens
};
use
nixl_sys
::{
MemoryRegion
,
NixlDescriptor
};
use
nixl_sys
::{
MemoryRegion
,
NixlDescriptor
};
...
...
lib/llm/src/block_manager/offload/pending.rs
View file @
bce74588
...
@@ -48,10 +48,10 @@ use tokio::sync::{mpsc, oneshot};
...
@@ -48,10 +48,10 @@ use tokio::sync::{mpsc, oneshot};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
crate
::
block_manager
::
block
::{
use
crate
::
block_manager
::
block
::{
locality
::
LocalityProvider
,
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
BlockDataProvider
,
BlockDataProviderMut
,
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
BlockDataProvider
,
BlockDataProviderMut
,
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
MutableBlock
,
ReadableBlock
,
WritableBlock
,
MutableBlock
,
ReadableBlock
,
WritableBlock
,
locality
::
LocalityProvider
,
transfer
::{
TransferContext
,
WriteTo
,
WriteToStrategy
},
};
};
use
crate
::
block_manager
::
metrics
::
PoolMetrics
;
use
crate
::
block_manager
::
metrics
::
PoolMetrics
;
use
crate
::
block_manager
::
pool
::{
BlockPool
,
BlockPoolError
};
use
crate
::
block_manager
::
pool
::{
BlockPool
,
BlockPoolError
};
...
@@ -59,7 +59,7 @@ use crate::block_manager::storage::{Local, Storage};
...
@@ -59,7 +59,7 @@ use crate::block_manager::storage::{Local, Storage};
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
futures
::{
stream
::
FuturesUnordered
,
StreamExt
};
use
futures
::{
StreamExt
,
stream
::
FuturesUnordered
};
use
super
::
BlockResult
;
use
super
::
BlockResult
;
...
...
lib/llm/src/block_manager/offload/request.rs
View file @
bce74588
...
@@ -18,7 +18,7 @@ use std::sync::Weak;
...
@@ -18,7 +18,7 @@ use std::sync::Weak;
use
tokio
::
sync
::
oneshot
;
use
tokio
::
sync
::
oneshot
;
use
crate
::
block_manager
::
block
::{
use
crate
::
block_manager
::
block
::{
locality
::
LocalityProvider
,
BlockMetadata
,
ImmutableBlock
,
MutableBlock
,
BlockMetadata
,
ImmutableBlock
,
MutableBlock
,
locality
::
LocalityProvider
,
};
};
use
crate
::
block_manager
::
pool
::
BlockPoolError
;
use
crate
::
block_manager
::
pool
::
BlockPoolError
;
use
crate
::
block_manager
::
storage
::
Storage
;
use
crate
::
block_manager
::
storage
::
Storage
;
...
...
lib/llm/src/block_manager/pool.rs
View file @
bce74588
...
@@ -23,15 +23,15 @@ use serde::{Deserialize, Serialize};
...
@@ -23,15 +23,15 @@ use serde::{Deserialize, Serialize};
pub
use
super
::
block
::{
ImmutableBlock
,
MutableBlock
};
pub
use
super
::
block
::{
ImmutableBlock
,
MutableBlock
};
use
super
::
block
::{
use
super
::
block
::{
nixl
::
short_type_name
,
private
,
registry
::
BlockRegistry
,
Block
,
BlockError
,
BlockMetadata
,
Block
,
BlockError
,
BlockMetadata
,
GlobalRegistry
,
MaybeReturnableBlock
,
nixl
::
short_type_name
,
GlobalRegistry
,
MaybeReturnableBlock
,
private
,
registry
::
BlockRegistry
,
};
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
storage
::
Storage
;
use
super
::
storage
::
Storage
;
use
crate
::
block_manager
::
block
::
locality
::
LocalityProvider
;
use
crate
::
block_manager
::
CacheLevel
;
use
crate
::
block_manager
::
CacheLevel
;
use
crate
::
block_manager
::
block
::
locality
::
LocalityProvider
;
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
...
...
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment