Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
9ab148dc
Unverified
Commit
9ab148dc
authored
Mar 01, 2026
by
Ryan Olson
Committed by
GitHub
Mar 01, 2026
Browse files
feat: kvbm-physical (#6490)
Signed-off-by:
Ryan Olson
<
rolson@nvidia.com
>
parent
7546c193
Changes
54
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
3896 additions
and
47 deletions
+3896
-47
lib/kvbm-physical/src/transfer/notifications/mod.rs
lib/kvbm-physical/src/transfer/notifications/mod.rs
+193
-0
lib/kvbm-physical/src/transfer/notifications/nixl_events.rs
lib/kvbm-physical/src/transfer/notifications/nixl_events.rs
+195
-0
lib/kvbm-physical/src/transfer/notifications/nixl_status.rs
lib/kvbm-physical/src/transfer/notifications/nixl_status.rs
+33
-0
lib/kvbm-physical/src/transfer/notifications/notification.rs
lib/kvbm-physical/src/transfer/notifications/notification.rs
+159
-0
lib/kvbm-physical/src/transfer/options.rs
lib/kvbm-physical/src/transfer/options.rs
+150
-0
lib/kvbm-physical/src/transfer/preferences.rs
lib/kvbm-physical/src/transfer/preferences.rs
+122
-0
lib/kvbm-physical/src/transfer/strategy.rs
lib/kvbm-physical/src/transfer/strategy.rs
+503
-0
lib/kvbm-physical/src/transfer/testing.rs
lib/kvbm-physical/src/transfer/testing.rs
+365
-0
lib/kvbm-physical/src/transfer/tests/local_transfers.rs
lib/kvbm-physical/src/transfer/tests/local_transfers.rs
+1081
-0
lib/kvbm-physical/src/transfer/tests/mod.rs
lib/kvbm-physical/src/transfer/tests/mod.rs
+572
-0
lib/kvbm-physical/src/transfer/validation.rs
lib/kvbm-physical/src/transfer/validation.rs
+465
-0
lib/memory/src/nixl.rs
lib/memory/src/nixl.rs
+2
-1
lib/memory/src/nixl/agent.rs
lib/memory/src/nixl/agent.rs
+4
-1
lib/runtime/examples/Cargo.lock
lib/runtime/examples/Cargo.lock
+52
-45
No files found.
lib/kvbm-physical/src/transfer/notifications/mod.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer completion notification system.
//!
//! This module provides abstractions for waiting on transfer completions using different
//! mechanisms: polling-based (NIXL status, CUDA events) and event-based (NIXL notifications).
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
Arc
;
use
std
::
time
::{
Duration
,
Instant
};
use
anyhow
::
Result
;
use
tokio
::
sync
::
mpsc
;
use
tokio
::
time
::
interval
;
use
tracing
::{
error
,
warn
};
use
uuid
::
Uuid
;
use
velo_events
::{
EventHandle
,
EventManager
};
pub
mod
cuda_event
;
pub
mod
nixl_events
;
pub
mod
nixl_status
;
pub
mod
notification
;
pub
use
cuda_event
::
CudaEventChecker
;
pub
use
nixl_events
::{
RegisterNixlNotification
,
process_nixl_notification_events
};
pub
use
nixl_status
::
NixlStatusChecker
;
pub
use
notification
::
TransferCompleteNotification
;
/// Trait for checking if a transfer operation has completed.
/// Supports polling-based completion checks (NIXL status, CUDA events).
pub
trait
CompletionChecker
:
Send
{
/// Returns true if the transfer is complete, false if still pending.
fn
is_complete
(
&
self
)
->
Result
<
bool
>
;
}
/// Registration message for polling-based transfer completion.
pub
struct
RegisterPollingNotification
<
C
:
CompletionChecker
>
{
pub
uuid
:
Uuid
,
pub
checker
:
C
,
pub
event_handle
:
EventHandle
,
}
/// Tracking struct for outstanding polling-based transfers.
struct
OutstandingPollingTransfer
<
C
:
CompletionChecker
>
{
checker
:
C
,
event_handle
:
EventHandle
,
arrived_at
:
Instant
,
last_warned_at
:
Option
<
Instant
>
,
}
/// Helper function to check if a transfer should be warned about and log the warning.
/// Returns the new last_warned_at time if a warning was issued.
fn
check_and_warn_slow_transfer
(
uuid
:
&
Uuid
,
arrived_at
:
Instant
,
last_warned_at
:
Option
<
Instant
>
,
)
->
Option
<
Instant
>
{
let
elapsed
=
arrived_at
.elapsed
();
if
elapsed
>
Duration
::
from_secs
(
60
)
{
let
should_warn
=
last_warned_at
.map
(|
last
|
last
.elapsed
()
>
Duration
::
from_secs
(
30
))
.unwrap_or
(
true
);
if
should_warn
{
warn!
(
uuid
=
%
uuid
,
elapsed_secs
=
elapsed
.as_secs
(),
"Transfer has been pending for over 1 minute"
);
return
Some
(
Instant
::
now
());
}
}
last_warned_at
}
/// Generic polling-based transfer completion handler.
/// Works with any CompletionChecker implementation (NIXL status, CUDA events, etc.)
pub
async
fn
process_polling_notifications
<
C
:
CompletionChecker
>
(
mut
rx
:
mpsc
::
Receiver
<
RegisterPollingNotification
<
C
>>
,
system
:
Arc
<
EventManager
>
,
)
{
let
mut
outstanding
:
HashMap
<
Uuid
,
OutstandingPollingTransfer
<
C
>>
=
HashMap
::
new
();
let
mut
check_interval
=
interval
(
Duration
::
from_millis
(
1
));
loop
{
tokio
::
select!
{
// Handle new transfer requests
notification
=
rx
.recv
()
=>
{
match
notification
{
Some
(
notif
)
=>
{
outstanding
.insert
(
notif
.uuid
,
OutstandingPollingTransfer
{
checker
:
notif
.checker
,
event_handle
:
notif
.event_handle
,
arrived_at
:
Instant
::
now
(),
last_warned_at
:
None
,
});
}
None
=>
{
// Channel closed, finish processing outstanding transfers then exit
break
;
}
}
}
// Periodically check status of outstanding transfers
_
=
check_interval
.tick
(),
if
!
outstanding
.is_empty
()
=>
{
let
mut
completed
=
Vec
::
new
();
for
(
uuid
,
transfer
)
in
outstanding
.iter_mut
()
{
// Check transfer status
match
transfer
.checker
.is_complete
()
{
Ok
(
true
)
=>
{
// Transfer complete - mark for removal
completed
.push
((
*
uuid
,
Ok
(())));
}
Ok
(
false
)
=>
{
// Transfer still in progress - check if we should warn
transfer
.last_warned_at
=
check_and_warn_slow_transfer
(
uuid
,
transfer
.arrived_at
,
transfer
.last_warned_at
,
);
}
Err
(
e
)
=>
{
warn!
(
uuid
=
%
uuid
,
error
=
%
e
,
"Transfer status check failed"
);
completed
.push
((
*
uuid
,
Err
(
e
)));
}
}
}
// Remove completed transfers and signal completion
for
(
uuid
,
result
)
in
completed
{
if
let
Some
(
transfer
)
=
outstanding
.remove
(
&
uuid
)
{
// Signal completion via Nova event system
match
result
{
Ok
(())
=>
{
if
let
Err
(
e
)
=
system
.trigger
(
transfer
.event_handle
)
{
error!
(
uuid
=
%
uuid
,
error
=
%
e
,
"Failed to trigger completion event"
);
}
}
Err
(
e
)
=>
{
if
let
Err
(
err
)
=
system
.poison
(
transfer
.event_handle
,
e
.to_string
())
{
error!
(
uuid
=
%
uuid
,
error
=
%
err
,
"Failed to poison completion event"
);
}
}
}
}
}
}
}
}
// Channel closed, but we may still have outstanding transfers
// Continue processing them until all are complete
while
!
outstanding
.is_empty
()
{
check_interval
.tick
()
.await
;
let
mut
completed
=
Vec
::
new
();
for
(
uuid
,
transfer
)
in
outstanding
.iter_mut
()
{
match
transfer
.checker
.is_complete
()
{
Ok
(
true
)
=>
completed
.push
((
*
uuid
,
Ok
(()))),
Ok
(
false
)
=>
{}
Err
(
e
)
=>
completed
.push
((
*
uuid
,
Err
(
e
))),
}
}
for
(
uuid
,
result
)
in
completed
{
if
let
Some
(
transfer
)
=
outstanding
.remove
(
&
uuid
)
{
match
result
{
Ok
(())
=>
{
let
_
=
system
.trigger
(
transfer
.event_handle
);
}
Err
(
e
)
=>
{
let
_
=
system
.poison
(
transfer
.event_handle
,
e
.to_string
());
}
}
}
}
}
}
lib/kvbm-physical/src/transfer/notifications/nixl_events.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! NIXL notification-based completion handler.
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
Arc
;
use
std
::
time
::{
Duration
,
Instant
};
use
dynamo_memory
::
nixl
::{
Agent
as
NixlAgent
,
NotificationMap
,
XferRequest
};
use
tokio
::
sync
::
mpsc
;
use
tokio
::
time
::
interval
;
use
tracing
::{
error
,
warn
};
use
uuid
::
Uuid
;
use
velo_events
::{
EventHandle
,
EventManager
};
/// Registration message for NIXL notification-based transfer completion.
pub
struct
RegisterNixlNotification
{
pub
uuid
:
Uuid
,
pub
xfer_req
:
XferRequest
,
pub
event_handle
:
EventHandle
,
}
/// Tracking struct for outstanding NIXL notification transfers.
struct
OutstandingTransfer
{
#[allow(dead_code)]
// Kept for potential future cleanup or debugging
xfer_req
:
XferRequest
,
event_handle
:
EventHandle
,
arrived_at
:
Instant
,
last_warned_at
:
Option
<
Instant
>
,
}
/// Helper function to check if a transfer should be warned about and log the warning.
/// Returns the new last_warned_at time if a warning was issued.
fn
check_and_warn_slow_transfer
(
uuid
:
&
Uuid
,
arrived_at
:
Instant
,
last_warned_at
:
Option
<
Instant
>
,
)
->
Option
<
Instant
>
{
let
elapsed
=
arrived_at
.elapsed
();
if
elapsed
>
Duration
::
from_secs
(
60
)
{
let
should_warn
=
last_warned_at
.map
(|
last
|
last
.elapsed
()
>
Duration
::
from_secs
(
30
))
.unwrap_or
(
true
);
if
should_warn
{
warn!
(
uuid
=
%
uuid
,
elapsed_secs
=
elapsed
.as_secs
(),
"Transfer has been pending for over 1 minute"
);
return
Some
(
Instant
::
now
());
}
}
last_warned_at
}
/// NIXL notification-based transfer completion handler.
/// Fetches notifications in batches and matches them against outstanding transfers.
pub
async
fn
process_nixl_notification_events
(
agent
:
NixlAgent
,
mut
rx
:
mpsc
::
Receiver
<
RegisterNixlNotification
>
,
system
:
Arc
<
EventManager
>
,
)
{
let
mut
outstanding
:
HashMap
<
Uuid
,
OutstandingTransfer
>
=
HashMap
::
new
();
let
mut
check_interval
=
interval
(
Duration
::
from_millis
(
1
));
loop
{
tokio
::
select!
{
// Handle new transfer requests
notification
=
rx
.recv
()
=>
{
match
notification
{
Some
(
notif
)
=>
{
outstanding
.insert
(
notif
.uuid
,
OutstandingTransfer
{
xfer_req
:
notif
.xfer_req
,
event_handle
:
notif
.event_handle
,
arrived_at
:
Instant
::
now
(),
last_warned_at
:
None
,
});
}
None
=>
{
// Channel closed, finish processing outstanding transfers then exit
break
;
}
}
}
// Periodically fetch and process notifications
_
=
check_interval
.tick
(),
if
!
outstanding
.is_empty
()
=>
{
// Create notification map inside this branch to avoid Send issues
let
mut
notif_map
=
match
NotificationMap
::
new
()
{
Ok
(
map
)
=>
map
,
Err
(
e
)
=>
{
warn!
(
error
=
%
e
,
"Failed to create notification map"
);
continue
;
}
};
// Fetch all pending notifications
if
let
Err
(
e
)
=
agent
.get_notifications
(
&
mut
notif_map
,
None
)
{
warn!
(
error
=
%
e
,
"Failed to fetch NIXL notifications"
);
continue
;
}
// Process notifications and match against outstanding transfers
let
notifications
=
match
notif_map
.take_notifs
()
{
Ok
(
notifs
)
=>
notifs
,
Err
(
e
)
=>
{
warn!
(
error
=
%
e
,
"Failed to extract notifications from map"
);
continue
;
}
};
let
mut
completed
=
Vec
::
new
();
// Iterate through all notifications
for
(
_
agent_name
,
notif_strings
)
in
notifications
{
for
notif_str
in
notif_strings
{
// Try to parse notification as UUID
// NOTE: This assumes notifications contain UUIDs.
// The actual format may be different and may need adjustment.
if
let
Ok
(
notif_uuid
)
=
Uuid
::
parse_str
(
&
notif_str
)
{
if
outstanding
.contains_key
(
&
notif_uuid
)
{
completed
.push
(
notif_uuid
);
}
else
{
// Notification arrived before we started waiting for it
// This is the race condition we need to handle
warn!
(
uuid
=
%
notif_uuid
,
"Received notification for transfer not in outstanding map (early arrival)"
);
}
}
}
}
// Check for slow transfers and update warnings
for
(
uuid
,
transfer
)
in
outstanding
.iter_mut
()
{
if
!
completed
.contains
(
uuid
)
{
transfer
.last_warned_at
=
check_and_warn_slow_transfer
(
uuid
,
transfer
.arrived_at
,
transfer
.last_warned_at
,
);
}
}
// Remove completed transfers and signal completion
for
uuid
in
completed
{
if
let
Some
(
transfer
)
=
outstanding
.remove
(
&
uuid
)
&&
let
Err
(
e
)
=
system
.trigger
(
transfer
.event_handle
)
{
error!
(
uuid
=
%
uuid
,
error
=
%
e
,
"Failed to trigger completion event"
);
}
}
}
}
}
// Channel closed, but we may still have outstanding transfers
// Continue processing them until all are complete
while
!
outstanding
.is_empty
()
{
check_interval
.tick
()
.await
;
let
mut
notif_map
=
match
NotificationMap
::
new
()
{
Ok
(
map
)
=>
map
,
Err
(
_
)
=>
continue
,
};
if
let
Ok
(())
=
agent
.get_notifications
(
&
mut
notif_map
,
None
)
&&
let
Ok
(
notifications
)
=
notif_map
.take_notifs
()
{
let
mut
completed
=
Vec
::
new
();
for
(
_
agent_name
,
notif_strings
)
in
notifications
{
for
notif_str
in
notif_strings
{
if
let
Ok
(
notif_uuid
)
=
Uuid
::
parse_str
(
&
notif_str
)
&&
outstanding
.contains_key
(
&
notif_uuid
)
{
completed
.push
(
notif_uuid
);
}
}
}
for
uuid
in
completed
{
if
let
Some
(
transfer
)
=
outstanding
.remove
(
&
uuid
)
{
let
_
=
system
.trigger
(
transfer
.event_handle
);
}
}
}
}
}
lib/kvbm-physical/src/transfer/notifications/nixl_status.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! NIXL status polling-based completion checker.
use
anyhow
::{
Result
,
anyhow
};
use
dynamo_memory
::
nixl
::{
Agent
as
NixlAgent
,
XferRequest
};
use
super
::
CompletionChecker
;
/// Completion checker that polls NIXL transfer status.
pub
struct
NixlStatusChecker
{
agent
:
NixlAgent
,
xfer_req
:
XferRequest
,
}
impl
NixlStatusChecker
{
pub
fn
new
(
agent
:
NixlAgent
,
xfer_req
:
XferRequest
)
->
Self
{
Self
{
agent
,
xfer_req
}
}
}
impl
CompletionChecker
for
NixlStatusChecker
{
fn
is_complete
(
&
self
)
->
Result
<
bool
>
{
// get_xfer_status returns XferStatus enum:
// - XferStatus::Success means transfer is complete
// - XferStatus::InProgress means still pending
match
self
.agent
.get_xfer_status
(
&
self
.xfer_req
)
{
Ok
(
status
)
=>
Ok
(
status
.is_success
()),
Err
(
e
)
=>
Err
(
anyhow!
(
"NIXL transfer status check failed: {}"
,
e
)),
}
}
}
lib/kvbm-physical/src/transfer/notifications/notification.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer completion notification handle.
use
anyhow
::
Result
;
use
futures
::
future
::{
Either
,
Ready
,
ready
};
use
std
::{
pin
::
Pin
,
sync
::
Arc
,
task
::{
Context
,
Poll
},
};
use
velo_events
::{
Event
,
EventAwaiter
,
EventManager
};
pub
enum
TransferAwaiter
{
Local
(
EventAwaiter
),
// Sync(SyncResult),
}
impl
std
::
future
::
Future
for
TransferAwaiter
{
type
Output
=
Result
<
()
>
;
fn
poll
(
self
:
Pin
<&
mut
Self
>
,
cx
:
&
mut
Context
<
'_
>
)
->
Poll
<
Self
::
Output
>
{
match
self
.get_mut
()
{
Self
::
Local
(
waiter
)
=>
Pin
::
new
(
waiter
)
.poll
(
cx
),
// Self::Sync(sync) => Pin::new(sync).poll(cx),
}
}
}
/// Notification handle for an in-progress transfer.
///
/// This object can be awaited to block until the transfer completes.
/// The transfer is tracked by a background handler that polls for completion
/// or processes notification events.
///
/// Uses `futures::Either` to avoid event system overhead for synchronous completions.
/// Pending transfers use `LocalEventWaiter` which avoids heap allocation and repeated
/// DashMap lookups when awaiting.
pub
struct
TransferCompleteNotification
{
awaiter
:
Either
<
Ready
<
Result
<
()
>>
,
TransferAwaiter
>
,
}
impl
TransferCompleteNotification
{
/// Create a notification that is already completed (for synchronous transfers).
///
/// This is useful for transfers that complete immediately without needing
/// background polling, such as memcpy operations.
///
/// This is extremely efficient - no allocations, locks, or event system overhead.
pub
fn
completed
()
->
Self
{
Self
{
awaiter
:
Either
::
Left
(
ready
(
Ok
(()))),
}
}
/// Create a notification from a `LocalEventWaiter`.
///
/// This is the primary way to construct a notification when you already
/// have an event waiter from the event system.
pub
fn
from_awaiter
(
awaiter
:
EventAwaiter
)
->
Self
{
Self
{
awaiter
:
Either
::
Right
(
TransferAwaiter
::
Local
(
awaiter
)),
}
}
// /// Create a notification from a synchronous active message result.
// pub fn from_sync_result(sync: SyncResult) -> Self {
// Self {
// awaiter: Either::Right(TransferAwaiter::Sync(sync)),
// }
// }
/// Check if the notification can yield the current task.
///
/// The internal ::Left arm is guaranteed to be ready, while the ::Right arm is not.
pub
fn
could_yield
(
&
self
)
->
bool
{
matches!
(
self
.awaiter
,
Either
::
Right
(
_
))
}
/// Aggregate multiple notifications into one that completes when all are done.
///
/// This is useful when a transfer is split across multiple workers and you want
/// to wait for all of them to complete.
///
/// # Arguments
/// * `notifications` - The notifications to aggregate
/// * `events` - The event system to create the aggregate event
/// * `runtime` - The tokio runtime handle to spawn the aggregation task
///
/// # Behavior
/// - If the list is empty, returns an already-completed notification
/// - If there's only one, returns it directly
/// - Otherwise, creates a new event and spawns a task to await all notifications
pub
fn
aggregate
(
notifications
:
Vec
<
Self
>
,
events
:
&
Arc
<
EventManager
>
,
runtime
:
&
tokio
::
runtime
::
Handle
,
)
->
Result
<
Self
>
{
if
notifications
.is_empty
()
{
return
Ok
(
Self
::
completed
());
}
if
notifications
.len
()
==
1
{
return
Ok
(
notifications
.into_iter
()
.next
()
.unwrap
());
}
// Check if all notifications are already complete (no yielding needed)
if
notifications
.iter
()
.all
(|
n
|
!
n
.could_yield
())
{
return
Ok
(
Self
::
completed
());
}
// Create a new event for the aggregate completion
let
event
=
events
.new_event
()
?
;
let
awaiter
=
events
.awaiter
(
event
.handle
())
?
;
// Spawn task that awaits all notifications and triggers/poisons the event
runtime
.spawn
(
await_all_notifications
(
notifications
,
event
));
Ok
(
Self
::
from_awaiter
(
awaiter
))
}
}
/// Awaits all transfer notifications and signals completion via the event.
///
/// This function awaits ALL notifications regardless of individual failures,
/// then triggers the event on success or poisons it with error details on failure.
async
fn
await_all_notifications
(
notifications
:
Vec
<
TransferCompleteNotification
>
,
local_event
:
Event
,
)
{
// Await all notifications, collecting results
let
results
:
Vec
<
Result
<
()
>>
=
futures
::
future
::
join_all
(
notifications
.into_iter
()
.map
(|
n
|
n
.into_future
()))
.await
;
// Check for any failures
let
errors
:
Vec
<
_
>
=
results
.into_iter
()
.filter_map
(|
r
|
r
.err
())
.collect
();
if
errors
.is_empty
()
{
// Ignore trigger error - if event system is shutdown, nothing to do
let
_
=
local_event
.trigger
();
}
else
{
let
error_msg
=
errors
.iter
()
.map
(|
e
|
e
.to_string
())
.collect
::
<
Vec
<
_
>>
()
.join
(
"; "
);
// Ignore poison error - if event system is shutdown, nothing to do
let
_
=
local_event
.poison
(
error_msg
);
}
}
impl
std
::
future
::
IntoFuture
for
TransferCompleteNotification
{
type
Output
=
Result
<
()
>
;
type
IntoFuture
=
Either
<
Ready
<
Result
<
()
>>
,
TransferAwaiter
>
;
fn
into_future
(
self
)
->
Self
::
IntoFuture
{
self
.awaiter
}
}
lib/kvbm-physical/src/transfer/options.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer options for configuring block and layer transfers.
use
super
::
BounceBuffer
;
use
crate
::
layout
::
KvBlockLayout
;
use
cudarc
::
driver
::
CudaStream
;
use
derive_builder
::
Builder
;
use
derive_getters
::
Dissolve
;
use
std
::
ops
::
Range
;
use
std
::
sync
::
Arc
;
/// Options for configuring transfer operations.
///
/// This structure provides configuration for block and layer transfers,
/// including layer ranges, NIXL write notifications, and bounce buffers.
///
/// # Examples
///
/// ```rust,ignore
/// let options = TransferOptions::builder()
/// .nixl_write_notification(42)
/// .layer_range(0..10)
/// .build();
/// ```
#[derive(Clone,
Default,
Builder,
Dissolve)]
#[builder(pattern
=
"owned"
,
default)]
pub
struct
TransferOptions
{
/// Range of layers to transfer (None = all layers).
///
/// When specified, only the layers in this range will be transferred.
/// This is useful for partial block transfers or layer-specific operations.
#[builder(default,
setter(strip_option))]
pub
layer_range
:
Option
<
Range
<
usize
>>
,
/// NIXL write notification value delivered after RDMA write completes.
///
/// When specified, NIXL will deliver this notification value to the remote
/// node after the RDMA write operation completes. This enables efficient
/// notification of transfer completion without requiring polling.
#[builder(default,
setter(strip_option))]
pub
nixl_write_notification
:
Option
<
u64
>
,
/// Bounce buffer specification for multi-hop transfers.
///
/// When direct transfers are not allowed or efficient, this specifies
/// an intermediate staging area. The transfer will be split into two hops:
/// source → bounce buffer → destination.
#[builder(default,
setter(strip_option,
into))]
pub
bounce_buffer
:
Option
<
BounceBuffer
>
,
/// Optional caller-provided CUDA stream for the transfer.
///
/// When provided, the transfer executor will use this stream instead of
/// acquiring one from the pool. The caller is responsible for synchronization -
/// no event is recorded by the executor.
///
/// This is useful for layer-wise transfers where all layers must execute
/// on the same stream to allow proper event sequencing.
#[builder(default,
setter(strip_option))]
pub
cuda_stream
:
Option
<
Arc
<
CudaStream
>>
,
/// Override source block layout interpretation.
///
/// When set, the transfer executor will treat source blocks as having
/// this layout instead of the layout's default block_layout().
/// This enables transferring blocks that are stored in one format
/// but should be interpreted as another (e.g., operational → universal).
#[builder(default,
setter(strip_option))]
pub
src_kv_layout
:
Option
<
KvBlockLayout
>
,
/// Override destination block layout interpretation.
///
/// When set, the transfer executor will treat destination blocks as having
/// this layout instead of the layout's default block_layout().
/// This enables writing blocks in a different format than the destination
/// layout's native format.
#[builder(default,
setter(strip_option))]
pub
dst_kv_layout
:
Option
<
KvBlockLayout
>
,
}
impl
TransferOptions
{
/// Create a new builder for transfer options.
pub
fn
builder
()
->
TransferOptionsBuilder
{
TransferOptionsBuilder
::
default
()
}
/// Create transfer options from an optional layer range.
pub
fn
from_layer_range
(
layer_range
:
Option
<
Range
<
usize
>>
)
->
Self
{
Self
{
layer_range
,
..
Self
::
default
()
}
}
/// Create default transfer options.
///
/// This transfers all layers with no special configuration.
pub
fn
new
()
->
Self
{
Self
::
default
()
}
}
#[cfg(all(test,
feature
=
"testing-kvbm"
))]
mod
tests
{
use
super
::
*
;
#[test]
fn
test_default_options
()
{
let
options
=
TransferOptions
::
default
();
assert
!
(
options
.layer_range
.is_none
());
assert
!
(
options
.nixl_write_notification
.is_none
());
assert
!
(
options
.bounce_buffer
.is_none
());
}
#[test]
fn
test_builder_with_notification
()
{
let
options
=
TransferOptions
::
builder
()
.nixl_write_notification
(
42
)
.build
()
.unwrap
();
assert_eq!
(
options
.nixl_write_notification
,
Some
(
42
));
assert
!
(
options
.layer_range
.is_none
());
}
#[test]
fn
test_builder_with_layer_range
()
{
let
options
=
TransferOptions
::
builder
()
.layer_range
(
0
..
10
)
.build
()
.unwrap
();
assert_eq!
(
options
.layer_range
,
Some
(
0
..
10
));
assert
!
(
options
.nixl_write_notification
.is_none
());
}
#[test]
fn
test_builder_with_all_options
()
{
let
options
=
TransferOptions
::
builder
()
.nixl_write_notification
(
100
)
.layer_range
(
5
..
15
)
.build
()
.unwrap
();
assert_eq!
(
options
.nixl_write_notification
,
Some
(
100
));
assert_eq!
(
options
.layer_range
,
Some
(
5
..
15
));
}
}
lib/kvbm-physical/src/transfer/preferences.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#![allow(dead_code)]
//! Transfer preferences for resolving redundant strategy choices.
//!
//! Some source/destination combinations can use multiple transfer strategies.
//! For example:
//! - System ↔ Pinned: memcpy or NIXL
//! - Pinned ↔ Device: CUDA or NIXL
//!
//! This module provides preferences to control which strategy to prefer.
use
serde
::{
Deserialize
,
Serialize
};
/// Policy for choosing between native transports (memcpy/CUDA) and NIXL.
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq,
Serialize,
Deserialize,
Default)]
pub
enum
NativeVsNixlPolicy
{
/// Always prefer native transports (memcpy/CUDA) when available
PreferNative
,
/// Always prefer NIXL when available
PreferNixl
,
/// Use native for local-to-local, NIXL for remote/disk
#[default]
Automatic
,
}
/// Transfer preferences for strategy selection.
///
/// These preferences allow fine-grained control over transfer strategy selection
/// when multiple valid strategies exist for a source/destination pair.
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
pub
struct
TransferPreferences
{
/// Policy for native vs NIXL transport selection
pub
native_vs_nixl
:
NativeVsNixlPolicy
,
/// Whether to prefer async CUDA operations over blocking ones
pub
prefer_async_cuda
:
bool
,
}
impl
Default
for
TransferPreferences
{
fn
default
()
->
Self
{
Self
{
native_vs_nixl
:
NativeVsNixlPolicy
::
default
(),
prefer_async_cuda
:
true
,
}
}
}
impl
TransferPreferences
{
/// Create preferences with all defaults.
pub
fn
new
()
->
Self
{
Self
::
default
()
}
/// Create preferences that always prefer native transports.
pub
fn
prefer_native
()
->
Self
{
Self
{
native_vs_nixl
:
NativeVsNixlPolicy
::
PreferNative
,
prefer_async_cuda
:
true
,
}
}
/// Create preferences that always prefer NIXL.
pub
fn
prefer_nixl
()
->
Self
{
Self
{
native_vs_nixl
:
NativeVsNixlPolicy
::
PreferNixl
,
prefer_async_cuda
:
true
,
}
}
/// Set the native vs NIXL policy.
pub
fn
with_native_vs_nixl
(
mut
self
,
policy
:
NativeVsNixlPolicy
)
->
Self
{
self
.native_vs_nixl
=
policy
;
self
}
/// Set whether to prefer async CUDA operations.
pub
fn
with_async_cuda
(
mut
self
,
prefer_async
:
bool
)
->
Self
{
self
.prefer_async_cuda
=
prefer_async
;
self
}
}
#[cfg(all(test,
feature
=
"testing-kvbm"
))]
mod
tests
{
use
super
::
*
;
#[test]
fn
test_default_preferences
()
{
let
prefs
=
TransferPreferences
::
default
();
assert_eq!
(
prefs
.native_vs_nixl
,
NativeVsNixlPolicy
::
Automatic
);
assert
!
(
prefs
.prefer_async_cuda
);
}
#[test]
fn
test_prefer_native
()
{
let
prefs
=
TransferPreferences
::
prefer_native
();
assert_eq!
(
prefs
.native_vs_nixl
,
NativeVsNixlPolicy
::
PreferNative
);
assert
!
(
prefs
.prefer_async_cuda
);
}
#[test]
fn
test_prefer_nixl
()
{
let
prefs
=
TransferPreferences
::
prefer_nixl
();
assert_eq!
(
prefs
.native_vs_nixl
,
NativeVsNixlPolicy
::
PreferNixl
);
assert
!
(
prefs
.prefer_async_cuda
);
}
#[test]
fn
test_builder_pattern
()
{
let
prefs
=
TransferPreferences
::
new
()
.with_native_vs_nixl
(
NativeVsNixlPolicy
::
PreferNixl
)
.with_async_cuda
(
false
);
assert_eq!
(
prefs
.native_vs_nixl
,
NativeVsNixlPolicy
::
PreferNixl
);
assert
!
(
!
prefs
.prefer_async_cuda
);
}
}
lib/kvbm-physical/src/transfer/strategy.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer strategy selection based on source and destination storage locations.
use
dynamo_memory
::
StorageKind
;
use
crate
::{
layout
::
PhysicalLayout
,
transfer
::
TransferContext
};
use
super
::
TransferCapabilities
;
/// Transfer strategy to use for copying memory between locations.
///
/// The strategy is determined by the source and destination storage locations.
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq)]
pub
enum
TransferStrategy
{
/// CPU memcpy (for host-to-host transfers)
Memcpy
,
/// CUDA async host-to-device transfer
CudaAsyncH2D
,
/// CUDA async device-to-host transfer
CudaAsyncD2H
,
/// CUDA async device-to-device transfer
CudaAsyncD2D
,
/// NIXL read operation (pull from remote)
NixlRead
,
/// NIXL write operation (push to remote)
NixlWrite
,
/// NIXL write (flipped local and remote order)
/// This is needed for some NIXL backends.
/// For example, the POSIX backend requires that host memory
/// always be the "local" descriptor list, regardless of whether
/// it's a read or write.
#[expect(dead_code)]
NixlWriteFlipped
,
/// NIXL read (flipped local and remote order)
NixlReadFlipped
,
/// Invalid/unsupported transfer
#[expect(dead_code)]
Invalid
,
}
/// Plan for executing a transfer, either direct or via bounce buffer.
///
/// Some transfers require staging through host memory when direct paths
/// are not enabled via capabilities.
#[derive(Debug,
Clone,
PartialEq,
Eq)]
pub
enum
TransferPlan
{
/// Direct single-hop transfer using the specified strategy.
Direct
(
TransferStrategy
),
/// Two-hop transfer requiring a bounce buffer in host memory.
///
/// This is used when:
/// - Device → Remote (without GPU RDMA)
/// - Disk → Remote
/// - Device ↔ Disk (without GDS)
TwoHop
{
/// First hop strategy (src → bounce)
first
:
TransferStrategy
,
/// Bounce buffer location (always Pinned for best performance)
bounce_location
:
StorageKind
,
/// Second hop strategy (bounce → dst)
second
:
TransferStrategy
,
},
}
pub
(
crate
)
fn
select_strategy
(
src
:
&
PhysicalLayout
,
dst
:
&
PhysicalLayout
,
ctx
:
&
TransferContext
,
)
->
anyhow
::
Result
<
TransferPlan
>
{
let
is_src_local
=
src
.nixl_metadata
()
.agent_name
()
==
ctx
.nixl_agent
()
.name
();
let
is_dst_local
=
dst
.nixl_metadata
()
.agent_name
()
==
ctx
.nixl_agent
()
.name
();
if
!
is_src_local
&&
!
is_dst_local
{
return
Err
(
anyhow
::
anyhow!
(
"Both src and dst are remote - this is not supported."
));
}
if
is_src_local
&&
is_dst_local
{
return
Ok
(
select_direct_strategy
(
src
.location
(),
dst
.location
(),
false
,
ctx
.capabilities
(),
));
}
select_remote_strategy_v2
(
src
.location
(),
is_src_local
,
dst
.location
(),
is_dst_local
,
ctx
.capabilities
(),
)
}
/// Select the appropriate transfer plan based on source and destination locations.
///
/// # Arguments
/// * `src` - Source storage location (always local)
/// * `dst` - Destination storage location (can be local or remote)
/// * `dst_is_remote` - Whether destination is on a remote node
/// * `capabilities` - Transfer capability flags
///
/// # Returns
/// A transfer plan (direct or two-hop)
///
/// # Conservative Default Policy
///
/// With default capabilities (all disabled):
/// - Device can only transfer to/from Host
/// - Disk can only transfer to/from Host
/// - Host can transfer to Device, Disk, or Remote
/// - Device ↔ Device is allowed (native CUDA)
///
/// Transfers that would violate this policy are staged through host:
/// - Device → Remote: Device → Host → Remote (2 hops)
/// - Disk → Remote: Disk → Host → Remote (2 hops)
/// - Device ↔ Disk: Device → Host → Disk (2 hops)
///
/// # Optional Direct Paths
///
/// - `allow_gds`: Enables Disk ↔ Device direct transfers
/// - `allow_gpu_rdma`: Enables Device → Remote direct transfers
fn
select_direct_strategy
(
src
:
StorageKind
,
dst
:
StorageKind
,
dst_is_remote
:
bool
,
capabilities
:
&
TransferCapabilities
,
)
->
TransferPlan
{
use
StorageKind
::
*
;
use
TransferStrategy
::
*
;
// Handle remote destination
if
dst_is_remote
{
return
select_remote_strategy
(
src
,
capabilities
);
}
// Local-to-local transfers
match
(
src
,
dst
)
{
// Host ↔ Host - direct memcpy
(
System
,
System
)
|
(
System
,
Pinned
)
|
(
Pinned
,
System
)
|
(
Pinned
,
Pinned
)
=>
{
TransferPlan
::
Direct
(
Memcpy
)
}
// Host → Device - direct CUDA
(
System
,
Device
(
_
))
=>
panic!
(
"System to Device transfers are not supported"
),
(
Pinned
,
Device
(
_
))
=>
TransferPlan
::
Direct
(
CudaAsyncH2D
),
// Device → Host - direct CUDA
(
Device
(
_
),
System
)
=>
panic!
(
"Device to System transfers are not supported"
),
(
Device
(
_
),
Pinned
)
=>
TransferPlan
::
Direct
(
CudaAsyncD2H
),
// Device ↔ Device - direct CUDA
(
Device
(
_
),
Device
(
_
))
=>
TransferPlan
::
Direct
(
CudaAsyncD2D
),
// Host ↔ Disk - direct NIXL
(
System
,
Disk
(
_
))
|
(
Pinned
,
Disk
(
_
))
=>
TransferPlan
::
Direct
(
NixlWrite
),
(
Disk
(
_
),
System
)
|
(
Disk
(
_
),
Pinned
)
=>
TransferPlan
::
Direct
(
NixlReadFlipped
),
// Disk ↔ Disk - NIXL doesn't seem to support direct transfers here.
// Leaving this as two-hop for now.
(
Disk
(
_
),
Disk
(
_
))
=>
TransferPlan
::
TwoHop
{
first
:
NixlReadFlipped
,
bounce_location
:
Pinned
,
second
:
NixlWrite
,
},
// Device ↔ Disk - check GDS capability
(
Device
(
_
),
Disk
(
_
))
=>
{
if
capabilities
.allows_device_disk_direct
()
{
// Direct GDS transfer
TransferPlan
::
Direct
(
NixlWrite
)
}
else
{
// Stage through host: Device → Pinned → Disk
TransferPlan
::
TwoHop
{
first
:
CudaAsyncD2H
,
bounce_location
:
Pinned
,
second
:
NixlWrite
,
}
}
}
(
Disk
(
_
),
Device
(
_
))
=>
{
if
capabilities
.allows_device_disk_direct
()
{
// Direct GDS transfer
TransferPlan
::
Direct
(
NixlRead
)
}
else
{
// Stage through host: Disk → Pinned → Device
TransferPlan
::
TwoHop
{
first
:
NixlReadFlipped
,
bounce_location
:
Pinned
,
second
:
CudaAsyncH2D
,
}
}
}
}
}
/// Select transfer strategy for remote destination.
fn
select_remote_strategy
(
src
:
StorageKind
,
capabilities
:
&
TransferCapabilities
)
->
TransferPlan
{
use
StorageKind
::
*
;
use
TransferStrategy
::
*
;
match
src
{
// Host → Remote - direct NIXL
System
|
Pinned
=>
TransferPlan
::
Direct
(
NixlWrite
),
// Device → Remote - check GPU RDMA capability
Device
(
_
)
=>
{
if
capabilities
.allows_device_remote_direct
()
{
// Direct GPU RDMA transfer
TransferPlan
::
Direct
(
NixlWrite
)
}
else
{
// Stage through host: Device → Pinned → Remote
TransferPlan
::
TwoHop
{
first
:
CudaAsyncD2H
,
bounce_location
:
Pinned
,
second
:
NixlWrite
,
}
}
}
// Disk → Remote - always stage through host
Disk
(
_
)
=>
TransferPlan
::
TwoHop
{
first
:
NixlWrite
,
bounce_location
:
Pinned
,
second
:
NixlWrite
,
},
}
}
fn
select_remote_strategy_v2
(
src
:
StorageKind
,
is_src_local
:
bool
,
dst
:
StorageKind
,
is_dst_local
:
bool
,
capabilities
:
&
TransferCapabilities
,
)
->
anyhow
::
Result
<
TransferPlan
>
{
// We only support System, Pinned and Device for remote transfers.
// Later we might support staged/bounce buffer transfers.
if
matches!
(
src
,
StorageKind
::
Disk
(
_
))
||
matches!
(
dst
,
StorageKind
::
Disk
(
_
))
{
return
Err
(
anyhow
::
anyhow!
(
"Neither local nor remote disk transfers are supported over NIXL at this time."
));
}
if
!
capabilities
.allow_gpu_rdma
&&
(
matches!
(
src
,
StorageKind
::
Device
(
_
))
||
matches!
(
dst
,
StorageKind
::
Device
(
_
)))
{
return
Err
(
anyhow
::
anyhow!
(
"GPU RDMA is disabled - this transfer requires GPU RDMA."
));
}
if
is_src_local
&&
!
is_dst_local
{
return
Ok
(
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
));
}
if
is_dst_local
&&
!
is_src_local
{
return
Ok
(
TransferPlan
::
Direct
(
TransferStrategy
::
NixlReadFlipped
));
}
unreachable!
(
"Both src and dst are remote - this is not supported."
);
}
#[cfg(all(test,
feature
=
"testing-kvbm"
))]
mod
tests
{
use
super
::
*
;
fn
default_caps
()
->
TransferCapabilities
{
TransferCapabilities
::
default
()
}
#[test]
fn
test_host_to_host_transfers
()
{
let
caps
=
default_caps
();
assert_eq!
(
select_direct_strategy
(
StorageKind
::
System
,
StorageKind
::
System
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
Memcpy
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
System
,
StorageKind
::
Pinned
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
Memcpy
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Pinned
,
StorageKind
::
System
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
Memcpy
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Pinned
,
StorageKind
::
Pinned
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
Memcpy
)
);
}
#[test]
fn
test_host_to_device_transfers
()
{
let
caps
=
default_caps
();
// // System (unpinned) to device should be blocking
// assert_eq!(
// select_direct_strategy(StorageKind::System, StorageKind::Device(0), false, &caps),
// TransferPlan::Direct(TransferStrategy::CudaBlockingH2D)
// );
// Pinned to device should be async
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Pinned
,
StorageKind
::
Device
(
0
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
CudaAsyncH2D
)
);
}
#[test]
fn
test_device_to_host_transfers
()
{
let
caps
=
default_caps
();
// // Device to system should be blocking
// assert_eq!(
// select_direct_strategy(StorageKind::Device(0), StorageKind::System, false, &caps),
// TransferPlan::Direct(TransferStrategy::CudaBlockingD2H)
// );
// Device to pinned should be async
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Device
(
0
),
StorageKind
::
Pinned
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
CudaAsyncD2H
)
);
}
#[test]
fn
test_device_to_device_transfers
()
{
let
caps
=
default_caps
();
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Device
(
0
),
StorageKind
::
Device
(
1
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
CudaAsyncD2D
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Device
(
3
),
StorageKind
::
Device
(
3
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
CudaAsyncD2D
)
);
}
#[test]
fn
test_disk_to_host_transfers
()
{
let
caps
=
default_caps
();
// Disk to host - direct NIXL
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Disk
(
42
),
StorageKind
::
System
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlReadFlipped
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Disk
(
42
),
StorageKind
::
Pinned
,
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlReadFlipped
)
);
}
#[test]
fn
test_host_to_disk_transfers
()
{
let
caps
=
default_caps
();
// Host to disk - direct NIXL
assert_eq!
(
select_direct_strategy
(
StorageKind
::
System
,
StorageKind
::
Disk
(
42
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Pinned
,
StorageKind
::
Disk
(
42
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
)
);
}
#[test]
fn
test_device_to_disk_without_gds
()
{
let
caps
=
default_caps
();
// GDS disabled
// Device → Disk should use bounce buffer
let
plan
=
select_direct_strategy
(
StorageKind
::
Device
(
0
),
StorageKind
::
Disk
(
42
),
false
,
&
caps
);
match
plan
{
TransferPlan
::
TwoHop
{
first
,
bounce_location
,
second
,
}
=>
{
assert_eq!
(
first
,
TransferStrategy
::
CudaAsyncD2H
);
assert_eq!
(
bounce_location
,
StorageKind
::
Pinned
);
assert_eq!
(
second
,
TransferStrategy
::
NixlWrite
);
}
_
=>
panic!
(
"Expected TwoHop plan"
),
}
}
#[test]
fn
test_disk_to_device_without_gds
()
{
let
caps
=
default_caps
();
// GDS disabled
// Disk → Device should use bounce buffer
let
plan
=
select_direct_strategy
(
StorageKind
::
Disk
(
42
),
StorageKind
::
Device
(
0
),
false
,
&
caps
);
match
plan
{
TransferPlan
::
TwoHop
{
first
,
bounce_location
,
second
,
}
=>
{
assert_eq!
(
first
,
TransferStrategy
::
NixlReadFlipped
);
assert_eq!
(
bounce_location
,
StorageKind
::
Pinned
);
assert_eq!
(
second
,
TransferStrategy
::
CudaAsyncH2D
);
}
_
=>
panic!
(
"Expected TwoHop plan"
),
}
}
#[test]
fn
test_device_to_disk_with_gds
()
{
let
caps
=
TransferCapabilities
::
default
()
.with_gds
(
true
);
// Device → Disk should be direct with GDS
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Device
(
0
),
StorageKind
::
Disk
(
42
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
)
);
}
#[test]
fn
test_disk_to_device_with_gds
()
{
let
caps
=
TransferCapabilities
::
default
()
.with_gds
(
true
);
// Disk → Device should be direct with GDS
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Disk
(
42
),
StorageKind
::
Device
(
0
),
false
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlRead
)
);
}
#[test]
fn
test_host_to_remote
()
{
let
caps
=
default_caps
();
// Host → Remote - always direct
assert_eq!
(
select_direct_strategy
(
StorageKind
::
System
,
StorageKind
::
System
,
true
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
)
);
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Pinned
,
StorageKind
::
Pinned
,
true
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
)
);
}
#[test]
fn
test_device_to_remote_without_rdma
()
{
let
caps
=
default_caps
();
// GPU RDMA disabled
// Device → Remote should use bounce buffer
let
plan
=
select_direct_strategy
(
StorageKind
::
Device
(
0
),
StorageKind
::
System
,
true
,
&
caps
);
match
plan
{
TransferPlan
::
TwoHop
{
first
,
bounce_location
,
second
,
}
=>
{
assert_eq!
(
first
,
TransferStrategy
::
CudaAsyncD2H
);
assert_eq!
(
bounce_location
,
StorageKind
::
Pinned
);
assert_eq!
(
second
,
TransferStrategy
::
NixlWrite
);
}
_
=>
panic!
(
"Expected TwoHop plan"
),
}
}
#[test]
fn
test_device_to_remote_with_rdma
()
{
let
caps
=
TransferCapabilities
::
default
()
.with_gpu_rdma
(
true
);
// Device → Remote should be direct with GPU RDMA
assert_eq!
(
select_direct_strategy
(
StorageKind
::
Device
(
0
),
StorageKind
::
Device
(
0
),
true
,
&
caps
),
TransferPlan
::
Direct
(
TransferStrategy
::
NixlWrite
)
);
}
#[test]
fn
test_disk_to_remote
()
{
let
caps
=
default_caps
();
// Disk → Remote always uses bounce buffer
let
plan
=
select_direct_strategy
(
StorageKind
::
Disk
(
42
),
StorageKind
::
System
,
true
,
&
caps
);
match
plan
{
TransferPlan
::
TwoHop
{
first
,
bounce_location
,
second
,
}
=>
{
assert_eq!
(
first
,
TransferStrategy
::
NixlWrite
);
assert_eq!
(
bounce_location
,
StorageKind
::
Pinned
);
assert_eq!
(
second
,
TransferStrategy
::
NixlWrite
);
}
_
=>
panic!
(
"Expected TwoHop plan"
),
}
}
}
lib/kvbm-physical/src/transfer/testing.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Round-trip testing infrastructure for transfer verification.
//!
//! This module provides utilities for testing data integrity across transfers
//! by comparing checksums after round-trip operations:
//! 1. Source blocks (host) → Intermediate (device/disk/remote)
//! 2. Intermediate → Destination blocks (host, different IDs)
//! 3. Verify checksums match between source and destination
use
super
::
context
::
TransferContext
;
use
super
::{
BlockChecksum
,
FillPattern
,
PhysicalLayout
,
StorageKind
,
compute_block_checksums
,
fill_blocks
,
transfer_blocks
,
};
use
anyhow
::{
Result
,
anyhow
};
use
std
::
collections
::
HashMap
;
/// Result of a round-trip test.
#[derive(Debug)]
pub
struct
RoundTripTestResult
{
/// Source block checksums (keyed by source block ID)
pub
source_checksums
:
HashMap
<
usize
,
BlockChecksum
>
,
/// Destination block checksums (keyed by destination block ID)
pub
dest_checksums
:
HashMap
<
usize
,
BlockChecksum
>
,
/// Block ID mapping used (src_id, dst_id)
pub
block_mapping
:
Vec
<
(
usize
,
usize
)
>
,
/// Whether all checksums matched
pub
success
:
bool
,
/// Mismatched blocks (if any)
pub
mismatches
:
Vec
<
(
usize
,
usize
)
>
,
// (src_id, dst_id) pairs that didn't match
}
impl
RoundTripTestResult
{
/// Check if the round-trip test passed.
pub
fn
is_success
(
&
self
)
->
bool
{
self
.success
}
/// Get the number of blocks tested.
pub
fn
num_blocks
(
&
self
)
->
usize
{
self
.block_mapping
.len
()
}
/// Get a detailed report of the test results.
pub
fn
report
(
&
self
)
->
String
{
if
self
.success
{
format!
(
"Round-trip test PASSED: {}/{} blocks verified successfully"
,
self
.num_blocks
(),
self
.num_blocks
()
)
}
else
{
format!
(
"Round-trip test FAILED: {}/{} blocks mismatched
\n
Mismatches: {:?}"
,
self
.mismatches
.len
(),
self
.num_blocks
(),
self
.mismatches
)
}
}
}
/// Builder for round-trip tests.
///
/// This allows configuring a test that transfers data from source blocks
/// to intermediate storage and back to different destination blocks,
/// verifying data integrity via checksums.
pub
struct
RoundTripTest
{
/// Source physical layout (must be local)
source
:
PhysicalLayout
,
/// Intermediate physical layout (can be remote/device/disk)
intermediate
:
PhysicalLayout
,
/// Destination physical layout (must be local)
destination
:
PhysicalLayout
,
/// Block mapping: (src_id, intermediate_id, dst_id)
block_mapping
:
Vec
<
(
usize
,
usize
,
usize
)
>
,
/// Fill pattern for source blocks
fill_pattern
:
FillPattern
,
}
impl
RoundTripTest
{
/// Create a new round-trip test.
///
/// # Arguments
/// * `source` - Source physical layout (must be local)
/// * `intermediate` - Intermediate physical layout
/// * `destination` - Destination physical layout (must be local)
pub
fn
new
(
source
:
PhysicalLayout
,
intermediate
:
PhysicalLayout
,
destination
:
PhysicalLayout
,
)
->
Result
<
Self
>
{
if
source
.is_remote
()
{
return
Err
(
anyhow!
(
"Source layout must be local"
));
}
if
destination
.is_remote
()
{
return
Err
(
anyhow!
(
"Destination layout must be local"
));
}
Ok
(
Self
{
source
,
intermediate
,
destination
,
block_mapping
:
Vec
::
new
(),
fill_pattern
:
FillPattern
::
Sequential
,
})
}
/// Set the fill pattern for source blocks.
pub
fn
with_fill_pattern
(
mut
self
,
pattern
:
FillPattern
)
->
Self
{
self
.fill_pattern
=
pattern
;
self
}
/// Add a block mapping for the round-trip test.
///
/// # Arguments
/// * `src_id` - Source block ID
/// * `intermediate_id` - Intermediate block ID
/// * `dst_id` - Destination block ID
pub
fn
add_block_mapping
(
mut
self
,
src_id
:
usize
,
intermediate_id
:
usize
,
dst_id
:
usize
,
)
->
Self
{
self
.block_mapping
.push
((
src_id
,
intermediate_id
,
dst_id
));
self
}
/// Add multiple block mappings at once.
///
/// This is a convenience method for adding several mappings.
pub
fn
with_block_mappings
(
mut
self
,
mappings
:
&
[(
usize
,
usize
,
usize
)])
->
Self
{
self
.block_mapping
.extend_from_slice
(
mappings
);
self
}
/// Run the round-trip test.
///
/// # Workflow
/// 1. Fill source blocks with the specified pattern
/// 2. Compute source checksums
/// 3. Transfer source → intermediate
/// 4. Transfer intermediate → destination
/// 5. Compute destination checksums
/// 6. Compare checksums
///
/// # Arguments
/// * `ctx` - Transfer context with CUDA stream and NIXL agent
pub
async
fn
run
(
self
,
ctx
:
&
TransferContext
)
->
Result
<
RoundTripTestResult
>
{
if
self
.block_mapping
.is_empty
()
{
return
Err
(
anyhow!
(
"No block mappings specified"
));
}
// Step 1: Fill source blocks
let
src_ids
:
Vec
<
usize
>
=
self
.block_mapping
.iter
()
.map
(|(
src
,
_
,
_
)|
*
src
)
.collect
();
fill_blocks
(
&
self
.source
,
&
src_ids
,
self
.fill_pattern
)
?
;
// Step 2: Compute source checksums
let
source_checksums
=
compute_block_checksums
(
&
self
.source
,
&
src_ids
)
?
;
// Step 3: Transfer source → intermediate
let
src_ids_intermediate
:
Vec
<
usize
>
=
self
.block_mapping
.iter
()
.map
(|(
src
,
_
,
_
)|
*
src
)
.collect
();
let
inter_ids_from_src
:
Vec
<
usize
>
=
self
.block_mapping
.iter
()
.map
(|(
_
,
inter
,
_
)|
*
inter
)
.collect
();
let
notification
=
transfer_blocks
(
&
self
.source
,
&
self
.intermediate
,
&
src_ids_intermediate
,
&
inter_ids_from_src
,
ctx
,
)
?
;
notification
.await
?
;
// Step 4: Transfer intermediate → destination
let
inter_ids_to_dst
:
Vec
<
usize
>
=
self
.block_mapping
.iter
()
.map
(|(
_
,
inter
,
_
)|
*
inter
)
.collect
();
let
dst_ids_from_inter
:
Vec
<
usize
>
=
self
.block_mapping
.iter
()
.map
(|(
_
,
_
,
dst
)|
*
dst
)
.collect
();
let
notification
=
transfer_blocks
(
&
self
.intermediate
,
&
self
.destination
,
&
inter_ids_to_dst
,
&
dst_ids_from_inter
,
ctx
,
)
?
;
notification
.await
?
;
// Step 5: Compute destination checksums
let
dst_ids
:
Vec
<
usize
>
=
self
.block_mapping
.iter
()
.map
(|(
_
,
_
,
dst
)|
*
dst
)
.collect
();
let
dest_checksums
=
compute_block_checksums
(
&
self
.destination
,
&
dst_ids
)
?
;
// Step 6: Compare checksums
let
mut
mismatches
=
Vec
::
new
();
for
(
src_id
,
_
,
dst_id
)
in
&
self
.block_mapping
{
let
src_checksum
=
&
source_checksums
[
src_id
];
let
dst_checksum
=
&
dest_checksums
[
dst_id
];
if
src_checksum
!=
dst_checksum
{
mismatches
.push
((
*
src_id
,
*
dst_id
));
}
}
let
success
=
mismatches
.is_empty
();
let
block_mapping
:
Vec
<
(
usize
,
usize
)
>
=
self
.block_mapping
.iter
()
.map
(|(
src
,
_
,
dst
)|
(
*
src
,
*
dst
))
.collect
();
Ok
(
RoundTripTestResult
{
source_checksums
,
dest_checksums
,
block_mapping
,
success
,
mismatches
,
})
}
}
#[cfg(all(test,
feature
=
"testing-kvbm"
))]
mod
tests
{
use
super
::
*
;
use
crate
::
v2
::
layout
::{
FullyContiguousLayout
,
Layout
,
LayoutConfig
,
MemoryRegion
,
OwnedMemoryRegion
,
};
use
std
::
sync
::
Arc
;
// Helper to create a minimal transfer context for testing
// In real tests with CUDA/NIXL, this would be properly constructed
fn
create_test_context
()
->
TransferContext
{
// For now, we'll skip these tests if CUDA is not available
// In the future, we can mock TransferContext or use conditional compilation
todo!
(
"Create test context - requires CUDA/NIXL setup"
)
}
#[tokio::test]
#[ignore
=
"Requires CUDA/NIXL setup"
]
async
fn
test_round_trip_host_to_host
()
{
// Create three layouts: source, intermediate, destination
let
(
src_layout
,
_
src_mem
)
=
create_test_layout
(
4
);
let
(
inter_layout
,
_
inter_mem
)
=
create_test_layout
(
4
);
let
(
dst_layout
,
_
dst_mem
)
=
create_test_layout
(
4
);
let
source
=
PhysicalLayout
::
new_local
(
src_layout
,
StorageKind
::
System
);
let
intermediate
=
PhysicalLayout
::
new_local
(
inter_layout
,
StorageKind
::
Pinned
);
let
destination
=
PhysicalLayout
::
new_local
(
dst_layout
,
StorageKind
::
System
);
// Build round-trip test with different block IDs
// Source: blocks [0, 1, 2, 3]
// Intermediate: blocks [0, 1, 2, 3]
// Destination: blocks [0, 1, 2, 3] (different memory than source)
let
test
=
RoundTripTest
::
new
(
source
,
intermediate
,
destination
)
.unwrap
()
.with_fill_pattern
(
FillPattern
::
Sequential
)
.add_block_mapping
(
0
,
0
,
0
)
.add_block_mapping
(
1
,
1
,
1
)
.add_block_mapping
(
2
,
2
,
2
)
.add_block_mapping
(
3
,
3
,
3
);
// Create a transfer context (requires actual CUDA/NIXL setup)
let
ctx
=
create_test_context
();
// Run the test
let
result
=
test
.run
(
&
ctx
)
.await
.unwrap
();
assert
!
(
result
.is_success
(),
"{}"
,
result
.report
());
assert_eq!
(
result
.num_blocks
(),
4
);
}
#[tokio::test]
#[ignore
=
"Requires CUDA/NIXL setup"
]
async
fn
test_round_trip_different_block_ids
()
{
// Create layouts with enough blocks
let
(
src_layout
,
_
src_mem
)
=
create_test_layout
(
8
);
let
(
inter_layout
,
_
inter_mem
)
=
create_test_layout
(
8
);
let
(
dst_layout
,
_
dst_mem
)
=
create_test_layout
(
8
);
let
source
=
PhysicalLayout
::
new_local
(
src_layout
,
StorageKind
::
System
);
let
intermediate
=
PhysicalLayout
::
new_local
(
inter_layout
,
StorageKind
::
Pinned
);
let
destination
=
PhysicalLayout
::
new_local
(
dst_layout
,
StorageKind
::
System
);
// Test with non-overlapping block IDs
// Source: blocks [0, 1, 2, 3]
// Intermediate: blocks [2, 3, 4, 5]
// Destination: blocks [4, 5, 6, 7]
let
test
=
RoundTripTest
::
new
(
source
,
intermediate
,
destination
)
.unwrap
()
.with_fill_pattern
(
FillPattern
::
BlockBased
)
.with_block_mappings
(
&
[(
0
,
2
,
4
),
(
1
,
3
,
5
),
(
2
,
4
,
6
),
(
3
,
5
,
7
)]);
let
ctx
=
create_test_context
();
let
result
=
test
.run
(
&
ctx
)
.await
.unwrap
();
assert
!
(
result
.is_success
(),
"{}"
,
result
.report
());
assert_eq!
(
result
.num_blocks
(),
4
);
}
#[test]
fn
test_round_trip_builder
()
{
let
(
src_layout
,
_
)
=
create_test_layout
(
4
);
let
(
inter_layout
,
_
)
=
create_test_layout
(
4
);
let
(
dst_layout
,
_
)
=
create_test_layout
(
4
);
let
source
=
PhysicalLayout
::
new_local
(
src_layout
,
StorageKind
::
System
);
let
intermediate
=
PhysicalLayout
::
new_local
(
inter_layout
,
StorageKind
::
Pinned
);
let
destination
=
PhysicalLayout
::
new_local
(
dst_layout
,
StorageKind
::
System
);
let
test
=
RoundTripTest
::
new
(
source
,
intermediate
,
destination
)
.unwrap
()
.with_fill_pattern
(
FillPattern
::
Constant
(
42
))
.add_block_mapping
(
0
,
0
,
1
)
.add_block_mapping
(
1
,
1
,
2
);
assert_eq!
(
test
.block_mapping
.len
(),
2
);
}
#[test]
fn
test_round_trip_requires_local_source
()
{
let
(
src_layout
,
_
)
=
create_test_layout
(
1
);
let
(
inter_layout
,
_
)
=
create_test_layout
(
1
);
let
(
dst_layout
,
_
)
=
create_test_layout
(
1
);
let
source
=
PhysicalLayout
::
new_remote
(
src_layout
,
StorageKind
::
System
,
"remote"
.to_string
());
let
intermediate
=
PhysicalLayout
::
new_local
(
inter_layout
,
StorageKind
::
Pinned
);
let
destination
=
PhysicalLayout
::
new_local
(
dst_layout
,
StorageKind
::
System
);
let
result
=
RoundTripTest
::
new
(
source
,
intermediate
,
destination
);
assert
!
(
result
.is_err
());
}
#[test]
fn
test_round_trip_requires_local_destination
()
{
let
(
src_layout
,
_
)
=
create_test_layout
(
1
);
let
(
inter_layout
,
_
)
=
create_test_layout
(
1
);
let
(
dst_layout
,
_
)
=
create_test_layout
(
1
);
let
source
=
PhysicalLayout
::
new_local
(
src_layout
,
StorageKind
::
System
);
let
intermediate
=
PhysicalLayout
::
new_local
(
inter_layout
,
StorageKind
::
Pinned
);
let
destination
=
PhysicalLayout
::
new_remote
(
dst_layout
,
StorageKind
::
System
,
"remote"
.to_string
());
let
result
=
RoundTripTest
::
new
(
source
,
intermediate
,
destination
);
assert
!
(
result
.is_err
());
}
}
lib/kvbm-physical/src/transfer/tests/local_transfers.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Local transfer tests where source and destination use the same NIXL agent.
//!
//! These tests verify data integrity across:
//! - Different storage types (System, Pinned, Device)
//! - Different layout types (Fully Contiguous, Layer-wise)
//! - Different transfer strategies (Memcpy, CUDA H2D/D2H)
use
super
::
*
;
use
crate
::
transfer
::
TransferCapabilities
;
use
crate
::
transfer
::
executor
::
TransferOptionsInternal
;
use
crate
::
transfer
::
executor
::
execute_transfer
;
use
crate
::
transfer
::{
can_use_whole_block_transfer
,
validate_layout_compatibility
};
use
anyhow
::
Result
;
use
rstest
::
rstest
;
// ============================================================================
// System <=> System Tests (Memcpy)
// ============================================================================
#[derive(Clone)]
enum
LayoutType
{
FC
,
LW
,
}
fn
build_layout
(
agent
:
NixlAgent
,
layout_type
:
LayoutType
,
storage_kind
:
StorageKind
,
num_blocks
:
usize
,
)
->
PhysicalLayout
{
match
layout_type
{
LayoutType
::
FC
=>
create_fc_layout
(
agent
,
storage_kind
,
num_blocks
),
LayoutType
::
LW
=>
create_lw_layout
(
agent
,
storage_kind
,
num_blocks
),
}
}
/// Check if a transfer between two storage kinds requires GDS_MT (Device ↔ Disk direct).
fn
requires_gds
(
src_kind
:
StorageKind
,
dst_kind
:
StorageKind
)
->
bool
{
matches!
(
(
src_kind
,
dst_kind
),
(
StorageKind
::
Device
(
_
),
StorageKind
::
Disk
(
_
))
|
(
StorageKind
::
Disk
(
_
),
StorageKind
::
Device
(
_
))
)
}
/// Check if a transfer between two storage kinds is unsupported.
///
/// Device ↔ System transfers are not supported - must use Pinned memory for CUDA transfers.
fn
is_unsupported_transfer
(
src_kind
:
StorageKind
,
dst_kind
:
StorageKind
)
->
bool
{
matches!
(
(
src_kind
,
dst_kind
),
(
StorageKind
::
Device
(
_
),
StorageKind
::
System
)
|
(
StorageKind
::
System
,
StorageKind
::
Device
(
_
))
)
}
/// Probe whether a NIXL backend is available by attempting to add it to a temporary agent.
fn
is_nixl_backend_available
(
backend
:
&
str
)
->
bool
{
let
mut
agent
=
match
NixlAgent
::
new
(
"__backend_probe__"
)
{
Ok
(
a
)
=>
a
,
Err
(
_
)
=>
return
false
,
};
agent
.add_backend
(
backend
)
.is_ok
()
}
fn
build_agent_for_kinds
(
kinds
:
&
[
StorageKind
])
->
Result
<
NixlAgent
>
{
let
mut
agent
=
NixlAgent
::
new
(
"agent"
)
?
;
let
mut
added_backends
=
Vec
::
new
();
// Determine required backends for all storage kinds
for
&
kind
in
kinds
{
match
kind
{
StorageKind
::
System
|
StorageKind
::
Pinned
=>
{
if
!
added_backends
.contains
(
&
"POSIX"
)
{
let
_
=
agent
.add_backend
(
"POSIX"
);
// Optional for DRAM
added_backends
.push
(
"POSIX"
);
}
}
StorageKind
::
Device
(
_
)
=>
{
if
!
added_backends
.contains
(
&
"UCX"
)
{
agent
.add_backend
(
"UCX"
)
?
;
// Required for VRAM
added_backends
.push
(
"UCX"
);
}
}
StorageKind
::
Disk
(
_
)
=>
{
if
!
added_backends
.contains
(
&
"POSIX"
)
{
let
_
=
agent
.add_backend
(
"POSIX"
);
// Optional for disk I/O
added_backends
.push
(
"POSIX"
);
}
}
}
}
// GDS_MT is optional for Device <-> Disk (will be checked separately)
for
window
in
kinds
.windows
(
2
)
{
if
requires_gds
(
window
[
0
],
window
[
1
])
&&
!
added_backends
.contains
(
&
"GDS_MT"
)
{
let
_
=
agent
.add_backend
(
"GDS_MT"
);
break
;
}
}
Ok
(
agent
)
}
#[rstest]
#[tokio::test]
async
fn
test_p2p
(
#[values(LayoutType::FC,
LayoutType::LW)]
src_layout
:
LayoutType
,
#[values(
StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(
0
),
StorageKind::Disk(
0
)
)]
src_kind
:
StorageKind
,
#[values(LayoutType::FC,
LayoutType::LW)]
dst_layout
:
LayoutType
,
#[values(
StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(
0
),
StorageKind::Disk(
0
)
)]
dst_kind
:
StorageKind
,
)
->
Result
<
()
>
{
// Skip unsupported Device ↔ System transfers (must use Pinned for CUDA)
if
is_unsupported_transfer
(
src_kind
,
dst_kind
)
{
eprintln!
(
"Skipping unsupported Device ↔ System transfer: src={:?}, dst={:?}"
,
src_kind
,
dst_kind
);
return
Ok
(());
}
// Device ↔ Disk direct transfers require GDS_MT
if
requires_gds
(
src_kind
,
dst_kind
)
&&
!
is_nixl_backend_available
(
"GDS_MT"
)
{
eprintln!
(
"Skipping Device ↔ Disk test - GDS_MT backend unavailable"
);
return
Ok
(());
}
use
crate
::
transfer
::{
BounceBufferInternal
,
executor
::
TransferOptionsInternal
};
let
agent
=
build_agent_for_kinds
(
&
[
src_kind
,
dst_kind
])
?
;
let
src
=
build_layout
(
agent
.clone
(),
src_layout
,
src_kind
,
4
);
let
dst
=
build_layout
(
agent
.clone
(),
dst_layout
,
dst_kind
,
4
);
let
bounce_layout
=
build_layout
(
agent
.clone
(),
LayoutType
::
FC
,
StorageKind
::
Pinned
,
4
);
let
bounce_buffer_spec
=
BounceBufferInternal
::
from_layout
(
bounce_layout
,
vec!
[
0
,
1
]);
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
let
checksums
=
fill_and_checksum
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
.unwrap
();
let
options
=
TransferOptionsInternal
::
builder
()
.bounce_buffer
(
bounce_buffer_spec
)
.build
()
?
;
let
notification
=
execute_transfer
(
&
src
,
&
dst
,
&
src_blocks
,
&
dst_blocks
,
options
,
ctx
.context
())
?
;
notification
.await
?
;
verify_checksums_by_position
(
&
checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
)
?
;
Ok
(())
}
#[rstest]
#[tokio::test]
async
fn
test_roundtrip
(
#[values(LayoutType::FC,
LayoutType::LW)]
src_layout
:
LayoutType
,
#[values(StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(
0
))]
src_kind
:
StorageKind
,
#[values(LayoutType::FC,
LayoutType::LW)]
inter_layout
:
LayoutType
,
#[values(StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(
0
))]
inter_kind
:
StorageKind
,
#[values(LayoutType::FC,
LayoutType::LW)]
dst_layout
:
LayoutType
,
#[values(StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(
0
))]
dst_kind
:
StorageKind
,
)
->
Result
<
()
>
{
// Skip unsupported Device ↔ System transfers (must use Pinned for CUDA)
if
is_unsupported_transfer
(
src_kind
,
inter_kind
)
||
is_unsupported_transfer
(
inter_kind
,
dst_kind
)
{
eprintln!
(
"Skipping unsupported Device ↔ System transfer: src={:?}, inter={:?}, dst={:?}"
,
src_kind
,
inter_kind
,
dst_kind
);
return
Ok
(());
}
use
crate
::
transfer
::
executor
::
TransferOptionsInternal
;
let
agent
=
build_agent_for_kinds
(
&
[
src_kind
,
inter_kind
,
dst_kind
])
?
;
// Create layouts: source pinned, device intermediate, destination pinned
let
src
=
build_layout
(
agent
.clone
(),
src_layout
,
src_kind
,
4
);
let
device
=
build_layout
(
agent
.clone
(),
inter_layout
,
inter_kind
,
4
);
let
dst
=
build_layout
(
agent
.clone
(),
dst_layout
,
dst_kind
,
4
);
let
src_blocks
=
vec!
[
0
,
1
];
let
device_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
// Fill source and compute checksums
let
checksums
=
fill_and_checksum
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
.unwrap
();
// Transfer: Pinned[0,1] -> Device[0,1]
let
notification
=
execute_transfer
(
&
src
,
&
device
,
&
src_blocks
,
&
device_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
// Transfer: Device[0,1] -> Pinned[2,3]
let
notification
=
execute_transfer
(
&
device
,
&
dst
,
&
device_blocks
,
&
dst_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
// Verify checksums match
verify_checksums_by_position
(
&
checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
)
?
;
Ok
(())
}
#[cfg(feature
=
"testing-nixl-gds"
)]
#[rstest]
#[case(StorageKind::Device(
0
),
StorageKind::Disk(
0
))]
#[case(StorageKind::Disk(
0
),
StorageKind::Device(
0
))]
#[tokio::test]
async
fn
test_gds
(
#[case]
src_kind
:
StorageKind
,
#[values(LayoutType::FC,
LayoutType::LW)]
src_layout
:
LayoutType
,
#[case]
dst_kind
:
StorageKind
,
#[values(LayoutType::FC,
LayoutType::LW)]
dst_layout
:
LayoutType
,
)
->
Result
<
()
>
{
let
capabilities
=
TransferCapabilities
::
default
()
.with_gds_if_supported
();
if
!
capabilities
.allow_gds
{
println!
(
"System does not support GDS. Skipping test."
);
return
Ok
(());
}
let
agent
=
build_agent_for_kinds
(
&
[
src_kind
,
dst_kind
])
?
;
let
src
=
build_layout
(
agent
.clone
(),
src_layout
,
src_kind
,
4
);
let
dst
=
build_layout
(
agent
.clone
(),
dst_layout
,
dst_kind
,
4
);
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
let
checksums
=
fill_and_checksum
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
Some
(
capabilities
))
.unwrap
();
let
notification
=
execute_transfer
(
&
src
,
&
dst
,
&
src_blocks
,
&
dst_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
verify_checksums_by_position
(
&
checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
)
?
;
Ok
(())
}
#[rstest]
#[case(
1024
)]
#[case(
2048
)]
#[case(
4096
)]
#[case(
8192
)]
#[case(
16384
)]
#[tokio::test]
async
fn
test_large_block_counts
(
#[case]
block_count
:
usize
)
{
let
agent
=
create_test_agent
(
&
format!
(
"test_large_block_counts_{}"
,
block_count
));
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
Pinned
,
block_count
);
let
device
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
Device
(
0
),
block_count
);
let
src_blocks
=
(
0
..
block_count
)
.collect
::
<
Vec
<
_
>>
();
let
device_blocks
=
(
0
..
block_count
)
.collect
::
<
Vec
<
_
>>
();
let
ctx
=
create_transfer_context
(
agent
,
None
)
.unwrap
();
let
notification
=
execute_transfer
(
&
src
,
&
device
,
&
src_blocks
,
&
device_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
.unwrap
();
notification
.await
.unwrap
();
}
// ============================================================================
// Parameterized Bounce Tests with Guard Block Validation
// ============================================================================
/// Test bounce transfers with guard block validation.
///
/// This test validates that:
/// 1. Data can be transferred: host[src_blocks] → bounce[src_blocks] → host[dst_blocks]
/// 2. Guard blocks adjacent to dst_blocks remain unchanged (no memory corruption)
/// 3. Works correctly with different storage types, layouts, and transfer modes
///
/// Test pattern (6 blocks total):
/// - Source blocks: [0, 1]
/// - Destination blocks: [3, 4]
/// - Guard blocks: [2, 5] (adjacent to destination, should remain unchanged)
#[rstest]
// Storage combinations (host, bounce)
#[case(StorageKind::System,
StorageKind::Pinned,
"sys_pin"
)]
#[case(StorageKind::Pinned,
StorageKind::System,
"pin_sys"
)]
#[case(StorageKind::Pinned,
StorageKind::Device(
0
),
"pin_dev"
)]
#[tokio::test]
async
fn
test_bounce_with_guards_fc_fc_full
(
#[case]
host_storage
:
StorageKind
,
#[case]
bounce_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_bounce_with_guards_impl
(
host_storage
,
bounce_storage
,
LayoutKind
::
FC
,
LayoutKind
::
FC
,
TransferMode
::
FullBlocks
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::System,
StorageKind::Pinned,
"sys_pin"
)]
#[case(StorageKind::Pinned,
StorageKind::System,
"pin_sys"
)]
#[case(StorageKind::Pinned,
StorageKind::Device(
0
),
"pin_dev"
)]
#[tokio::test]
async
fn
test_bounce_with_guards_fc_lw_full
(
#[case]
host_storage
:
StorageKind
,
#[case]
bounce_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_bounce_with_guards_impl
(
host_storage
,
bounce_storage
,
LayoutKind
::
FC
,
LayoutKind
::
LW
,
TransferMode
::
FullBlocks
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::System,
StorageKind::Pinned,
"sys_pin"
)]
#[case(StorageKind::Pinned,
StorageKind::System,
"pin_sys"
)]
#[case(StorageKind::Pinned,
StorageKind::Device(
0
),
"pin_dev"
)]
#[tokio::test]
async
fn
test_bounce_with_guards_lw_fc_full
(
#[case]
host_storage
:
StorageKind
,
#[case]
bounce_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_bounce_with_guards_impl
(
host_storage
,
bounce_storage
,
LayoutKind
::
LW
,
LayoutKind
::
FC
,
TransferMode
::
FullBlocks
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::System,
StorageKind::Pinned,
"sys_pin"
)]
#[case(StorageKind::Pinned,
StorageKind::System,
"pin_sys"
)]
#[case(StorageKind::Pinned,
StorageKind::Device(
0
),
"pin_dev"
)]
#[tokio::test]
async
fn
test_bounce_with_guards_lw_lw_full
(
#[case]
host_storage
:
StorageKind
,
#[case]
bounce_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_bounce_with_guards_impl
(
host_storage
,
bounce_storage
,
LayoutKind
::
LW
,
LayoutKind
::
LW
,
TransferMode
::
FullBlocks
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::Pinned,
StorageKind::Device(
0
),
"pin_dev"
)]
#[tokio::test]
async
fn
test_bounce_with_guards_fc_fc_layer0
(
#[case]
host_storage
:
StorageKind
,
#[case]
bounce_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_bounce_with_guards_impl
(
host_storage
,
bounce_storage
,
LayoutKind
::
FC
,
LayoutKind
::
FC
,
TransferMode
::
FirstLayerOnly
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::Pinned,
StorageKind::Device(
0
),
"pin_dev"
)]
#[tokio::test]
async
fn
test_bounce_with_guards_lw_lw_layer0
(
#[case]
host_storage
:
StorageKind
,
#[case]
bounce_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_bounce_with_guards_impl
(
host_storage
,
bounce_storage
,
LayoutKind
::
LW
,
LayoutKind
::
LW
,
TransferMode
::
FirstLayerOnly
,
name_suffix
,
)
.await
.unwrap
();
}
/// Implementation helper for bounce tests with guard blocks.
async
fn
test_bounce_with_guards_impl
(
host_storage
:
StorageKind
,
bounce_storage
:
StorageKind
,
host_layout
:
LayoutKind
,
bounce_layout
:
LayoutKind
,
mode
:
TransferMode
,
name_suffix
:
&
str
,
)
->
Result
<
()
>
{
let
num_blocks
=
6
;
let
test_name
=
format!
(
"bounce_{}_{:?}_{:?}_{}_{}"
,
name_suffix
,
host_layout
,
bounce_layout
,
mode
.suffix
(),
std
::
time
::
SystemTime
::
now
()
.duration_since
(
std
::
time
::
UNIX_EPOCH
)
.unwrap
()
.as_millis
()
);
let
agent
=
create_test_agent
(
&
test_name
);
// Create layouts
let
host
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
host_layout
,
host_storage
),
num_blocks
,
);
let
bounce
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
bounce_layout
,
bounce_storage
),
num_blocks
,
);
// Block assignments:
// - Transfer: host[0,1] → bounce[0,1] → host[3,4]
// - Guards: host[2,5] (should remain unchanged)
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
3
,
4
];
let
guard_blocks
=
vec!
[
2
,
5
];
// Setup: Fill source blocks and guard blocks
let
src_checksums
=
fill_and_checksum_with_mode
(
&
host
,
&
src_blocks
,
FillPattern
::
Sequential
,
mode
)
?
;
let
guard_checksums
=
create_guard_blocks
(
&
host
,
&
guard_blocks
,
FillPattern
::
Constant
(
0xFF
))
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
// Execute bounce: host[0,1] → bounce[0,1]
let
mut
options_builder
=
TransferOptionsInternal
::
builder
();
if
let
Some
(
range
)
=
mode
.layer_range
()
{
options_builder
=
options_builder
.layer_range
(
range
);
}
let
notification
=
execute_transfer
(
&
host
,
&
bounce
,
&
src_blocks
,
&
src_blocks
,
options_builder
.build
()
?
,
ctx
.context
(),
)
?
;
notification
.await
?
;
// Execute bounce: bounce[0,1] → host[3,4]
let
mut
options_builder
=
TransferOptionsInternal
::
builder
();
if
let
Some
(
range
)
=
mode
.layer_range
()
{
options_builder
=
options_builder
.layer_range
(
range
);
}
let
notification
=
execute_transfer
(
&
bounce
,
&
host
,
&
src_blocks
,
&
dst_blocks
,
options_builder
.build
()
?
,
ctx
.context
(),
)
?
;
notification
.await
?
;
// Verify: Data integrity + guards unchanged
verify_checksums_by_position_with_mode
(
&
src_checksums
,
&
src_blocks
,
&
host
,
&
dst_blocks
,
mode
)
?
;
verify_guard_blocks_unchanged
(
&
host
,
&
guard_blocks
,
&
guard_checksums
)
?
;
Ok
(())
}
// ============================================================================
// Parameterized Direct Transfer Tests
// ============================================================================
/// Test direct transfers with parameterization over storage, layout, and transfer mode.
///
/// This demonstrates the DRY parameterized approach that can replace the 18 individual
/// tests above (System<=>System, Pinned<=>Pinned, cross-type, etc).
///
/// Note: Only tests System<=>System, Pinned<=>Pinned, and System<=>Pinned since we can only
/// fill/checksum System and Pinned storage. For Device tests, use bounce tests instead.
#[rstest]
// Storage combinations (only fillable storage types)
#[case(StorageKind::System,
StorageKind::System,
"sys_sys"
)]
#[case(StorageKind::Pinned,
StorageKind::Pinned,
"pin_pin"
)]
#[case(StorageKind::System,
StorageKind::Pinned,
"sys_pin"
)]
#[case(StorageKind::Pinned,
StorageKind::System,
"pin_sys"
)]
#[tokio::test]
async
fn
test_direct_transfer_fc_fc_full
(
#[case]
src_storage
:
StorageKind
,
#[case]
dst_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_direct_transfer_impl
(
src_storage
,
dst_storage
,
LayoutKind
::
FC
,
LayoutKind
::
FC
,
TransferMode
::
FullBlocks
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::System,
StorageKind::Pinned,
"sys_pin"
)]
#[case(StorageKind::Pinned,
StorageKind::System,
"pin_sys"
)]
#[tokio::test]
async
fn
test_direct_transfer_fc_lw_layer0
(
#[case]
src_storage
:
StorageKind
,
#[case]
dst_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_direct_transfer_impl
(
src_storage
,
dst_storage
,
LayoutKind
::
FC
,
LayoutKind
::
LW
,
TransferMode
::
FirstLayerOnly
,
name_suffix
,
)
.await
.unwrap
();
}
#[rstest]
#[case(StorageKind::Pinned,
StorageKind::Pinned,
"pin_pin"
)]
#[tokio::test]
async
fn
test_direct_transfer_lw_lw_layer1
(
#[case]
src_storage
:
StorageKind
,
#[case]
dst_storage
:
StorageKind
,
#[case]
name_suffix
:
&
str
,
)
{
test_direct_transfer_impl
(
src_storage
,
dst_storage
,
LayoutKind
::
LW
,
LayoutKind
::
LW
,
TransferMode
::
SecondLayerOnly
,
name_suffix
,
)
.await
.unwrap
();
}
/// Implementation helper for direct transfer tests.
async
fn
test_direct_transfer_impl
(
src_storage
:
StorageKind
,
dst_storage
:
StorageKind
,
src_layout
:
LayoutKind
,
dst_layout
:
LayoutKind
,
mode
:
TransferMode
,
name_suffix
:
&
str
,
)
->
Result
<
()
>
{
let
num_blocks
=
4
;
let
test_name
=
format!
(
"direct_{}_{:?}_{:?}_{}_{}"
,
name_suffix
,
src_layout
,
dst_layout
,
mode
.suffix
(),
std
::
time
::
SystemTime
::
now
()
.duration_since
(
std
::
time
::
UNIX_EPOCH
)
.unwrap
()
.as_millis
()
);
let
agent
=
create_test_agent
(
&
test_name
);
// Create layouts
let
src
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
src_layout
,
src_storage
),
num_blocks
,
);
let
dst
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
dst_layout
,
dst_storage
),
num_blocks
,
);
// Transfer src[0,1] -> dst[2,3]
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
// Fill source and compute checksums
let
src_checksums
=
fill_and_checksum_with_mode
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
,
mode
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
// Execute transfer
let
mut
options_builder
=
TransferOptionsInternal
::
builder
();
if
let
Some
(
range
)
=
mode
.layer_range
()
{
options_builder
=
options_builder
.layer_range
(
range
);
}
let
notification
=
execute_transfer
(
&
src
,
&
dst
,
&
src_blocks
,
&
dst_blocks
,
options_builder
.build
()
?
,
ctx
.context
(),
)
?
;
notification
.await
?
;
// Verify data integrity
verify_checksums_by_position_with_mode
(
&
src_checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
,
mode
)
?
;
Ok
(())
}
// ============================================================================
// Layout Compatibility Helper Tests
// ============================================================================
#[test]
fn
test_validate_layout_compatibility_same_layout
()
{
let
agent
=
create_test_agent
(
"test_compat_same"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// Both FC layouts with Unknown KvBlockLayout - should be compatible
assert
!
(
validate_layout_compatibility
(
&
src
,
&
dst
)
.is_ok
());
}
#[test]
fn
test_validate_layout_compatibility_fc_lw_same_block_layout
()
{
let
agent
=
create_test_agent
(
"test_compat_fc_lw"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_lw_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// Both have Unknown/Unknown-derived KvBlockLayout - should be compatible
// (Unknown→Unknown returns false for requires_transform)
assert
!
(
validate_layout_compatibility
(
&
src
,
&
dst
)
.is_ok
());
}
#[test]
fn
test_can_use_whole_block_fc_fc_full_block
()
{
let
agent
=
create_test_agent
(
"test_whole_block_fc_fc"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// Both FC + full block transfer = should use whole-block
assert
!
(
can_use_whole_block_transfer
(
&
src
,
&
dst
,
None
));
}
#[test]
fn
test_can_use_whole_block_fc_fc_full_range
()
{
let
agent
=
create_test_agent
(
"test_whole_block_fc_fc_range"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// Both FC + full range (0..num_layers) = should use whole-block
let
full_range
=
0
..
src
.layout
()
.num_layers
();
assert
!
(
can_use_whole_block_transfer
(
&
src
,
&
dst
,
Some
(
&
full_range
)));
}
#[test]
fn
test_can_use_whole_block_fc_fc_partial_layer
()
{
let
agent
=
create_test_agent
(
"test_whole_block_partial"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// Partial layer transfer = should NOT use whole-block
let
partial_range
=
0
..
1
;
assert
!
(
!
can_use_whole_block_transfer
(
&
src
,
&
dst
,
Some
(
&
partial_range
)
));
}
#[test]
fn
test_can_use_whole_block_fc_lw
()
{
let
agent
=
create_test_agent
(
"test_whole_block_fc_lw"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_lw_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// FC + LW = should NOT use whole-block (dst is not fully contiguous)
assert
!
(
!
can_use_whole_block_transfer
(
&
src
,
&
dst
,
None
));
}
#[test]
fn
test_can_use_whole_block_lw_fc
()
{
let
agent
=
create_test_agent
(
"test_whole_block_lw_fc"
);
let
src
=
create_lw_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// LW + FC = should NOT use whole-block (src is not fully contiguous)
assert
!
(
!
can_use_whole_block_transfer
(
&
src
,
&
dst
,
None
));
}
#[test]
fn
test_can_use_whole_block_lw_lw
()
{
let
agent
=
create_test_agent
(
"test_whole_block_lw_lw"
);
let
src
=
create_lw_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_lw_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
// LW + LW = should NOT use whole-block (neither is fully contiguous)
assert
!
(
!
can_use_whole_block_transfer
(
&
src
,
&
dst
,
None
));
}
// ============================================================================
// Whole-Block Transfer Integration Tests
// ============================================================================
/// Test that FC→FC transfers with full blocks use the whole-block path.
///
/// This test verifies data integrity for FC→FC transfers that should use
/// the optimized whole-block memcpy path.
#[rstest]
#[case(StorageKind::System,
StorageKind::System)]
#[case(StorageKind::System,
StorageKind::Pinned)]
#[case(StorageKind::Pinned,
StorageKind::Pinned)]
#[tokio::test]
async
fn
test_whole_block_transfer_fc_fc
(
#[case]
src_storage
:
StorageKind
,
#[case]
dst_storage
:
StorageKind
,
)
->
Result
<
()
>
{
let
agent
=
create_test_agent
(
"test_whole_block_fc_fc_transfer"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
src_storage
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
dst_storage
,
4
);
// Verify this should use whole-block path
assert
!
(
can_use_whole_block_transfer
(
&
src
,
&
dst
,
None
));
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
let
checksums
=
fill_and_checksum
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
let
notification
=
execute_transfer
(
&
src
,
&
dst
,
&
src_blocks
,
&
dst_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
verify_checksums_by_position
(
&
checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
)
?
;
Ok
(())
}
/// Test that FC→LW transfers fall back to layer-wise path.
///
/// This test verifies data integrity for FC→LW transfers that should use
/// the layer-wise path (not whole-block).
#[rstest]
#[case(StorageKind::System,
StorageKind::System)]
#[case(StorageKind::Pinned,
StorageKind::Pinned)]
#[tokio::test]
async
fn
test_layer_wise_transfer_fc_lw
(
#[case]
src_storage
:
StorageKind
,
#[case]
dst_storage
:
StorageKind
,
)
->
Result
<
()
>
{
let
agent
=
create_test_agent
(
"test_layer_wise_fc_lw_transfer"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
src_storage
,
4
);
let
dst
=
create_lw_layout
(
agent
.clone
(),
dst_storage
,
4
);
// Verify this should NOT use whole-block path
assert
!
(
!
can_use_whole_block_transfer
(
&
src
,
&
dst
,
None
));
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
let
checksums
=
fill_and_checksum
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
let
notification
=
execute_transfer
(
&
src
,
&
dst
,
&
src_blocks
,
&
dst_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
verify_checksums_by_position
(
&
checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
)
?
;
Ok
(())
}
/// Test partial layer transfer uses layer-wise path even for FC→FC.
#[tokio::test]
async
fn
test_partial_layer_transfer_uses_layer_wise
()
->
Result
<
()
>
{
let
agent
=
create_test_agent
(
"test_partial_layer"
);
let
src
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
System
,
4
);
let
dst
=
create_fc_layout
(
agent
.clone
(),
StorageKind
::
Pinned
,
4
);
// Verify partial transfer should NOT use whole-block path
let
partial_range
=
0
..
1
;
assert
!
(
!
can_use_whole_block_transfer
(
&
src
,
&
dst
,
Some
(
&
partial_range
)
));
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
// Fill source with sequential pattern for layer 0 only
let
checksums
=
fill_and_checksum_with_mode
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
,
TransferMode
::
FirstLayerOnly
,
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
let
options
=
TransferOptionsInternal
::
builder
()
.layer_range
(
partial_range
)
.build
()
?
;
let
notification
=
execute_transfer
(
&
src
,
&
dst
,
&
src_blocks
,
&
dst_blocks
,
options
,
ctx
.context
())
?
;
notification
.await
?
;
verify_checksums_by_position_with_mode
(
&
checksums
,
&
src_blocks
,
&
dst
,
&
dst_blocks
,
TransferMode
::
FirstLayerOnly
,
)
?
;
Ok
(())
}
// ============================================================================
// Transfer Coverage Gap Tests
// ============================================================================
/// Test that transferring layer 0 and layer 1 independently produces the same
/// result as a full-block transfer.
///
/// `test_partial_layer_transfer_uses_layer_wise` only transfers layer 0. This test
/// verifies that layer 0 + layer 1 transferred independently compose to the same
/// result as transferring all layers at once.
#[rstest]
#[case(LayoutKind::FC,
LayoutKind::FC)]
#[case(LayoutKind::FC,
LayoutKind::LW)]
#[case(LayoutKind::LW,
LayoutKind::FC)]
#[case(LayoutKind::LW,
LayoutKind::LW)]
#[tokio::test]
async
fn
test_layer_composition_equals_full_block
(
#[case]
src_kind
:
LayoutKind
,
#[case]
dst_kind
:
LayoutKind
,
)
->
Result
<
()
>
{
let
agent
=
create_test_agent
(
"test_layer_composition"
);
let
src
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
src_kind
,
StorageKind
::
Pinned
),
4
,
);
let
dst_full
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
dst_kind
,
StorageKind
::
Pinned
),
4
,
);
let
dst_layered
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
dst_kind
,
StorageKind
::
Pinned
),
4
,
);
let
src_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
// Fill source blocks with sequential pattern (all layers)
fill_and_checksum
(
&
src
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
// Full-block transfer: src[0,1] → dst_full[2,3]
let
notification
=
execute_transfer
(
&
src
,
&
dst_full
,
&
src_blocks
,
&
dst_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
// Layer-wise transfers: src[0,1] → dst_layered[2,3] layer by layer
let
options_layer0
=
TransferOptionsInternal
::
builder
()
.layer_range
(
0
..
1
)
.build
()
?
;
let
notification
=
execute_transfer
(
&
src
,
&
dst_layered
,
&
src_blocks
,
&
dst_blocks
,
options_layer0
,
ctx
.context
(),
)
?
;
notification
.await
?
;
let
options_layer1
=
TransferOptionsInternal
::
builder
()
.layer_range
(
1
..
2
)
.build
()
?
;
let
notification
=
execute_transfer
(
&
src
,
&
dst_layered
,
&
src_blocks
,
&
dst_blocks
,
options_layer1
,
ctx
.context
(),
)
?
;
notification
.await
?
;
// Compute full-block checksums on both destinations
let
checksums_full
=
compute_block_checksums
(
&
dst_full
,
&
dst_blocks
)
?
;
let
checksums_layered
=
compute_block_checksums
(
&
dst_layered
,
&
dst_blocks
)
?
;
// Layer-wise composition must equal full-block transfer
for
&
block_id
in
&
dst_blocks
{
assert_eq!
(
checksums_full
[
&
block_id
],
checksums_layered
[
&
block_id
],
"Block {}: full-block checksum ({}) != layer-composed checksum ({})"
,
block_id
,
checksums_full
[
&
block_id
],
checksums_layered
[
&
block_id
],
);
}
Ok
(())
}
/// Test that FC↔LW CUDA transfers through Pinned↔Device correctly use the
/// vectorized path and preserve data integrity through a roundtrip.
///
/// Existing tests cover FC↔LW via memcpy (System/Pinned) and via `test_p2p`.
/// This test explicitly asserts that CUDA FC↔LW transfers go through the
/// vectorized path (not whole-block) and verifies roundtrip integrity.
#[rstest]
#[case(LayoutKind::FC,
LayoutKind::LW)]
#[case(LayoutKind::LW,
LayoutKind::FC)]
#[tokio::test]
async
fn
test_cuda_fc_lw_roundtrip_uses_vectorized
(
#[case]
host_kind
:
LayoutKind
,
#[case]
device_kind
:
LayoutKind
,
)
->
Result
<
()
>
{
let
agent
=
build_agent_for_kinds
(
&
[
StorageKind
::
Pinned
,
StorageKind
::
Device
(
0
)])
?
;
let
host
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
host_kind
,
StorageKind
::
Pinned
),
4
,
);
let
device
=
create_layout
(
agent
.clone
(),
LayoutSpec
::
new
(
device_kind
,
StorageKind
::
Device
(
0
)),
4
,
);
// Confirm vectorized path will be used (not whole-block)
assert
!
(
!
can_use_whole_block_transfer
(
&
host
,
&
device
,
None
),
"FC↔LW across Pinned↔Device should use vectorized path, not whole-block"
);
let
src_blocks
=
vec!
[
0
,
1
];
let
device_blocks
=
vec!
[
0
,
1
];
let
dst_blocks
=
vec!
[
2
,
3
];
// Fill host source blocks
let
checksums
=
fill_and_checksum
(
&
host
,
&
src_blocks
,
FillPattern
::
Sequential
)
?
;
let
ctx
=
create_transfer_context
(
agent
,
None
)
?
;
// H2D: host[0,1] → device[0,1]
let
notification
=
execute_transfer
(
&
host
,
&
device
,
&
src_blocks
,
&
device_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
// D2H: device[0,1] → host[2,3]
let
notification
=
execute_transfer
(
&
device
,
&
host
,
&
device_blocks
,
&
dst_blocks
,
TransferOptionsInternal
::
default
(),
ctx
.context
(),
)
?
;
notification
.await
?
;
// Verify roundtrip: host[0,1] == host[2,3]
verify_checksums_by_position
(
&
checksums
,
&
src_blocks
,
&
host
,
&
dst_blocks
)
?
;
Ok
(())
}
lib/kvbm-physical/src/transfer/tests/mod.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Comprehensive transfer tests for verifying data integrity across storage types and layout configurations.
mod
local_transfers
;
/// Skip test if stub kernels are in use (no real CUDA available).
///
/// Call this at the start of any test that requires real CUDA operations.
/// When stubs are in use, the test will print a message and return early.
///
/// # Example
/// ```ignore
/// #[test]
/// fn my_cuda_test() {
/// skip_if_stubs!();
/// // ... test code that requires CUDA ...
/// }
/// ```
#[allow(unused_macros)]
macro_rules!
skip_if_stubs
{
()
=>
{
if
kvbm_kernels
::
is_using_stubs
()
{
eprintln!
(
"Skipping test '{}': stub kernels in use (no real CUDA)"
,
module_path!
()
);
return
;
}
};
}
/// Check if any of the storage kinds require CUDA, and skip if stubs are in use.
///
/// Call this at the start of parameterized tests that may or may not use Device storage.
#[allow(unused_macros)]
macro_rules!
skip_if_stubs_and_device
{
(
$
(
$kind:expr
),
+
$
(,)
?
)
=>
{
if
kvbm_kernels
::
is_using_stubs
()
{
let
needs_cuda
=
false
$
(||
matches!
(
$kind
,
StorageKind
::
Device
(
_
)))
+
;
if
needs_cuda
{
eprintln!
(
"Skipping test '{}': stub kernels in use and test requires Device storage"
,
module_path!
()
);
return
Ok
(());
}
}
};
}
// Make the macros available to submodules
#[allow(unused_imports)]
pub
(
crate
)
use
skip_if_stubs
;
#[allow(unused_imports)]
pub
(
crate
)
use
skip_if_stubs_and_device
;
use
super
::{
BlockChecksum
,
FillPattern
,
NixlAgent
,
PhysicalLayout
,
StorageKind
,
TransferCapabilities
,
compute_block_checksums
,
compute_layer_checksums
,
fill_blocks
,
fill_layers
,
};
use
crate
::{
BlockId
,
layout
::{
BlockDimension
,
LayoutConfig
,
builder
::{
HasConfig
,
NoLayout
,
NoMemory
,
PhysicalLayoutBuilder
},
},
};
use
anyhow
::
Result
;
use
cudarc
::
driver
::
sys
::
CUdevice_attribute_enum
;
use
cudarc
::
driver
::{
CudaContext
,
CudaStream
,
LaunchConfig
,
PushKernelArg
};
use
cudarc
::
nvrtc
::{
CompileOptions
,
compile_ptx_with_opts
};
use
std
::
collections
::
HashMap
;
use
std
::
ops
::
Range
;
use
std
::
sync
::{
Arc
,
OnceLock
};
use
std
::
time
::{
Duration
,
Instant
};
/// Layout kind for parameterized testing.
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq)]
pub
enum
LayoutKind
{
/// Fully contiguous layout
FC
,
/// Layer-wise (layer-separate) layout
LW
,
}
/// Storage and layout specification for creating test layouts.
#[derive(Debug,
Clone,
Copy)]
pub
struct
LayoutSpec
{
pub
kind
:
LayoutKind
,
pub
storage
:
StorageKind
,
}
impl
LayoutSpec
{
pub
fn
new
(
kind
:
LayoutKind
,
storage
:
StorageKind
)
->
Self
{
Self
{
kind
,
storage
}
}
}
/// Transfer mode for parameterized testing.
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq)]
pub
enum
TransferMode
{
/// Transfer entire blocks (all layers)
FullBlocks
,
/// Transfer only the first layer
FirstLayerOnly
,
/// Transfer only the second layer
SecondLayerOnly
,
}
impl
TransferMode
{
/// Convert to optional layer range for execute_transfer.
pub
fn
layer_range
(
&
self
)
->
Option
<
Range
<
usize
>>
{
match
self
{
TransferMode
::
FullBlocks
=>
None
,
TransferMode
::
FirstLayerOnly
=>
Some
(
0
..
1
),
TransferMode
::
SecondLayerOnly
=>
Some
(
1
..
2
),
}
}
/// Get a descriptive suffix for test names.
pub
fn
suffix
(
&
self
)
->
&
'static
str
{
match
self
{
TransferMode
::
FullBlocks
=>
"full"
,
TransferMode
::
FirstLayerOnly
=>
"layer0"
,
TransferMode
::
SecondLayerOnly
=>
"layer1"
,
}
}
}
/// Standard layout configuration for all tests.
pub
fn
standard_config
(
num_blocks
:
usize
)
->
LayoutConfig
{
LayoutConfig
::
builder
()
.num_blocks
(
num_blocks
)
.num_layers
(
2
)
.outer_dim
(
2
)
.page_size
(
16
)
.inner_dim
(
128
)
.dtype_width_bytes
(
2
)
.build
()
.unwrap
()
}
/// Helper function for creating a PhysicalLayout builder with standard config.
///
/// This is used by other test modules (fill, checksum, validation) for backwards compatibility.
pub
fn
builder
(
num_blocks
:
usize
)
->
PhysicalLayoutBuilder
<
HasConfig
,
NoLayout
,
NoMemory
>
{
let
agent
=
create_test_agent
(
"test_agent"
);
let
config
=
standard_config
(
num_blocks
);
PhysicalLayout
::
builder
(
agent
)
.with_config
(
config
)
}
/// Create a test agent with no backends.
///
/// Use this for tests that don't require specific NIXL backends.
pub
fn
create_test_agent
(
name
:
&
str
)
->
NixlAgent
{
NixlAgent
::
new
(
name
)
.expect
(
"Failed to create agent"
)
}
/// Create a test agent with specific backends (strict - all must succeed).
#[expect(dead_code)]
pub
fn
create_test_agent_with_backends
(
name
:
&
str
,
backends
:
&
[
&
str
])
->
Result
<
NixlAgent
>
{
NixlAgent
::
with_backends
(
name
,
backends
)
}
/// Create a fully contiguous physical layout with the specified storage type.
pub
fn
create_fc_layout
(
agent
:
NixlAgent
,
storage_kind
:
StorageKind
,
num_blocks
:
usize
,
)
->
PhysicalLayout
{
let
config
=
standard_config
(
num_blocks
);
let
builder
=
PhysicalLayout
::
builder
(
agent
)
.with_config
(
config
)
.fully_contiguous
();
match
storage_kind
{
StorageKind
::
System
=>
builder
.allocate_system
()
.build
()
.unwrap
(),
StorageKind
::
Pinned
=>
builder
.allocate_pinned
(
None
)
.build
()
.unwrap
(),
StorageKind
::
Device
(
device_id
)
=>
builder
.allocate_device
(
device_id
)
.build
()
.unwrap
(),
StorageKind
::
Disk
(
_
)
=>
builder
.allocate_disk
(
None
)
.build
()
.unwrap
(),
}
}
/// Create a layer-separate physical layout with the specified storage type.
pub
fn
create_lw_layout
(
agent
:
NixlAgent
,
storage_kind
:
StorageKind
,
num_blocks
:
usize
,
)
->
PhysicalLayout
{
let
config
=
standard_config
(
num_blocks
);
let
builder
=
PhysicalLayout
::
builder
(
agent
)
.with_config
(
config
)
.layer_separate
(
BlockDimension
::
BlockIsFirstDim
);
match
storage_kind
{
StorageKind
::
System
=>
builder
.allocate_system
()
.build
()
.unwrap
(),
StorageKind
::
Pinned
=>
builder
.allocate_pinned
(
None
)
.build
()
.unwrap
(),
StorageKind
::
Device
(
device_id
)
=>
builder
.allocate_device
(
device_id
)
.build
()
.unwrap
(),
StorageKind
::
Disk
(
_
)
=>
builder
.allocate_disk
(
None
)
.build
()
.unwrap
(),
}
}
/// Create a physical layout based on the specification.
///
/// This is a DRY helper that dispatches to create_fc_layout or create_lw_layout
/// based on the layout kind in the spec.
pub
fn
create_layout
(
agent
:
NixlAgent
,
spec
:
LayoutSpec
,
num_blocks
:
usize
)
->
PhysicalLayout
{
match
spec
.kind
{
LayoutKind
::
FC
=>
create_fc_layout
(
agent
,
spec
.storage
,
num_blocks
),
LayoutKind
::
LW
=>
create_lw_layout
(
agent
,
spec
.storage
,
num_blocks
),
}
}
/// Create a transport manager for testing with the specified agent.
///
/// Note: The agent should already have backends configured. Use `create_test_agent`
/// or `build_agent_with_backends` to create properly configured agents.
pub
fn
create_transfer_context
(
agent
:
NixlAgent
,
capabilities
:
Option
<
TransferCapabilities
>
,
)
->
Result
<
crate
::
manager
::
TransferManager
>
{
crate
::
manager
::
TransferManager
::
builder
()
.capabilities
(
capabilities
.unwrap_or_default
())
.nixl_agent
(
agent
)
.cuda_device_id
(
0
)
.build
()
}
/// Fill blocks and compute checksums.
///
/// This can only be called on System or Pinned layouts.
pub
fn
fill_and_checksum
(
layout
:
&
PhysicalLayout
,
block_ids
:
&
[
BlockId
],
pattern
:
FillPattern
,
)
->
Result
<
HashMap
<
BlockId
,
BlockChecksum
>>
{
fill_blocks
(
layout
,
block_ids
,
pattern
)
?
;
compute_block_checksums
(
layout
,
block_ids
)
}
/// Fill blocks or layers based on transfer mode and compute checksums.
///
/// This is a mode-aware version of fill_and_checksum that handles both
/// full block transfers and layer-wise transfers.
pub
fn
fill_and_checksum_with_mode
(
layout
:
&
PhysicalLayout
,
block_ids
:
&
[
BlockId
],
pattern
:
FillPattern
,
mode
:
TransferMode
,
)
->
Result
<
HashMap
<
BlockId
,
BlockChecksum
>>
{
match
mode
{
TransferMode
::
FullBlocks
=>
{
fill_blocks
(
layout
,
block_ids
,
pattern
)
?
;
compute_block_checksums
(
layout
,
block_ids
)
}
TransferMode
::
FirstLayerOnly
=>
{
fill_layers
(
layout
,
block_ids
,
0
..
1
,
pattern
)
?
;
compute_layer_checksums
(
layout
,
block_ids
,
0
..
1
)
}
TransferMode
::
SecondLayerOnly
=>
{
fill_layers
(
layout
,
block_ids
,
1
..
2
,
pattern
)
?
;
compute_layer_checksums
(
layout
,
block_ids
,
1
..
2
)
}
}
}
/// Verify that destination block checksums match the expected source checksums.
///
/// This function compares checksums in order, assuming the source and destination
/// block arrays have a 1:1 correspondence (src[i] was transferred to dst[i]).
pub
fn
verify_checksums_by_position
(
src_checksums
:
&
HashMap
<
BlockId
,
BlockChecksum
>
,
src_block_ids
:
&
[
BlockId
],
dst_layout
:
&
PhysicalLayout
,
dst_block_ids
:
&
[
BlockId
],
)
->
Result
<
()
>
{
assert_eq!
(
src_block_ids
.len
(),
dst_block_ids
.len
(),
"Source and destination block arrays must have same length"
);
let
dst_checksums
=
compute_block_checksums
(
dst_layout
,
dst_block_ids
)
?
;
for
(
src_id
,
dst_id
)
in
src_block_ids
.iter
()
.zip
(
dst_block_ids
.iter
())
{
let
src_checksum
=
src_checksums
.get
(
src_id
)
.unwrap_or_else
(||
panic!
(
"Missing source checksum for block {}"
,
src_id
));
let
dst_checksum
=
dst_checksums
.get
(
dst_id
)
.unwrap_or_else
(||
panic!
(
"Missing destination checksum for block {}"
,
dst_id
));
assert_eq!
(
src_checksum
,
dst_checksum
,
"Checksum mismatch: src[{}] != dst[{}]: {} != {}"
,
src_id
,
dst_id
,
src_checksum
,
dst_checksum
);
}
Ok
(())
}
/// Verify checksums with transfer mode awareness.
///
/// This is a mode-aware version that handles both full block and layer-wise verification.
pub
fn
verify_checksums_by_position_with_mode
(
src_checksums
:
&
HashMap
<
BlockId
,
BlockChecksum
>
,
src_block_ids
:
&
[
BlockId
],
dst_layout
:
&
PhysicalLayout
,
dst_block_ids
:
&
[
BlockId
],
mode
:
TransferMode
,
)
->
Result
<
()
>
{
assert_eq!
(
src_block_ids
.len
(),
dst_block_ids
.len
(),
"Source and destination block arrays must have same length"
);
let
dst_checksums
=
match
mode
{
TransferMode
::
FullBlocks
=>
compute_block_checksums
(
dst_layout
,
dst_block_ids
)
?
,
TransferMode
::
FirstLayerOnly
=>
compute_layer_checksums
(
dst_layout
,
dst_block_ids
,
0
..
1
)
?
,
TransferMode
::
SecondLayerOnly
=>
compute_layer_checksums
(
dst_layout
,
dst_block_ids
,
1
..
2
)
?
,
};
for
(
src_id
,
dst_id
)
in
src_block_ids
.iter
()
.zip
(
dst_block_ids
.iter
())
{
let
src_checksum
=
src_checksums
.get
(
src_id
)
.unwrap_or_else
(||
panic!
(
"Missing source checksum for block {}"
,
src_id
));
let
dst_checksum
=
dst_checksums
.get
(
dst_id
)
.unwrap_or_else
(||
panic!
(
"Missing destination checksum for block {}"
,
dst_id
));
assert_eq!
(
src_checksum
,
dst_checksum
,
"Checksum mismatch (mode={:?}): src[{}] != dst[{}]: {} != {}"
,
mode
,
src_id
,
dst_id
,
src_checksum
,
dst_checksum
);
}
Ok
(())
}
/// Fill guard blocks and return their checksums for later verification.
///
/// Guard blocks are blocks adjacent to transfer destinations that should
/// remain unchanged during transfers. This function fills them with a
/// distinctive pattern and returns their checksums for later validation.
///
/// # Arguments
/// * `layout` - The physical layout containing the guard blocks
/// * `guard_block_ids` - Block IDs to use as guards
/// * `pattern` - Fill pattern for guard blocks (typically a constant like 0xFF)
///
/// # Returns
/// A map of block ID to checksum for all guard blocks
pub
fn
create_guard_blocks
(
layout
:
&
PhysicalLayout
,
guard_block_ids
:
&
[
usize
],
pattern
:
FillPattern
,
)
->
Result
<
HashMap
<
usize
,
BlockChecksum
>>
{
fill_blocks
(
layout
,
guard_block_ids
,
pattern
)
?
;
compute_block_checksums
(
layout
,
guard_block_ids
)
}
/// Verify that guard blocks remain unchanged after transfers.
///
/// This function compares the current checksums of guard blocks against
/// their expected values. Any mismatch indicates memory corruption or
/// unintended overwrites during transfer operations.
///
/// # Arguments
/// * `layout` - The physical layout containing the guard blocks
/// * `guard_block_ids` - Block IDs to verify
/// * `expected_checksums` - Expected checksums from create_guard_blocks
///
/// # Errors
/// Returns an error if any guard block checksum has changed
pub
fn
verify_guard_blocks_unchanged
(
layout
:
&
PhysicalLayout
,
guard_block_ids
:
&
[
usize
],
expected_checksums
:
&
HashMap
<
usize
,
BlockChecksum
>
,
)
->
Result
<
()
>
{
let
current_checksums
=
compute_block_checksums
(
layout
,
guard_block_ids
)
?
;
for
&
block_id
in
guard_block_ids
{
let
expected
=
expected_checksums
.get
(
&
block_id
)
.unwrap_or_else
(||
panic!
(
"Missing expected checksum for guard block {}"
,
block_id
));
let
current
=
current_checksums
.get
(
&
block_id
)
.unwrap_or_else
(||
panic!
(
"Missing current checksum for guard block {}"
,
block_id
));
if
expected
!=
current
{
return
Err
(
anyhow
::
anyhow!
(
"Guard block {} was modified during transfer! Expected: {}, Got: {}"
,
block_id
,
expected
,
current
));
}
}
Ok
(())
}
/// CUDA sleep kernel source code.
const
SLEEP_KERNEL_SRC
:
&
str
=
r#"
extern "C" __global__ void sleep_kernel(unsigned long long min_cycles) {
const unsigned long long start = clock64();
while ((clock64() - start) < min_cycles) {
asm volatile("");
}
}
"#
;
/// A reusable CUDA sleep utility for tests.
///
/// This struct provides a simple interface to execute GPU sleep operations
/// with calibrated timing. It compiles the sleep kernel once per CUDA context
/// and caches the calibration for reuse.
///
/// The calibration is conservative (prefers longer sleep durations over shorter)
/// to ensure minimum sleep times are met.
pub
struct
CudaSleep
{
function
:
cudarc
::
driver
::
CudaFunction
,
cycles_per_ms
:
f64
,
}
impl
CudaSleep
{
/// Get or create a CudaSleep instance for the given CUDA context.
///
/// This function uses lazy initialization and caches instances per device ID.
/// The first call for each device will compile the kernel and run calibration.
///
/// # Arguments
/// * `cuda_ctx` - The CUDA context to use
///
/// # Returns
/// A shared reference to the CudaSleep instance for this context's device.
pub
fn
for_context
(
cuda_ctx
:
&
Arc
<
CudaContext
>
)
->
Result
<
Arc
<
Self
>>
{
static
INSTANCES
:
OnceLock
<
parking_lot
::
Mutex
<
HashMap
<
usize
,
Arc
<
CudaSleep
>>>>
=
OnceLock
::
new
();
let
instances
=
INSTANCES
.get_or_init
(||
parking_lot
::
Mutex
::
new
(
HashMap
::
new
()));
let
device_ordinal
=
cuda_ctx
.ordinal
();
// Fast path: check if instance already exists
{
let
instances_guard
=
instances
.lock
();
if
let
Some
(
instance
)
=
instances_guard
.get
(
&
device_ordinal
)
{
return
Ok
(
Arc
::
clone
(
instance
));
}
}
// Slow path: create new instance with calibration
let
instance
=
Arc
::
new
(
Self
::
new
(
cuda_ctx
)
?
);
// Store in cache
let
mut
instances_guard
=
instances
.lock
();
instances_guard
.entry
(
device_ordinal
)
.or_insert_with
(||
Arc
::
clone
(
&
instance
));
Ok
(
instance
)
}
/// Create a new CudaSleep instance with calibration.
///
/// This compiles the sleep kernel and runs a calibration loop to determine
/// the relationship between clock cycles and wall-clock time.
fn
new
(
cuda_ctx
:
&
Arc
<
CudaContext
>
)
->
Result
<
Self
>
{
// Get device compute capability
let
major
=
cuda_ctx
.attribute
(
CUdevice_attribute_enum
::
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
)
?
;
let
minor
=
cuda_ctx
.attribute
(
CUdevice_attribute_enum
::
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
)
?
;
// Compile PTX for this device
let
mut
compile_opts
=
CompileOptions
{
name
:
Some
(
"sleep_kernel.cu"
.into
()),
..
Default
::
default
()
};
compile_opts
.options
.push
(
format!
(
"--gpu-architecture=compute_{}{}"
,
major
,
minor
));
let
ptx
=
compile_ptx_with_opts
(
SLEEP_KERNEL_SRC
,
compile_opts
)
?
;
let
module
=
cuda_ctx
.load_module
(
ptx
)
?
;
let
function
=
module
.load_function
(
"sleep_kernel"
)
?
;
// Get device clock rate
let
clock_rate_khz
=
cuda_ctx
.attribute
(
CUdevice_attribute_enum
::
CU_DEVICE_ATTRIBUTE_CLOCK_RATE
)
?
as
u64
;
// Create a temporary stream for calibration
let
stream
=
cuda_ctx
.new_stream
()
?
;
// Warm up to absorb JIT overhead
let
warm_cycles
=
clock_rate_khz
.saturating_mul
(
10
)
.max
(
1
);
Self
::
launch_kernel
(
&
function
,
&
stream
,
warm_cycles
)
?
;
stream
.synchronize
()
?
;
// Run calibration loop
let
desired_delay
=
Duration
::
from_millis
(
600
);
let
mut
target_cycles
=
clock_rate_khz
.saturating_mul
(
50
)
.max
(
1
);
// ~50ms starting point
let
mut
actual_duration
=
Duration
::
ZERO
;
for
_
in
0
..
8
{
let
start
=
Instant
::
now
();
Self
::
launch_kernel
(
&
function
,
&
stream
,
target_cycles
)
?
;
stream
.synchronize
()
?
;
actual_duration
=
start
.elapsed
();
if
actual_duration
>=
desired_delay
{
break
;
}
target_cycles
=
target_cycles
.saturating_mul
(
2
);
}
// Calculate cycles per millisecond with conservative 20% margin
// (prefer longer sleeps over shorter)
let
cycles_per_ms
=
if
actual_duration
.as_millis
()
>
0
{
(
target_cycles
as
f64
/
actual_duration
.as_millis
()
as
f64
)
*
1.2
}
else
{
clock_rate_khz
as
f64
// Fallback to clock rate
};
Ok
(
Self
{
function
,
cycles_per_ms
,
})
}
/// Launch the sleep kernel with the specified number of cycles.
fn
launch_kernel
(
function
:
&
cudarc
::
driver
::
CudaFunction
,
stream
:
&
Arc
<
CudaStream
>
,
cycles
:
u64
,
)
->
Result
<
()
>
{
let
launch_cfg
=
LaunchConfig
{
grid_dim
:
(
1
,
1
,
1
),
block_dim
:
(
1
,
1
,
1
),
shared_mem_bytes
:
0
,
};
let
mut
launch
=
stream
.launch_builder
(
function
);
unsafe
{
launch
.arg
(
&
cycles
);
launch
.launch
(
launch_cfg
)
?
;
}
Ok
(())
}
/// Launch a sleep operation on the given stream.
///
/// This queues a GPU kernel that will sleep for approximately the specified
/// duration. The sleep is conservative and may take longer than requested.
///
/// # Arguments
/// * `duration` - The minimum duration to sleep
/// * `stream` - The CUDA stream to launch the kernel on
///
/// # Returns
/// Ok(()) if the kernel was successfully queued
pub
fn
launch
(
&
self
,
duration
:
Duration
,
stream
:
&
Arc
<
CudaStream
>
)
->
Result
<
()
>
{
let
target_cycles
=
(
duration
.as_millis
()
as
f64
*
self
.cycles_per_ms
)
as
u64
;
Self
::
launch_kernel
(
&
self
.function
,
stream
,
target_cycles
)
}
}
lib/kvbm-physical/src/transfer/validation.rs
0 → 100644
View file @
9ab148dc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Block ID validation for transfers.
//!
//! This module provides validation functions to ensure block transfers are safe and correct.
use
crate
::
BlockId
;
use
super
::
PhysicalLayout
;
use
std
::
collections
::
HashSet
;
use
thiserror
::
Error
;
/// Validation errors for block transfers.
#[derive(Debug,
Error,
PartialEq)]
pub
enum
BlockValidationError
{
/// Destination block IDs contain duplicates.
#[error(
"Destination block IDs are not unique: duplicates = {duplicates:?}"
)]
DuplicateDestinationBlocks
{
duplicates
:
Vec
<
BlockId
>
},
/// Source and destination blocks overlap when using the same layout.
#[error(
"Source and destination blocks overlap (same layout): overlapping = {overlapping:?}"
)]
OverlappingBlocks
{
overlapping
:
Vec
<
BlockId
>
},
/// Lists have mismatched lengths.
#[error(
"Block ID lists have mismatched lengths: src={src_len}, dst={dst_len}, bounce={bounce_len:?}"
)]
LengthMismatch
{
src_len
:
usize
,
dst_len
:
usize
,
bounce_len
:
Option
<
usize
>
,
},
/// Block ID is out of range for the layout.
#[error(
"Block ID {block_id} out of range for {layout_name} (max={max})"
)]
BlockOutOfRange
{
block_id
:
BlockId
,
layout_name
:
&
'static
str
,
max
:
usize
,
},
/// Bounce block IDs contain duplicates.
#[error(
"Bounce block IDs are not unique: duplicates = {duplicates:?}"
)]
DuplicateBounceBlocks
{
duplicates
:
Vec
<
BlockId
>
},
}
/// Validate that destination block IDs are unique (no duplicates).
///
/// # Arguments
/// * `dst_block_ids` - Destination block IDs
///
/// # Returns
/// Ok(()) if unique, Err with duplicate IDs otherwise
pub
fn
validate_dst_unique
(
dst_block_ids
:
&
[
BlockId
])
->
Result
<
(),
BlockValidationError
>
{
let
mut
seen
=
HashSet
::
new
();
let
mut
duplicates
=
Vec
::
new
();
for
&
id
in
dst_block_ids
{
if
!
seen
.insert
(
id
)
&&
!
duplicates
.contains
(
&
id
)
{
duplicates
.push
(
id
);
}
}
if
duplicates
.is_empty
()
{
Ok
(())
}
else
{
Err
(
BlockValidationError
::
DuplicateDestinationBlocks
{
duplicates
})
}
}
/// Validate that bounce block IDs are unique (no duplicates).
pub
fn
validate_bounce_unique
(
bounce_block_ids
:
&
[
BlockId
])
->
Result
<
(),
BlockValidationError
>
{
let
mut
seen
=
HashSet
::
new
();
let
mut
duplicates
=
Vec
::
new
();
for
&
id
in
bounce_block_ids
{
if
!
seen
.insert
(
id
)
&&
!
duplicates
.contains
(
&
id
)
{
duplicates
.push
(
id
);
}
}
if
duplicates
.is_empty
()
{
Ok
(())
}
else
{
Err
(
BlockValidationError
::
DuplicateBounceBlocks
{
duplicates
})
}
}
/// Check if two layouts are the same by comparing their Arc pointers.
///
/// This is a conservative check - if pointers differ, layouts might still be the same
/// but we treat them as different to avoid false positives in disjoint validation.
fn
are_same_layout
(
layout1
:
&
PhysicalLayout
,
layout2
:
&
PhysicalLayout
)
->
bool
{
// Compare Arc pointer addresses
std
::
ptr
::
eq
(
std
::
sync
::
Arc
::
as_ptr
(
layout1
.layout
()),
std
::
sync
::
Arc
::
as_ptr
(
layout2
.layout
()),
)
}
/// Validate that src and dst block IDs are disjoint when using the same layout.
///
/// Only enforced in debug mode when src and dst point to the same layout.
///
/// # Arguments
/// * `src_block_ids` - Source block IDs
/// * `dst_block_ids` - Destination block IDs
/// * `src_layout` - Source physical layout
/// * `dst_layout` - Destination physical layout
#[cfg(debug_assertions)]
pub
fn
validate_disjoint_same_layout
(
src_block_ids
:
&
[
BlockId
],
dst_block_ids
:
&
[
BlockId
],
src_layout
:
&
PhysicalLayout
,
dst_layout
:
&
PhysicalLayout
,
)
->
Result
<
(),
BlockValidationError
>
{
// Only check if same layout
if
!
are_same_layout
(
src_layout
,
dst_layout
)
{
return
Ok
(());
}
let
src_set
:
HashSet
<
_
>
=
src_block_ids
.iter
()
.copied
()
.collect
();
let
overlapping
:
Vec
<
_
>
=
dst_block_ids
.iter
()
.filter
(|
id
|
src_set
.contains
(
id
))
.copied
()
.collect
();
if
overlapping
.is_empty
()
{
Ok
(())
}
else
{
Err
(
BlockValidationError
::
OverlappingBlocks
{
overlapping
})
}
}
/// Validate block IDs are in range for a layout.
#[cfg(debug_assertions)]
pub
fn
validate_block_ids_in_range
(
block_ids
:
&
[
BlockId
],
layout
:
&
PhysicalLayout
,
layout_name
:
&
'static
str
,
)
->
Result
<
(),
BlockValidationError
>
{
let
max_blocks
=
layout
.layout
()
.config
()
.num_blocks
;
for
&
block_id
in
block_ids
{
if
block_id
>=
max_blocks
as
BlockId
{
return
Err
(
BlockValidationError
::
BlockOutOfRange
{
block_id
,
layout_name
,
max
:
max_blocks
,
});
}
}
Ok
(())
}
/// Full validation for block transfer (debug mode).
///
/// Validates:
/// - List lengths match
/// - Destination IDs are unique
/// - Bounce IDs are unique (if provided)
/// - Source and destination are disjoint (if same layout)
/// - All block IDs are in range for their respective layouts
#[cfg(debug_assertions)]
pub
fn
validate_block_transfer
(
src_block_ids
:
&
[
BlockId
],
dst_block_ids
:
&
[
BlockId
],
bounce_block_ids
:
Option
<&
[
BlockId
]
>
,
src_layout
:
&
PhysicalLayout
,
dst_layout
:
&
PhysicalLayout
,
bounce_layout
:
Option
<&
PhysicalLayout
>
,
)
->
Result
<
(),
BlockValidationError
>
{
// Validate lengths
if
src_block_ids
.len
()
!=
dst_block_ids
.len
()
{
return
Err
(
BlockValidationError
::
LengthMismatch
{
src_len
:
src_block_ids
.len
(),
dst_len
:
dst_block_ids
.len
(),
bounce_len
:
bounce_block_ids
.map
(|
ids
|
ids
.len
()),
});
}
if
let
Some
(
bounce_ids
)
=
bounce_block_ids
&&
bounce_ids
.len
()
!=
src_block_ids
.len
()
{
return
Err
(
BlockValidationError
::
LengthMismatch
{
src_len
:
src_block_ids
.len
(),
dst_len
:
dst_block_ids
.len
(),
bounce_len
:
Some
(
bounce_ids
.len
()),
});
}
#[cfg(debug_assertions)]
{
// Validate destination uniqueness
validate_dst_unique
(
dst_block_ids
)
?
;
// Validate bounce uniqueness if provided
if
let
Some
(
bounce_ids
)
=
bounce_block_ids
{
validate_bounce_unique
(
bounce_ids
)
?
;
}
// Validate disjoint if same layout
validate_disjoint_same_layout
(
src_block_ids
,
dst_block_ids
,
src_layout
,
dst_layout
)
?
;
// Validate block IDs in range
validate_block_ids_in_range
(
src_block_ids
,
src_layout
,
"source"
)
?
;
validate_block_ids_in_range
(
dst_block_ids
,
dst_layout
,
"destination"
)
?
;
if
let
(
Some
(
bounce_ids
),
Some
(
bounce_layout
))
=
(
bounce_block_ids
,
bounce_layout
)
{
validate_block_ids_in_range
(
bounce_ids
,
bounce_layout
,
"bounce"
)
?
;
}
}
Ok
(())
}
/// Minimal validation for block transfer (release mode).
///
/// Only validates:
/// - List lengths match
/// - Destination IDs are unique
#[cfg(not(debug_assertions))]
pub
fn
validate_block_transfer
(
src_block_ids
:
&
[
BlockId
],
dst_block_ids
:
&
[
BlockId
],
bounce_block_ids
:
Option
<&
[
BlockId
]
>
,
_
src_layout
:
&
PhysicalLayout
,
_
dst_layout
:
&
PhysicalLayout
,
_
bounce_layout
:
Option
<&
PhysicalLayout
>
,
)
->
Result
<
(),
BlockValidationError
>
{
// Validate lengths
if
src_block_ids
.len
()
!=
dst_block_ids
.len
()
{
return
Err
(
BlockValidationError
::
LengthMismatch
{
src_len
:
src_block_ids
.len
(),
dst_len
:
dst_block_ids
.len
(),
bounce_len
:
bounce_block_ids
.map
(|
ids
|
ids
.len
()),
});
}
if
let
Some
(
bounce_ids
)
=
bounce_block_ids
{
if
bounce_ids
.len
()
!=
src_block_ids
.len
()
{
return
Err
(
BlockValidationError
::
LengthMismatch
{
src_len
:
src_block_ids
.len
(),
dst_len
:
dst_block_ids
.len
(),
bounce_len
:
Some
(
bounce_ids
.len
()),
});
}
}
// Validate destination uniqueness
validate_dst_unique
(
dst_block_ids
)
?
;
Ok
(())
}
#[cfg(all(test,
feature
=
"testing-kvbm"
))]
mod
tests
{
use
super
::
super
::
tests
::
*
;
use
super
::
*
;
#[test]
fn
test_dst_unique_valid
()
{
let
ids
=
vec!
[
0
,
1
,
2
,
3
,
4
];
assert
!
(
validate_dst_unique
(
&
ids
)
.is_ok
());
}
#[test]
fn
test_dst_unique_duplicate
()
{
let
ids
=
vec!
[
0
,
1
,
2
,
1
,
3
];
let
result
=
validate_dst_unique
(
&
ids
);
assert
!
(
result
.is_err
());
match
result
.unwrap_err
()
{
BlockValidationError
::
DuplicateDestinationBlocks
{
duplicates
}
=>
{
assert_eq!
(
duplicates
,
vec!
[
1
]);
}
_
=>
panic!
(
"Wrong error type"
),
}
}
#[test]
fn
test_dst_unique_multiple_duplicates
()
{
let
ids
=
vec!
[
0
,
1
,
2
,
1
,
3
,
2
];
let
result
=
validate_dst_unique
(
&
ids
);
assert
!
(
result
.is_err
());
match
result
.unwrap_err
()
{
BlockValidationError
::
DuplicateDestinationBlocks
{
duplicates
}
=>
{
assert
!
(
duplicates
.contains
(
&
1
));
assert
!
(
duplicates
.contains
(
&
2
));
}
_
=>
panic!
(
"Wrong error type"
),
}
}
#[test]
#[cfg(debug_assertions)]
fn
test_disjoint_same_layout_valid
()
{
let
physical
=
builder
(
2
)
.fully_contiguous
()
.allocate_system
()
.build
()
.unwrap
();
let
src_ids
=
vec!
[
0
,
1
,
2
];
let
dst_ids
=
vec!
[
5
,
6
,
7
];
assert
!
(
validate_disjoint_same_layout
(
&
src_ids
,
&
dst_ids
,
&
physical
,
&
physical
)
.is_ok
());
}
#[test]
#[cfg(debug_assertions)]
fn
test_disjoint_same_layout_overlap
()
{
let
physical
=
builder
(
2
)
.fully_contiguous
()
.allocate_system
()
.build
()
.unwrap
();
let
src_ids
=
vec!
[
0
,
1
,
2
];
let
dst_ids
=
vec!
[
2
,
3
,
4
];
// 2 overlaps
let
result
=
validate_disjoint_same_layout
(
&
src_ids
,
&
dst_ids
,
&
physical
,
&
physical
);
assert
!
(
result
.is_err
());
match
result
.unwrap_err
()
{
BlockValidationError
::
OverlappingBlocks
{
overlapping
}
=>
{
assert_eq!
(
overlapping
,
vec!
[
2
]);
}
_
=>
panic!
(
"Wrong error type"
),
}
}
#[test]
fn
test_disjoint_different_layouts_ok
()
{
let
physical1
=
builder
(
2
)
.fully_contiguous
()
.allocate_system
()
.build
()
.unwrap
();
let
physical2
=
builder
(
2
)
.fully_contiguous
()
.allocate_system
()
.build
()
.unwrap
();
let
src_ids
=
vec!
[
0
,
1
,
2
];
let
dst_ids
=
vec!
[
0
,
1
,
2
];
// Same IDs but different layouts
// Should be OK since different layouts
#[cfg(debug_assertions)]
assert
!
(
validate_disjoint_same_layout
(
&
src_ids
,
&
dst_ids
,
&
physical1
,
&
physical2
)
.is_ok
());
}
#[test]
fn
test_length_mismatch
()
{
let
physical1
=
builder
(
2
)
.fully_contiguous
()
.allocate_system
()
.build
()
.unwrap
();
let
physical2
=
builder
(
2
)
.fully_contiguous
()
.allocate_system
()
.build
()
.unwrap
();
let
src_ids
=
vec!
[
0
,
1
,
2
];
let
dst_ids
=
vec!
[
5
,
6
];
// Different length
let
result
=
validate_block_transfer
(
&
src_ids
,
&
dst_ids
,
None
,
&
physical1
,
&
physical2
,
None
);
assert
!
(
result
.is_err
());
match
result
.unwrap_err
()
{
BlockValidationError
::
LengthMismatch
{
src_len
,
dst_len
,
bounce_len
,
}
=>
{
assert_eq!
(
src_len
,
3
);
assert_eq!
(
dst_len
,
2
);
assert_eq!
(
bounce_len
,
None
);
}
_
=>
panic!
(
"Wrong error type"
),
}
}
// #[test]
// #[cfg(debug_assertions)]
// fn test_block_out_of_range() {
// let (_layout, physical) = create_test_layout(5); // Only 5 blocks
// let src_ids = vec![0, 1, 2];
// let dst_ids = vec![3, 4, 10]; // 10 is out of range
// let result = validate_block_ids_in_range(&dst_ids, &physical, "destination");
// assert!(result.is_err());
// match result.unwrap_err() {
// BlockValidationError::BlockOutOfRange {
// block_id,
// layout_name,
// max,
// } => {
// assert_eq!(block_id, 10);
// assert_eq!(layout_name, "destination");
// assert_eq!(max, 5);
// }
// _ => panic!("Wrong error type"),
// }
// }
// #[test]
// fn test_bounce_length_mismatch() {
// let (_layout1, physical1) = create_test_layout(10);
// let (_layout2, physical2) = create_test_layout(10);
// let (_layout3, physical3) = create_test_layout(10);
// let src_ids = vec![0, 1, 2];
// let dst_ids = vec![5, 6, 7];
// let bounce_ids = vec![8, 9]; // Wrong length
// let result = validate_block_transfer(
// &src_ids,
// &dst_ids,
// Some(&bounce_ids),
// &physical1,
// &physical2,
// Some(&physical3),
// );
// assert!(result.is_err());
// match result.unwrap_err() {
// BlockValidationError::LengthMismatch {
// src_len,
// dst_len,
// bounce_len,
// } => {
// assert_eq!(src_len, 3);
// assert_eq!(dst_len, 3);
// assert_eq!(bounce_len, Some(2));
// }
// _ => panic!("Wrong error type"),
// }
// }
// #[test]
// fn test_full_validation_success() {
// let (_layout1, physical1) = create_test_layout(10);
// let (_layout2, physical2) = create_test_layout(10);
// let (_layout3, physical3) = create_test_layout(10);
// let src_ids = vec![0, 1, 2];
// let dst_ids = vec![5, 6, 7];
// let bounce_ids = vec![8, 9, 3];
// assert!(
// validate_block_transfer(
// &src_ids,
// &dst_ids,
// Some(&bounce_ids),
// &physical1,
// &physical2,
// Some(&physical3),
// )
// .is_ok()
// );
// }
}
lib/memory/src/nixl.rs
View file @
9ab148dc
...
...
@@ -15,7 +15,8 @@ pub use agent::NixlAgent;
pub
use
config
::
NixlBackendConfig
;
pub
use
nixl_sys
::{
Agent
,
MemType
,
NotificationMap
,
OptArgs
,
RegistrationHandle
,
XferDescList
,
XferOp
,
XferRequest
,
Agent
,
MemType
,
NotificationMap
,
OptArgs
,
RegistrationHandle
,
XferDescList
,
XferOp
,
XferRequest
,
is_stub
,
};
pub
use
serde
::{
Deserialize
,
Serialize
};
...
...
lib/memory/src/nixl/agent.rs
View file @
9ab148dc
...
...
@@ -8,7 +8,7 @@
//! - `NixlBackendConfig`: Configuration for NIXL backends from environment variables
use
anyhow
::
Result
;
use
nixl_sys
::
Agent
;
use
nixl_sys
::
{
Agent
,
is_stub
}
;
use
std
::
collections
::{
HashMap
,
HashSet
};
use
crate
::
nixl
::
NixlBackendConfig
;
...
...
@@ -34,6 +34,9 @@ pub struct NixlAgent {
impl
NixlAgent
{
/// Create a NIXL agent without any backends.
pub
fn
new
(
name
:
&
str
)
->
Result
<
Self
>
{
if
is_stub
()
{
return
Err
(
anyhow
::
anyhow!
(
"NIXL is not supported in stub mode"
));
}
let
agent
=
Agent
::
new
(
name
)
?
;
Ok
(
Self
{
...
...
lib/runtime/examples/Cargo.lock
View file @
9ab148dc
...
...
@@ -412,9 +412,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
version = "0.4.4
3
"
version = "0.4.4
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118
"
checksum = "
c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0
"
dependencies = [
"iana-time-zone",
"num-traits",
...
...
@@ -682,9 +682,9 @@ dependencies = [
[[package]]
name = "deranged"
version = "0.5.
6
"
version = "0.5.
8
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4
"
checksum = "
7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c
"
dependencies = [
"powerfmt",
"serde_core",
...
...
@@ -1687,9 +1687,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.
86
"
version = "0.3.
91
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
d36139f1c97c42c0c86a411910b04e48d4939a0376e6e0f989420cbdee0120e5
"
checksum = "
b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c
"
dependencies = [
"once_cell",
"wasm-bindgen",
...
...
@@ -1911,20 +1911,21 @@ checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "libredox"
version = "0.1.1
2
"
version = "0.1.1
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616
"
checksum = "
1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a
"
dependencies = [
"bitflags 2.11.0",
"libc",
"redox_syscall 0.7.1",
"plain",
"redox_syscall 0.7.3",
]
[[package]]
name = "linux-raw-sys"
version = "0.1
1.0
"
version = "0.1
2.1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039
"
checksum = "
32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53
"
[[package]]
name = "litemap"
...
...
@@ -2411,18 +2412,18 @@ dependencies = [
[[package]]
name = "pin-project"
version = "1.1.1
0
"
version = "1.1.1
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a
"
checksum = "
f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517
"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.1
0
"
version = "1.1.1
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861
"
checksum = "
d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6
"
dependencies = [
"proc-macro2",
"quote",
...
...
@@ -2431,9 +2432,9 @@ dependencies = [
[[package]]
name = "pin-project-lite"
version = "0.2.1
6
"
version = "0.2.1
7
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b
"
checksum = "
a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd
"
[[package]]
name = "pin-utils"
...
...
@@ -2457,6 +2458,12 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plain"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "portable-atomic"
version = "1.13.1"
...
...
@@ -2631,9 +2638,9 @@ dependencies = [
[[package]]
name = "pulldown-cmark"
version = "0.13.
0
"
version = "0.13.
1
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0
"
checksum = "
83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6
"
dependencies = [
"bitflags 2.11.0",
"memchr",
...
...
@@ -2809,9 +2816,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.7.
1
"
version = "0.7.
3
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b
"
checksum = "
6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16
"
dependencies = [
"bitflags 2.11.0",
]
...
...
@@ -2861,9 +2868,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.8.
9
"
version = "0.8.
10
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c
"
checksum = "
dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a
"
[[package]]
name = "reqwest"
...
...
@@ -2957,9 +2964,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "1.1.
3
"
version = "1.1.
4
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34
"
checksum = "
b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190
"
dependencies = [
"bitflags 2.11.0",
"errno",
...
...
@@ -2970,9 +2977,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.23.3
6
"
version = "0.23.3
7
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b
"
checksum = "
758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4
"
dependencies = [
"aws-lc-rs",
"log",
...
...
@@ -3483,9 +3490,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "tempfile"
version = "3.2
5
.0"
version = "3.2
6
.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1
"
checksum = "
82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0
"
dependencies = [
"fastrand",
"getrandom 0.4.1",
...
...
@@ -4141,9 +4148,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.1
09
"
version = "0.2.1
14
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
9ff9c7baef35ac3c0e17d8bfc9ad75eb62f85a2f02bccc906699dadb0aa9c622
"
checksum = "
6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e
"
dependencies = [
"cfg-if 1.0.4",
"once_cell",
...
...
@@ -4154,9 +4161,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.
59
"
version = "0.4.
64
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
d24699cd39db9966cf6e2ef10d2f72779c961ad905911f395ea201c3ec9f545d
"
checksum = "
e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8
"
dependencies = [
"cfg-if 1.0.4",
"futures-util",
...
...
@@ -4168,9 +4175,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.1
09
"
version = "0.2.1
14
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
39455e84ad887a0bbc93c116d72403f1bb0a39e37dd6f235a43e2128a0c7f1fd
"
checksum = "
18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6
"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
...
...
@@ -4178,9 +4185,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.1
09
"
version = "0.2.1
14
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
dff4761f60b0b51fd13fec8764167b7bbcc34498ce3e52805fe1db6f2d56b6d6
"
checksum = "
03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3
"
dependencies = [
"bumpalo",
"proc-macro2",
...
...
@@ -4191,9 +4198,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.1
09
"
version = "0.2.1
14
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
bc6a171c53d98021a93a474c4a4579d76ba97f9517d871bc12e27640f218b6dd
"
checksum = "
75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16
"
dependencies = [
"unicode-ident",
]
...
...
@@ -4247,9 +4254,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.
86
"
version = "0.3.
91
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
668fa5d00434e890a452ab060d24e3904d1be93f7bb01b70e5603baa2b8ab23b
"
checksum = "
854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9
"
dependencies = [
"js-sys",
"wasm-bindgen",
...
...
@@ -4757,18 +4764,18 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.
39
"
version = "0.8.
40
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a
"
checksum = "
a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5
"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.
39
"
version = "0.8.
40
"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517
"
checksum = "
f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953
"
dependencies = [
"proc-macro2",
"quote",
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment