Unverified Commit 9ab148dc authored by Ryan Olson's avatar Ryan Olson Committed by GitHub
Browse files

feat: kvbm-physical (#6490)


Signed-off-by: default avatarRyan Olson <rolson@nvidia.com>
parent 7546c193
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer completion notification system.
//!
//! This module provides abstractions for waiting on transfer completions using different
//! mechanisms: polling-based (NIXL status, CUDA events) and event-based (NIXL notifications).
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use anyhow::Result;
use tokio::sync::mpsc;
use tokio::time::interval;
use tracing::{error, warn};
use uuid::Uuid;
use velo_events::{EventHandle, EventManager};
pub mod cuda_event;
pub mod nixl_events;
pub mod nixl_status;
pub mod notification;
pub use cuda_event::CudaEventChecker;
pub use nixl_events::{RegisterNixlNotification, process_nixl_notification_events};
pub use nixl_status::NixlStatusChecker;
pub use notification::TransferCompleteNotification;
/// Trait for checking if a transfer operation has completed.
/// Supports polling-based completion checks (NIXL status, CUDA events).
pub trait CompletionChecker: Send {
/// Returns true if the transfer is complete, false if still pending.
fn is_complete(&self) -> Result<bool>;
}
/// Registration message for polling-based transfer completion.
pub struct RegisterPollingNotification<C: CompletionChecker> {
pub uuid: Uuid,
pub checker: C,
pub event_handle: EventHandle,
}
/// Tracking struct for outstanding polling-based transfers.
struct OutstandingPollingTransfer<C: CompletionChecker> {
checker: C,
event_handle: EventHandle,
arrived_at: Instant,
last_warned_at: Option<Instant>,
}
/// Helper function to check if a transfer should be warned about and log the warning.
/// Returns the new last_warned_at time if a warning was issued.
fn check_and_warn_slow_transfer(
uuid: &Uuid,
arrived_at: Instant,
last_warned_at: Option<Instant>,
) -> Option<Instant> {
let elapsed = arrived_at.elapsed();
if elapsed > Duration::from_secs(60) {
let should_warn = last_warned_at
.map(|last| last.elapsed() > Duration::from_secs(30))
.unwrap_or(true);
if should_warn {
warn!(
uuid = %uuid,
elapsed_secs = elapsed.as_secs(),
"Transfer has been pending for over 1 minute"
);
return Some(Instant::now());
}
}
last_warned_at
}
/// Generic polling-based transfer completion handler.
/// Works with any CompletionChecker implementation (NIXL status, CUDA events, etc.)
pub async fn process_polling_notifications<C: CompletionChecker>(
mut rx: mpsc::Receiver<RegisterPollingNotification<C>>,
system: Arc<EventManager>,
) {
let mut outstanding: HashMap<Uuid, OutstandingPollingTransfer<C>> = HashMap::new();
let mut check_interval = interval(Duration::from_millis(1));
loop {
tokio::select! {
// Handle new transfer requests
notification = rx.recv() => {
match notification {
Some(notif) => {
outstanding.insert(notif.uuid, OutstandingPollingTransfer {
checker: notif.checker,
event_handle: notif.event_handle,
arrived_at: Instant::now(),
last_warned_at: None,
});
}
None => {
// Channel closed, finish processing outstanding transfers then exit
break;
}
}
}
// Periodically check status of outstanding transfers
_ = check_interval.tick(), if !outstanding.is_empty() => {
let mut completed = Vec::new();
for (uuid, transfer) in outstanding.iter_mut() {
// Check transfer status
match transfer.checker.is_complete() {
Ok(true) => {
// Transfer complete - mark for removal
completed.push((*uuid, Ok(())));
}
Ok(false) => {
// Transfer still in progress - check if we should warn
transfer.last_warned_at = check_and_warn_slow_transfer(
uuid,
transfer.arrived_at,
transfer.last_warned_at,
);
}
Err(e) => {
warn!(
uuid = %uuid,
error = %e,
"Transfer status check failed"
);
completed.push((*uuid, Err(e)));
}
}
}
// Remove completed transfers and signal completion
for (uuid, result) in completed {
if let Some(transfer) = outstanding.remove(&uuid) {
// Signal completion via Nova event system
match result {
Ok(()) => {
if let Err(e) = system.trigger(transfer.event_handle) {
error!(
uuid = %uuid,
error = %e,
"Failed to trigger completion event"
);
}
}
Err(e) => {
if let Err(err) = system.poison(transfer.event_handle, e.to_string()) {
error!(
uuid = %uuid,
error = %err,
"Failed to poison completion event"
);
}
}
}
}
}
}
}
}
// Channel closed, but we may still have outstanding transfers
// Continue processing them until all are complete
while !outstanding.is_empty() {
check_interval.tick().await;
let mut completed = Vec::new();
for (uuid, transfer) in outstanding.iter_mut() {
match transfer.checker.is_complete() {
Ok(true) => completed.push((*uuid, Ok(()))),
Ok(false) => {}
Err(e) => completed.push((*uuid, Err(e))),
}
}
for (uuid, result) in completed {
if let Some(transfer) = outstanding.remove(&uuid) {
match result {
Ok(()) => {
let _ = system.trigger(transfer.event_handle);
}
Err(e) => {
let _ = system.poison(transfer.event_handle, e.to_string());
}
}
}
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! NIXL notification-based completion handler.
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use dynamo_memory::nixl::{Agent as NixlAgent, NotificationMap, XferRequest};
use tokio::sync::mpsc;
use tokio::time::interval;
use tracing::{error, warn};
use uuid::Uuid;
use velo_events::{EventHandle, EventManager};
/// Registration message for NIXL notification-based transfer completion.
pub struct RegisterNixlNotification {
pub uuid: Uuid,
pub xfer_req: XferRequest,
pub event_handle: EventHandle,
}
/// Tracking struct for outstanding NIXL notification transfers.
struct OutstandingTransfer {
#[allow(dead_code)] // Kept for potential future cleanup or debugging
xfer_req: XferRequest,
event_handle: EventHandle,
arrived_at: Instant,
last_warned_at: Option<Instant>,
}
/// Helper function to check if a transfer should be warned about and log the warning.
/// Returns the new last_warned_at time if a warning was issued.
fn check_and_warn_slow_transfer(
uuid: &Uuid,
arrived_at: Instant,
last_warned_at: Option<Instant>,
) -> Option<Instant> {
let elapsed = arrived_at.elapsed();
if elapsed > Duration::from_secs(60) {
let should_warn = last_warned_at
.map(|last| last.elapsed() > Duration::from_secs(30))
.unwrap_or(true);
if should_warn {
warn!(
uuid = %uuid,
elapsed_secs = elapsed.as_secs(),
"Transfer has been pending for over 1 minute"
);
return Some(Instant::now());
}
}
last_warned_at
}
/// NIXL notification-based transfer completion handler.
/// Fetches notifications in batches and matches them against outstanding transfers.
pub async fn process_nixl_notification_events(
agent: NixlAgent,
mut rx: mpsc::Receiver<RegisterNixlNotification>,
system: Arc<EventManager>,
) {
let mut outstanding: HashMap<Uuid, OutstandingTransfer> = HashMap::new();
let mut check_interval = interval(Duration::from_millis(1));
loop {
tokio::select! {
// Handle new transfer requests
notification = rx.recv() => {
match notification {
Some(notif) => {
outstanding.insert(notif.uuid, OutstandingTransfer {
xfer_req: notif.xfer_req,
event_handle: notif.event_handle,
arrived_at: Instant::now(),
last_warned_at: None,
});
}
None => {
// Channel closed, finish processing outstanding transfers then exit
break;
}
}
}
// Periodically fetch and process notifications
_ = check_interval.tick(), if !outstanding.is_empty() => {
// Create notification map inside this branch to avoid Send issues
let mut notif_map = match NotificationMap::new() {
Ok(map) => map,
Err(e) => {
warn!(error = %e, "Failed to create notification map");
continue;
}
};
// Fetch all pending notifications
if let Err(e) = agent.get_notifications(&mut notif_map, None) {
warn!(error = %e, "Failed to fetch NIXL notifications");
continue;
}
// Process notifications and match against outstanding transfers
let notifications = match notif_map.take_notifs() {
Ok(notifs) => notifs,
Err(e) => {
warn!(error = %e, "Failed to extract notifications from map");
continue;
}
};
let mut completed = Vec::new();
// Iterate through all notifications
for (_agent_name, notif_strings) in notifications {
for notif_str in notif_strings {
// Try to parse notification as UUID
// NOTE: This assumes notifications contain UUIDs.
// The actual format may be different and may need adjustment.
if let Ok(notif_uuid) = Uuid::parse_str(&notif_str) {
if outstanding.contains_key(&notif_uuid) {
completed.push(notif_uuid);
} else {
// Notification arrived before we started waiting for it
// This is the race condition we need to handle
warn!(
uuid = %notif_uuid,
"Received notification for transfer not in outstanding map (early arrival)"
);
}
}
}
}
// Check for slow transfers and update warnings
for (uuid, transfer) in outstanding.iter_mut() {
if !completed.contains(uuid) {
transfer.last_warned_at = check_and_warn_slow_transfer(
uuid,
transfer.arrived_at,
transfer.last_warned_at,
);
}
}
// Remove completed transfers and signal completion
for uuid in completed {
if let Some(transfer) = outstanding.remove(&uuid)
&& let Err(e) = system.trigger(transfer.event_handle) {
error!(
uuid = %uuid,
error = %e,
"Failed to trigger completion event"
);
}
}
}
}
}
// Channel closed, but we may still have outstanding transfers
// Continue processing them until all are complete
while !outstanding.is_empty() {
check_interval.tick().await;
let mut notif_map = match NotificationMap::new() {
Ok(map) => map,
Err(_) => continue,
};
if let Ok(()) = agent.get_notifications(&mut notif_map, None)
&& let Ok(notifications) = notif_map.take_notifs()
{
let mut completed = Vec::new();
for (_agent_name, notif_strings) in notifications {
for notif_str in notif_strings {
if let Ok(notif_uuid) = Uuid::parse_str(&notif_str)
&& outstanding.contains_key(&notif_uuid)
{
completed.push(notif_uuid);
}
}
}
for uuid in completed {
if let Some(transfer) = outstanding.remove(&uuid) {
let _ = system.trigger(transfer.event_handle);
}
}
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! NIXL status polling-based completion checker.
use anyhow::{Result, anyhow};
use dynamo_memory::nixl::{Agent as NixlAgent, XferRequest};
use super::CompletionChecker;
/// Completion checker that polls NIXL transfer status.
pub struct NixlStatusChecker {
agent: NixlAgent,
xfer_req: XferRequest,
}
impl NixlStatusChecker {
pub fn new(agent: NixlAgent, xfer_req: XferRequest) -> Self {
Self { agent, xfer_req }
}
}
impl CompletionChecker for NixlStatusChecker {
fn is_complete(&self) -> Result<bool> {
// get_xfer_status returns XferStatus enum:
// - XferStatus::Success means transfer is complete
// - XferStatus::InProgress means still pending
match self.agent.get_xfer_status(&self.xfer_req) {
Ok(status) => Ok(status.is_success()),
Err(e) => Err(anyhow!("NIXL transfer status check failed: {}", e)),
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer completion notification handle.
use anyhow::Result;
use futures::future::{Either, Ready, ready};
use std::{
pin::Pin,
sync::Arc,
task::{Context, Poll},
};
use velo_events::{Event, EventAwaiter, EventManager};
pub enum TransferAwaiter {
Local(EventAwaiter),
// Sync(SyncResult),
}
impl std::future::Future for TransferAwaiter {
type Output = Result<()>;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
match self.get_mut() {
Self::Local(waiter) => Pin::new(waiter).poll(cx),
// Self::Sync(sync) => Pin::new(sync).poll(cx),
}
}
}
/// Notification handle for an in-progress transfer.
///
/// This object can be awaited to block until the transfer completes.
/// The transfer is tracked by a background handler that polls for completion
/// or processes notification events.
///
/// Uses `futures::Either` to avoid event system overhead for synchronous completions.
/// Pending transfers use `LocalEventWaiter` which avoids heap allocation and repeated
/// DashMap lookups when awaiting.
pub struct TransferCompleteNotification {
awaiter: Either<Ready<Result<()>>, TransferAwaiter>,
}
impl TransferCompleteNotification {
/// Create a notification that is already completed (for synchronous transfers).
///
/// This is useful for transfers that complete immediately without needing
/// background polling, such as memcpy operations.
///
/// This is extremely efficient - no allocations, locks, or event system overhead.
pub fn completed() -> Self {
Self {
awaiter: Either::Left(ready(Ok(()))),
}
}
/// Create a notification from a `LocalEventWaiter`.
///
/// This is the primary way to construct a notification when you already
/// have an event waiter from the event system.
pub fn from_awaiter(awaiter: EventAwaiter) -> Self {
Self {
awaiter: Either::Right(TransferAwaiter::Local(awaiter)),
}
}
// /// Create a notification from a synchronous active message result.
// pub fn from_sync_result(sync: SyncResult) -> Self {
// Self {
// awaiter: Either::Right(TransferAwaiter::Sync(sync)),
// }
// }
/// Check if the notification can yield the current task.
///
/// The internal ::Left arm is guaranteed to be ready, while the ::Right arm is not.
pub fn could_yield(&self) -> bool {
matches!(self.awaiter, Either::Right(_))
}
/// Aggregate multiple notifications into one that completes when all are done.
///
/// This is useful when a transfer is split across multiple workers and you want
/// to wait for all of them to complete.
///
/// # Arguments
/// * `notifications` - The notifications to aggregate
/// * `events` - The event system to create the aggregate event
/// * `runtime` - The tokio runtime handle to spawn the aggregation task
///
/// # Behavior
/// - If the list is empty, returns an already-completed notification
/// - If there's only one, returns it directly
/// - Otherwise, creates a new event and spawns a task to await all notifications
pub fn aggregate(
notifications: Vec<Self>,
events: &Arc<EventManager>,
runtime: &tokio::runtime::Handle,
) -> Result<Self> {
if notifications.is_empty() {
return Ok(Self::completed());
}
if notifications.len() == 1 {
return Ok(notifications.into_iter().next().unwrap());
}
// Check if all notifications are already complete (no yielding needed)
if notifications.iter().all(|n| !n.could_yield()) {
return Ok(Self::completed());
}
// Create a new event for the aggregate completion
let event = events.new_event()?;
let awaiter = events.awaiter(event.handle())?;
// Spawn task that awaits all notifications and triggers/poisons the event
runtime.spawn(await_all_notifications(notifications, event));
Ok(Self::from_awaiter(awaiter))
}
}
/// Awaits all transfer notifications and signals completion via the event.
///
/// This function awaits ALL notifications regardless of individual failures,
/// then triggers the event on success or poisons it with error details on failure.
async fn await_all_notifications(
notifications: Vec<TransferCompleteNotification>,
local_event: Event,
) {
// Await all notifications, collecting results
let results: Vec<Result<()>> =
futures::future::join_all(notifications.into_iter().map(|n| n.into_future())).await;
// Check for any failures
let errors: Vec<_> = results.into_iter().filter_map(|r| r.err()).collect();
if errors.is_empty() {
// Ignore trigger error - if event system is shutdown, nothing to do
let _ = local_event.trigger();
} else {
let error_msg = errors
.iter()
.map(|e| e.to_string())
.collect::<Vec<_>>()
.join("; ");
// Ignore poison error - if event system is shutdown, nothing to do
let _ = local_event.poison(error_msg);
}
}
impl std::future::IntoFuture for TransferCompleteNotification {
type Output = Result<()>;
type IntoFuture = Either<Ready<Result<()>>, TransferAwaiter>;
fn into_future(self) -> Self::IntoFuture {
self.awaiter
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer options for configuring block and layer transfers.
use super::BounceBuffer;
use crate::layout::KvBlockLayout;
use cudarc::driver::CudaStream;
use derive_builder::Builder;
use derive_getters::Dissolve;
use std::ops::Range;
use std::sync::Arc;
/// Options for configuring transfer operations.
///
/// This structure provides configuration for block and layer transfers,
/// including layer ranges, NIXL write notifications, and bounce buffers.
///
/// # Examples
///
/// ```rust,ignore
/// let options = TransferOptions::builder()
/// .nixl_write_notification(42)
/// .layer_range(0..10)
/// .build();
/// ```
#[derive(Clone, Default, Builder, Dissolve)]
#[builder(pattern = "owned", default)]
pub struct TransferOptions {
/// Range of layers to transfer (None = all layers).
///
/// When specified, only the layers in this range will be transferred.
/// This is useful for partial block transfers or layer-specific operations.
#[builder(default, setter(strip_option))]
pub layer_range: Option<Range<usize>>,
/// NIXL write notification value delivered after RDMA write completes.
///
/// When specified, NIXL will deliver this notification value to the remote
/// node after the RDMA write operation completes. This enables efficient
/// notification of transfer completion without requiring polling.
#[builder(default, setter(strip_option))]
pub nixl_write_notification: Option<u64>,
/// Bounce buffer specification for multi-hop transfers.
///
/// When direct transfers are not allowed or efficient, this specifies
/// an intermediate staging area. The transfer will be split into two hops:
/// source → bounce buffer → destination.
#[builder(default, setter(strip_option, into))]
pub bounce_buffer: Option<BounceBuffer>,
/// Optional caller-provided CUDA stream for the transfer.
///
/// When provided, the transfer executor will use this stream instead of
/// acquiring one from the pool. The caller is responsible for synchronization -
/// no event is recorded by the executor.
///
/// This is useful for layer-wise transfers where all layers must execute
/// on the same stream to allow proper event sequencing.
#[builder(default, setter(strip_option))]
pub cuda_stream: Option<Arc<CudaStream>>,
/// Override source block layout interpretation.
///
/// When set, the transfer executor will treat source blocks as having
/// this layout instead of the layout's default block_layout().
/// This enables transferring blocks that are stored in one format
/// but should be interpreted as another (e.g., operational → universal).
#[builder(default, setter(strip_option))]
pub src_kv_layout: Option<KvBlockLayout>,
/// Override destination block layout interpretation.
///
/// When set, the transfer executor will treat destination blocks as having
/// this layout instead of the layout's default block_layout().
/// This enables writing blocks in a different format than the destination
/// layout's native format.
#[builder(default, setter(strip_option))]
pub dst_kv_layout: Option<KvBlockLayout>,
}
impl TransferOptions {
/// Create a new builder for transfer options.
pub fn builder() -> TransferOptionsBuilder {
TransferOptionsBuilder::default()
}
/// Create transfer options from an optional layer range.
pub fn from_layer_range(layer_range: Option<Range<usize>>) -> Self {
Self {
layer_range,
..Self::default()
}
}
/// Create default transfer options.
///
/// This transfers all layers with no special configuration.
pub fn new() -> Self {
Self::default()
}
}
#[cfg(all(test, feature = "testing-kvbm"))]
mod tests {
use super::*;
#[test]
fn test_default_options() {
let options = TransferOptions::default();
assert!(options.layer_range.is_none());
assert!(options.nixl_write_notification.is_none());
assert!(options.bounce_buffer.is_none());
}
#[test]
fn test_builder_with_notification() {
let options = TransferOptions::builder()
.nixl_write_notification(42)
.build()
.unwrap();
assert_eq!(options.nixl_write_notification, Some(42));
assert!(options.layer_range.is_none());
}
#[test]
fn test_builder_with_layer_range() {
let options = TransferOptions::builder()
.layer_range(0..10)
.build()
.unwrap();
assert_eq!(options.layer_range, Some(0..10));
assert!(options.nixl_write_notification.is_none());
}
#[test]
fn test_builder_with_all_options() {
let options = TransferOptions::builder()
.nixl_write_notification(100)
.layer_range(5..15)
.build()
.unwrap();
assert_eq!(options.nixl_write_notification, Some(100));
assert_eq!(options.layer_range, Some(5..15));
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#![allow(dead_code)]
//! Transfer preferences for resolving redundant strategy choices.
//!
//! Some source/destination combinations can use multiple transfer strategies.
//! For example:
//! - System ↔ Pinned: memcpy or NIXL
//! - Pinned ↔ Device: CUDA or NIXL
//!
//! This module provides preferences to control which strategy to prefer.
use serde::{Deserialize, Serialize};
/// Policy for choosing between native transports (memcpy/CUDA) and NIXL.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum NativeVsNixlPolicy {
/// Always prefer native transports (memcpy/CUDA) when available
PreferNative,
/// Always prefer NIXL when available
PreferNixl,
/// Use native for local-to-local, NIXL for remote/disk
#[default]
Automatic,
}
/// Transfer preferences for strategy selection.
///
/// These preferences allow fine-grained control over transfer strategy selection
/// when multiple valid strategies exist for a source/destination pair.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransferPreferences {
/// Policy for native vs NIXL transport selection
pub native_vs_nixl: NativeVsNixlPolicy,
/// Whether to prefer async CUDA operations over blocking ones
pub prefer_async_cuda: bool,
}
impl Default for TransferPreferences {
fn default() -> Self {
Self {
native_vs_nixl: NativeVsNixlPolicy::default(),
prefer_async_cuda: true,
}
}
}
impl TransferPreferences {
/// Create preferences with all defaults.
pub fn new() -> Self {
Self::default()
}
/// Create preferences that always prefer native transports.
pub fn prefer_native() -> Self {
Self {
native_vs_nixl: NativeVsNixlPolicy::PreferNative,
prefer_async_cuda: true,
}
}
/// Create preferences that always prefer NIXL.
pub fn prefer_nixl() -> Self {
Self {
native_vs_nixl: NativeVsNixlPolicy::PreferNixl,
prefer_async_cuda: true,
}
}
/// Set the native vs NIXL policy.
pub fn with_native_vs_nixl(mut self, policy: NativeVsNixlPolicy) -> Self {
self.native_vs_nixl = policy;
self
}
/// Set whether to prefer async CUDA operations.
pub fn with_async_cuda(mut self, prefer_async: bool) -> Self {
self.prefer_async_cuda = prefer_async;
self
}
}
#[cfg(all(test, feature = "testing-kvbm"))]
mod tests {
use super::*;
#[test]
fn test_default_preferences() {
let prefs = TransferPreferences::default();
assert_eq!(prefs.native_vs_nixl, NativeVsNixlPolicy::Automatic);
assert!(prefs.prefer_async_cuda);
}
#[test]
fn test_prefer_native() {
let prefs = TransferPreferences::prefer_native();
assert_eq!(prefs.native_vs_nixl, NativeVsNixlPolicy::PreferNative);
assert!(prefs.prefer_async_cuda);
}
#[test]
fn test_prefer_nixl() {
let prefs = TransferPreferences::prefer_nixl();
assert_eq!(prefs.native_vs_nixl, NativeVsNixlPolicy::PreferNixl);
assert!(prefs.prefer_async_cuda);
}
#[test]
fn test_builder_pattern() {
let prefs = TransferPreferences::new()
.with_native_vs_nixl(NativeVsNixlPolicy::PreferNixl)
.with_async_cuda(false);
assert_eq!(prefs.native_vs_nixl, NativeVsNixlPolicy::PreferNixl);
assert!(!prefs.prefer_async_cuda);
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Transfer strategy selection based on source and destination storage locations.
use dynamo_memory::StorageKind;
use crate::{layout::PhysicalLayout, transfer::TransferContext};
use super::TransferCapabilities;
/// Transfer strategy to use for copying memory between locations.
///
/// The strategy is determined by the source and destination storage locations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferStrategy {
/// CPU memcpy (for host-to-host transfers)
Memcpy,
/// CUDA async host-to-device transfer
CudaAsyncH2D,
/// CUDA async device-to-host transfer
CudaAsyncD2H,
/// CUDA async device-to-device transfer
CudaAsyncD2D,
/// NIXL read operation (pull from remote)
NixlRead,
/// NIXL write operation (push to remote)
NixlWrite,
/// NIXL write (flipped local and remote order)
/// This is needed for some NIXL backends.
/// For example, the POSIX backend requires that host memory
/// always be the "local" descriptor list, regardless of whether
/// it's a read or write.
#[expect(dead_code)]
NixlWriteFlipped,
/// NIXL read (flipped local and remote order)
NixlReadFlipped,
/// Invalid/unsupported transfer
#[expect(dead_code)]
Invalid,
}
/// Plan for executing a transfer, either direct or via bounce buffer.
///
/// Some transfers require staging through host memory when direct paths
/// are not enabled via capabilities.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransferPlan {
/// Direct single-hop transfer using the specified strategy.
Direct(TransferStrategy),
/// Two-hop transfer requiring a bounce buffer in host memory.
///
/// This is used when:
/// - Device → Remote (without GPU RDMA)
/// - Disk → Remote
/// - Device ↔ Disk (without GDS)
TwoHop {
/// First hop strategy (src → bounce)
first: TransferStrategy,
/// Bounce buffer location (always Pinned for best performance)
bounce_location: StorageKind,
/// Second hop strategy (bounce → dst)
second: TransferStrategy,
},
}
pub(crate) fn select_strategy(
src: &PhysicalLayout,
dst: &PhysicalLayout,
ctx: &TransferContext,
) -> anyhow::Result<TransferPlan> {
let is_src_local = src.nixl_metadata().agent_name() == ctx.nixl_agent().name();
let is_dst_local = dst.nixl_metadata().agent_name() == ctx.nixl_agent().name();
if !is_src_local && !is_dst_local {
return Err(anyhow::anyhow!(
"Both src and dst are remote - this is not supported."
));
}
if is_src_local && is_dst_local {
return Ok(select_direct_strategy(
src.location(),
dst.location(),
false,
ctx.capabilities(),
));
}
select_remote_strategy_v2(
src.location(),
is_src_local,
dst.location(),
is_dst_local,
ctx.capabilities(),
)
}
/// Select the appropriate transfer plan based on source and destination locations.
///
/// # Arguments
/// * `src` - Source storage location (always local)
/// * `dst` - Destination storage location (can be local or remote)
/// * `dst_is_remote` - Whether destination is on a remote node
/// * `capabilities` - Transfer capability flags
///
/// # Returns
/// A transfer plan (direct or two-hop)
///
/// # Conservative Default Policy
///
/// With default capabilities (all disabled):
/// - Device can only transfer to/from Host
/// - Disk can only transfer to/from Host
/// - Host can transfer to Device, Disk, or Remote
/// - Device ↔ Device is allowed (native CUDA)
///
/// Transfers that would violate this policy are staged through host:
/// - Device → Remote: Device → Host → Remote (2 hops)
/// - Disk → Remote: Disk → Host → Remote (2 hops)
/// - Device ↔ Disk: Device → Host → Disk (2 hops)
///
/// # Optional Direct Paths
///
/// - `allow_gds`: Enables Disk ↔ Device direct transfers
/// - `allow_gpu_rdma`: Enables Device → Remote direct transfers
fn select_direct_strategy(
src: StorageKind,
dst: StorageKind,
dst_is_remote: bool,
capabilities: &TransferCapabilities,
) -> TransferPlan {
use StorageKind::*;
use TransferStrategy::*;
// Handle remote destination
if dst_is_remote {
return select_remote_strategy(src, capabilities);
}
// Local-to-local transfers
match (src, dst) {
// Host ↔ Host - direct memcpy
(System, System) | (System, Pinned) | (Pinned, System) | (Pinned, Pinned) => {
TransferPlan::Direct(Memcpy)
}
// Host → Device - direct CUDA
(System, Device(_)) => panic!("System to Device transfers are not supported"),
(Pinned, Device(_)) => TransferPlan::Direct(CudaAsyncH2D),
// Device → Host - direct CUDA
(Device(_), System) => panic!("Device to System transfers are not supported"),
(Device(_), Pinned) => TransferPlan::Direct(CudaAsyncD2H),
// Device ↔ Device - direct CUDA
(Device(_), Device(_)) => TransferPlan::Direct(CudaAsyncD2D),
// Host ↔ Disk - direct NIXL
(System, Disk(_)) | (Pinned, Disk(_)) => TransferPlan::Direct(NixlWrite),
(Disk(_), System) | (Disk(_), Pinned) => TransferPlan::Direct(NixlReadFlipped),
// Disk ↔ Disk - NIXL doesn't seem to support direct transfers here.
// Leaving this as two-hop for now.
(Disk(_), Disk(_)) => TransferPlan::TwoHop {
first: NixlReadFlipped,
bounce_location: Pinned,
second: NixlWrite,
},
// Device ↔ Disk - check GDS capability
(Device(_), Disk(_)) => {
if capabilities.allows_device_disk_direct() {
// Direct GDS transfer
TransferPlan::Direct(NixlWrite)
} else {
// Stage through host: Device → Pinned → Disk
TransferPlan::TwoHop {
first: CudaAsyncD2H,
bounce_location: Pinned,
second: NixlWrite,
}
}
}
(Disk(_), Device(_)) => {
if capabilities.allows_device_disk_direct() {
// Direct GDS transfer
TransferPlan::Direct(NixlRead)
} else {
// Stage through host: Disk → Pinned → Device
TransferPlan::TwoHop {
first: NixlReadFlipped,
bounce_location: Pinned,
second: CudaAsyncH2D,
}
}
}
}
}
/// Select transfer strategy for remote destination.
fn select_remote_strategy(src: StorageKind, capabilities: &TransferCapabilities) -> TransferPlan {
use StorageKind::*;
use TransferStrategy::*;
match src {
// Host → Remote - direct NIXL
System | Pinned => TransferPlan::Direct(NixlWrite),
// Device → Remote - check GPU RDMA capability
Device(_) => {
if capabilities.allows_device_remote_direct() {
// Direct GPU RDMA transfer
TransferPlan::Direct(NixlWrite)
} else {
// Stage through host: Device → Pinned → Remote
TransferPlan::TwoHop {
first: CudaAsyncD2H,
bounce_location: Pinned,
second: NixlWrite,
}
}
}
// Disk → Remote - always stage through host
Disk(_) => TransferPlan::TwoHop {
first: NixlWrite,
bounce_location: Pinned,
second: NixlWrite,
},
}
}
fn select_remote_strategy_v2(
src: StorageKind,
is_src_local: bool,
dst: StorageKind,
is_dst_local: bool,
capabilities: &TransferCapabilities,
) -> anyhow::Result<TransferPlan> {
// We only support System, Pinned and Device for remote transfers.
// Later we might support staged/bounce buffer transfers.
if matches!(src, StorageKind::Disk(_)) || matches!(dst, StorageKind::Disk(_)) {
return Err(anyhow::anyhow!(
"Neither local nor remote disk transfers are supported over NIXL at this time."
));
}
if !capabilities.allow_gpu_rdma
&& (matches!(src, StorageKind::Device(_)) || matches!(dst, StorageKind::Device(_)))
{
return Err(anyhow::anyhow!(
"GPU RDMA is disabled - this transfer requires GPU RDMA."
));
}
if is_src_local && !is_dst_local {
return Ok(TransferPlan::Direct(TransferStrategy::NixlWrite));
}
if is_dst_local && !is_src_local {
return Ok(TransferPlan::Direct(TransferStrategy::NixlReadFlipped));
}
unreachable!("Both src and dst are remote - this is not supported.");
}
#[cfg(all(test, feature = "testing-kvbm"))]
mod tests {
use super::*;
fn default_caps() -> TransferCapabilities {
TransferCapabilities::default()
}
#[test]
fn test_host_to_host_transfers() {
let caps = default_caps();
assert_eq!(
select_direct_strategy(StorageKind::System, StorageKind::System, false, &caps),
TransferPlan::Direct(TransferStrategy::Memcpy)
);
assert_eq!(
select_direct_strategy(StorageKind::System, StorageKind::Pinned, false, &caps),
TransferPlan::Direct(TransferStrategy::Memcpy)
);
assert_eq!(
select_direct_strategy(StorageKind::Pinned, StorageKind::System, false, &caps),
TransferPlan::Direct(TransferStrategy::Memcpy)
);
assert_eq!(
select_direct_strategy(StorageKind::Pinned, StorageKind::Pinned, false, &caps),
TransferPlan::Direct(TransferStrategy::Memcpy)
);
}
#[test]
fn test_host_to_device_transfers() {
let caps = default_caps();
// // System (unpinned) to device should be blocking
// assert_eq!(
// select_direct_strategy(StorageKind::System, StorageKind::Device(0), false, &caps),
// TransferPlan::Direct(TransferStrategy::CudaBlockingH2D)
// );
// Pinned to device should be async
assert_eq!(
select_direct_strategy(StorageKind::Pinned, StorageKind::Device(0), false, &caps),
TransferPlan::Direct(TransferStrategy::CudaAsyncH2D)
);
}
#[test]
fn test_device_to_host_transfers() {
let caps = default_caps();
// // Device to system should be blocking
// assert_eq!(
// select_direct_strategy(StorageKind::Device(0), StorageKind::System, false, &caps),
// TransferPlan::Direct(TransferStrategy::CudaBlockingD2H)
// );
// Device to pinned should be async
assert_eq!(
select_direct_strategy(StorageKind::Device(0), StorageKind::Pinned, false, &caps),
TransferPlan::Direct(TransferStrategy::CudaAsyncD2H)
);
}
#[test]
fn test_device_to_device_transfers() {
let caps = default_caps();
assert_eq!(
select_direct_strategy(StorageKind::Device(0), StorageKind::Device(1), false, &caps),
TransferPlan::Direct(TransferStrategy::CudaAsyncD2D)
);
assert_eq!(
select_direct_strategy(StorageKind::Device(3), StorageKind::Device(3), false, &caps),
TransferPlan::Direct(TransferStrategy::CudaAsyncD2D)
);
}
#[test]
fn test_disk_to_host_transfers() {
let caps = default_caps();
// Disk to host - direct NIXL
assert_eq!(
select_direct_strategy(StorageKind::Disk(42), StorageKind::System, false, &caps),
TransferPlan::Direct(TransferStrategy::NixlReadFlipped)
);
assert_eq!(
select_direct_strategy(StorageKind::Disk(42), StorageKind::Pinned, false, &caps),
TransferPlan::Direct(TransferStrategy::NixlReadFlipped)
);
}
#[test]
fn test_host_to_disk_transfers() {
let caps = default_caps();
// Host to disk - direct NIXL
assert_eq!(
select_direct_strategy(StorageKind::System, StorageKind::Disk(42), false, &caps),
TransferPlan::Direct(TransferStrategy::NixlWrite)
);
assert_eq!(
select_direct_strategy(StorageKind::Pinned, StorageKind::Disk(42), false, &caps),
TransferPlan::Direct(TransferStrategy::NixlWrite)
);
}
#[test]
fn test_device_to_disk_without_gds() {
let caps = default_caps(); // GDS disabled
// Device → Disk should use bounce buffer
let plan =
select_direct_strategy(StorageKind::Device(0), StorageKind::Disk(42), false, &caps);
match plan {
TransferPlan::TwoHop {
first,
bounce_location,
second,
} => {
assert_eq!(first, TransferStrategy::CudaAsyncD2H);
assert_eq!(bounce_location, StorageKind::Pinned);
assert_eq!(second, TransferStrategy::NixlWrite);
}
_ => panic!("Expected TwoHop plan"),
}
}
#[test]
fn test_disk_to_device_without_gds() {
let caps = default_caps(); // GDS disabled
// Disk → Device should use bounce buffer
let plan =
select_direct_strategy(StorageKind::Disk(42), StorageKind::Device(0), false, &caps);
match plan {
TransferPlan::TwoHop {
first,
bounce_location,
second,
} => {
assert_eq!(first, TransferStrategy::NixlReadFlipped);
assert_eq!(bounce_location, StorageKind::Pinned);
assert_eq!(second, TransferStrategy::CudaAsyncH2D);
}
_ => panic!("Expected TwoHop plan"),
}
}
#[test]
fn test_device_to_disk_with_gds() {
let caps = TransferCapabilities::default().with_gds(true);
// Device → Disk should be direct with GDS
assert_eq!(
select_direct_strategy(StorageKind::Device(0), StorageKind::Disk(42), false, &caps),
TransferPlan::Direct(TransferStrategy::NixlWrite)
);
}
#[test]
fn test_disk_to_device_with_gds() {
let caps = TransferCapabilities::default().with_gds(true);
// Disk → Device should be direct with GDS
assert_eq!(
select_direct_strategy(StorageKind::Disk(42), StorageKind::Device(0), false, &caps),
TransferPlan::Direct(TransferStrategy::NixlRead)
);
}
#[test]
fn test_host_to_remote() {
let caps = default_caps();
// Host → Remote - always direct
assert_eq!(
select_direct_strategy(StorageKind::System, StorageKind::System, true, &caps),
TransferPlan::Direct(TransferStrategy::NixlWrite)
);
assert_eq!(
select_direct_strategy(StorageKind::Pinned, StorageKind::Pinned, true, &caps),
TransferPlan::Direct(TransferStrategy::NixlWrite)
);
}
#[test]
fn test_device_to_remote_without_rdma() {
let caps = default_caps(); // GPU RDMA disabled
// Device → Remote should use bounce buffer
let plan = select_direct_strategy(StorageKind::Device(0), StorageKind::System, true, &caps);
match plan {
TransferPlan::TwoHop {
first,
bounce_location,
second,
} => {
assert_eq!(first, TransferStrategy::CudaAsyncD2H);
assert_eq!(bounce_location, StorageKind::Pinned);
assert_eq!(second, TransferStrategy::NixlWrite);
}
_ => panic!("Expected TwoHop plan"),
}
}
#[test]
fn test_device_to_remote_with_rdma() {
let caps = TransferCapabilities::default().with_gpu_rdma(true);
// Device → Remote should be direct with GPU RDMA
assert_eq!(
select_direct_strategy(StorageKind::Device(0), StorageKind::Device(0), true, &caps),
TransferPlan::Direct(TransferStrategy::NixlWrite)
);
}
#[test]
fn test_disk_to_remote() {
let caps = default_caps();
// Disk → Remote always uses bounce buffer
let plan = select_direct_strategy(StorageKind::Disk(42), StorageKind::System, true, &caps);
match plan {
TransferPlan::TwoHop {
first,
bounce_location,
second,
} => {
assert_eq!(first, TransferStrategy::NixlWrite);
assert_eq!(bounce_location, StorageKind::Pinned);
assert_eq!(second, TransferStrategy::NixlWrite);
}
_ => panic!("Expected TwoHop plan"),
}
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Round-trip testing infrastructure for transfer verification.
//!
//! This module provides utilities for testing data integrity across transfers
//! by comparing checksums after round-trip operations:
//! 1. Source blocks (host) → Intermediate (device/disk/remote)
//! 2. Intermediate → Destination blocks (host, different IDs)
//! 3. Verify checksums match between source and destination
use super::context::TransferContext;
use super::{
BlockChecksum, FillPattern, PhysicalLayout, StorageKind, compute_block_checksums, fill_blocks,
transfer_blocks,
};
use anyhow::{Result, anyhow};
use std::collections::HashMap;
/// Result of a round-trip test.
#[derive(Debug)]
pub struct RoundTripTestResult {
/// Source block checksums (keyed by source block ID)
pub source_checksums: HashMap<usize, BlockChecksum>,
/// Destination block checksums (keyed by destination block ID)
pub dest_checksums: HashMap<usize, BlockChecksum>,
/// Block ID mapping used (src_id, dst_id)
pub block_mapping: Vec<(usize, usize)>,
/// Whether all checksums matched
pub success: bool,
/// Mismatched blocks (if any)
pub mismatches: Vec<(usize, usize)>, // (src_id, dst_id) pairs that didn't match
}
impl RoundTripTestResult {
/// Check if the round-trip test passed.
pub fn is_success(&self) -> bool {
self.success
}
/// Get the number of blocks tested.
pub fn num_blocks(&self) -> usize {
self.block_mapping.len()
}
/// Get a detailed report of the test results.
pub fn report(&self) -> String {
if self.success {
format!(
"Round-trip test PASSED: {}/{} blocks verified successfully",
self.num_blocks(),
self.num_blocks()
)
} else {
format!(
"Round-trip test FAILED: {}/{} blocks mismatched\nMismatches: {:?}",
self.mismatches.len(),
self.num_blocks(),
self.mismatches
)
}
}
}
/// Builder for round-trip tests.
///
/// This allows configuring a test that transfers data from source blocks
/// to intermediate storage and back to different destination blocks,
/// verifying data integrity via checksums.
pub struct RoundTripTest {
/// Source physical layout (must be local)
source: PhysicalLayout,
/// Intermediate physical layout (can be remote/device/disk)
intermediate: PhysicalLayout,
/// Destination physical layout (must be local)
destination: PhysicalLayout,
/// Block mapping: (src_id, intermediate_id, dst_id)
block_mapping: Vec<(usize, usize, usize)>,
/// Fill pattern for source blocks
fill_pattern: FillPattern,
}
impl RoundTripTest {
/// Create a new round-trip test.
///
/// # Arguments
/// * `source` - Source physical layout (must be local)
/// * `intermediate` - Intermediate physical layout
/// * `destination` - Destination physical layout (must be local)
pub fn new(
source: PhysicalLayout,
intermediate: PhysicalLayout,
destination: PhysicalLayout,
) -> Result<Self> {
if source.is_remote() {
return Err(anyhow!("Source layout must be local"));
}
if destination.is_remote() {
return Err(anyhow!("Destination layout must be local"));
}
Ok(Self {
source,
intermediate,
destination,
block_mapping: Vec::new(),
fill_pattern: FillPattern::Sequential,
})
}
/// Set the fill pattern for source blocks.
pub fn with_fill_pattern(mut self, pattern: FillPattern) -> Self {
self.fill_pattern = pattern;
self
}
/// Add a block mapping for the round-trip test.
///
/// # Arguments
/// * `src_id` - Source block ID
/// * `intermediate_id` - Intermediate block ID
/// * `dst_id` - Destination block ID
pub fn add_block_mapping(
mut self,
src_id: usize,
intermediate_id: usize,
dst_id: usize,
) -> Self {
self.block_mapping.push((src_id, intermediate_id, dst_id));
self
}
/// Add multiple block mappings at once.
///
/// This is a convenience method for adding several mappings.
pub fn with_block_mappings(mut self, mappings: &[(usize, usize, usize)]) -> Self {
self.block_mapping.extend_from_slice(mappings);
self
}
/// Run the round-trip test.
///
/// # Workflow
/// 1. Fill source blocks with the specified pattern
/// 2. Compute source checksums
/// 3. Transfer source → intermediate
/// 4. Transfer intermediate → destination
/// 5. Compute destination checksums
/// 6. Compare checksums
///
/// # Arguments
/// * `ctx` - Transfer context with CUDA stream and NIXL agent
pub async fn run(self, ctx: &TransferContext) -> Result<RoundTripTestResult> {
if self.block_mapping.is_empty() {
return Err(anyhow!("No block mappings specified"));
}
// Step 1: Fill source blocks
let src_ids: Vec<usize> = self.block_mapping.iter().map(|(src, _, _)| *src).collect();
fill_blocks(&self.source, &src_ids, self.fill_pattern)?;
// Step 2: Compute source checksums
let source_checksums = compute_block_checksums(&self.source, &src_ids)?;
// Step 3: Transfer source → intermediate
let src_ids_intermediate: Vec<usize> =
self.block_mapping.iter().map(|(src, _, _)| *src).collect();
let inter_ids_from_src: Vec<usize> = self
.block_mapping
.iter()
.map(|(_, inter, _)| *inter)
.collect();
let notification = transfer_blocks(
&self.source,
&self.intermediate,
&src_ids_intermediate,
&inter_ids_from_src,
ctx,
)?;
notification.await?;
// Step 4: Transfer intermediate → destination
let inter_ids_to_dst: Vec<usize> = self
.block_mapping
.iter()
.map(|(_, inter, _)| *inter)
.collect();
let dst_ids_from_inter: Vec<usize> =
self.block_mapping.iter().map(|(_, _, dst)| *dst).collect();
let notification = transfer_blocks(
&self.intermediate,
&self.destination,
&inter_ids_to_dst,
&dst_ids_from_inter,
ctx,
)?;
notification.await?;
// Step 5: Compute destination checksums
let dst_ids: Vec<usize> = self.block_mapping.iter().map(|(_, _, dst)| *dst).collect();
let dest_checksums = compute_block_checksums(&self.destination, &dst_ids)?;
// Step 6: Compare checksums
let mut mismatches = Vec::new();
for (src_id, _, dst_id) in &self.block_mapping {
let src_checksum = &source_checksums[src_id];
let dst_checksum = &dest_checksums[dst_id];
if src_checksum != dst_checksum {
mismatches.push((*src_id, *dst_id));
}
}
let success = mismatches.is_empty();
let block_mapping: Vec<(usize, usize)> = self
.block_mapping
.iter()
.map(|(src, _, dst)| (*src, *dst))
.collect();
Ok(RoundTripTestResult {
source_checksums,
dest_checksums,
block_mapping,
success,
mismatches,
})
}
}
#[cfg(all(test, feature = "testing-kvbm"))]
mod tests {
use super::*;
use crate::v2::layout::{
FullyContiguousLayout, Layout, LayoutConfig, MemoryRegion, OwnedMemoryRegion,
};
use std::sync::Arc;
// Helper to create a minimal transfer context for testing
// In real tests with CUDA/NIXL, this would be properly constructed
fn create_test_context() -> TransferContext {
// For now, we'll skip these tests if CUDA is not available
// In the future, we can mock TransferContext or use conditional compilation
todo!("Create test context - requires CUDA/NIXL setup")
}
#[tokio::test]
#[ignore = "Requires CUDA/NIXL setup"]
async fn test_round_trip_host_to_host() {
// Create three layouts: source, intermediate, destination
let (src_layout, _src_mem) = create_test_layout(4);
let (inter_layout, _inter_mem) = create_test_layout(4);
let (dst_layout, _dst_mem) = create_test_layout(4);
let source = PhysicalLayout::new_local(src_layout, StorageKind::System);
let intermediate = PhysicalLayout::new_local(inter_layout, StorageKind::Pinned);
let destination = PhysicalLayout::new_local(dst_layout, StorageKind::System);
// Build round-trip test with different block IDs
// Source: blocks [0, 1, 2, 3]
// Intermediate: blocks [0, 1, 2, 3]
// Destination: blocks [0, 1, 2, 3] (different memory than source)
let test = RoundTripTest::new(source, intermediate, destination)
.unwrap()
.with_fill_pattern(FillPattern::Sequential)
.add_block_mapping(0, 0, 0)
.add_block_mapping(1, 1, 1)
.add_block_mapping(2, 2, 2)
.add_block_mapping(3, 3, 3);
// Create a transfer context (requires actual CUDA/NIXL setup)
let ctx = create_test_context();
// Run the test
let result = test.run(&ctx).await.unwrap();
assert!(result.is_success(), "{}", result.report());
assert_eq!(result.num_blocks(), 4);
}
#[tokio::test]
#[ignore = "Requires CUDA/NIXL setup"]
async fn test_round_trip_different_block_ids() {
// Create layouts with enough blocks
let (src_layout, _src_mem) = create_test_layout(8);
let (inter_layout, _inter_mem) = create_test_layout(8);
let (dst_layout, _dst_mem) = create_test_layout(8);
let source = PhysicalLayout::new_local(src_layout, StorageKind::System);
let intermediate = PhysicalLayout::new_local(inter_layout, StorageKind::Pinned);
let destination = PhysicalLayout::new_local(dst_layout, StorageKind::System);
// Test with non-overlapping block IDs
// Source: blocks [0, 1, 2, 3]
// Intermediate: blocks [2, 3, 4, 5]
// Destination: blocks [4, 5, 6, 7]
let test = RoundTripTest::new(source, intermediate, destination)
.unwrap()
.with_fill_pattern(FillPattern::BlockBased)
.with_block_mappings(&[(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]);
let ctx = create_test_context();
let result = test.run(&ctx).await.unwrap();
assert!(result.is_success(), "{}", result.report());
assert_eq!(result.num_blocks(), 4);
}
#[test]
fn test_round_trip_builder() {
let (src_layout, _) = create_test_layout(4);
let (inter_layout, _) = create_test_layout(4);
let (dst_layout, _) = create_test_layout(4);
let source = PhysicalLayout::new_local(src_layout, StorageKind::System);
let intermediate = PhysicalLayout::new_local(inter_layout, StorageKind::Pinned);
let destination = PhysicalLayout::new_local(dst_layout, StorageKind::System);
let test = RoundTripTest::new(source, intermediate, destination)
.unwrap()
.with_fill_pattern(FillPattern::Constant(42))
.add_block_mapping(0, 0, 1)
.add_block_mapping(1, 1, 2);
assert_eq!(test.block_mapping.len(), 2);
}
#[test]
fn test_round_trip_requires_local_source() {
let (src_layout, _) = create_test_layout(1);
let (inter_layout, _) = create_test_layout(1);
let (dst_layout, _) = create_test_layout(1);
let source =
PhysicalLayout::new_remote(src_layout, StorageKind::System, "remote".to_string());
let intermediate = PhysicalLayout::new_local(inter_layout, StorageKind::Pinned);
let destination = PhysicalLayout::new_local(dst_layout, StorageKind::System);
let result = RoundTripTest::new(source, intermediate, destination);
assert!(result.is_err());
}
#[test]
fn test_round_trip_requires_local_destination() {
let (src_layout, _) = create_test_layout(1);
let (inter_layout, _) = create_test_layout(1);
let (dst_layout, _) = create_test_layout(1);
let source = PhysicalLayout::new_local(src_layout, StorageKind::System);
let intermediate = PhysicalLayout::new_local(inter_layout, StorageKind::Pinned);
let destination =
PhysicalLayout::new_remote(dst_layout, StorageKind::System, "remote".to_string());
let result = RoundTripTest::new(source, intermediate, destination);
assert!(result.is_err());
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Local transfer tests where source and destination use the same NIXL agent.
//!
//! These tests verify data integrity across:
//! - Different storage types (System, Pinned, Device)
//! - Different layout types (Fully Contiguous, Layer-wise)
//! - Different transfer strategies (Memcpy, CUDA H2D/D2H)
use super::*;
use crate::transfer::TransferCapabilities;
use crate::transfer::executor::TransferOptionsInternal;
use crate::transfer::executor::execute_transfer;
use crate::transfer::{can_use_whole_block_transfer, validate_layout_compatibility};
use anyhow::Result;
use rstest::rstest;
// ============================================================================
// System <=> System Tests (Memcpy)
// ============================================================================
#[derive(Clone)]
enum LayoutType {
FC,
LW,
}
fn build_layout(
agent: NixlAgent,
layout_type: LayoutType,
storage_kind: StorageKind,
num_blocks: usize,
) -> PhysicalLayout {
match layout_type {
LayoutType::FC => create_fc_layout(agent, storage_kind, num_blocks),
LayoutType::LW => create_lw_layout(agent, storage_kind, num_blocks),
}
}
/// Check if a transfer between two storage kinds requires GDS_MT (Device ↔ Disk direct).
fn requires_gds(src_kind: StorageKind, dst_kind: StorageKind) -> bool {
matches!(
(src_kind, dst_kind),
(StorageKind::Device(_), StorageKind::Disk(_))
| (StorageKind::Disk(_), StorageKind::Device(_))
)
}
/// Check if a transfer between two storage kinds is unsupported.
///
/// Device ↔ System transfers are not supported - must use Pinned memory for CUDA transfers.
fn is_unsupported_transfer(src_kind: StorageKind, dst_kind: StorageKind) -> bool {
matches!(
(src_kind, dst_kind),
(StorageKind::Device(_), StorageKind::System)
| (StorageKind::System, StorageKind::Device(_))
)
}
/// Probe whether a NIXL backend is available by attempting to add it to a temporary agent.
fn is_nixl_backend_available(backend: &str) -> bool {
let mut agent = match NixlAgent::new("__backend_probe__") {
Ok(a) => a,
Err(_) => return false,
};
agent.add_backend(backend).is_ok()
}
fn build_agent_for_kinds(kinds: &[StorageKind]) -> Result<NixlAgent> {
let mut agent = NixlAgent::new("agent")?;
let mut added_backends = Vec::new();
// Determine required backends for all storage kinds
for &kind in kinds {
match kind {
StorageKind::System | StorageKind::Pinned => {
if !added_backends.contains(&"POSIX") {
let _ = agent.add_backend("POSIX"); // Optional for DRAM
added_backends.push("POSIX");
}
}
StorageKind::Device(_) => {
if !added_backends.contains(&"UCX") {
agent.add_backend("UCX")?; // Required for VRAM
added_backends.push("UCX");
}
}
StorageKind::Disk(_) => {
if !added_backends.contains(&"POSIX") {
let _ = agent.add_backend("POSIX"); // Optional for disk I/O
added_backends.push("POSIX");
}
}
}
}
// GDS_MT is optional for Device <-> Disk (will be checked separately)
for window in kinds.windows(2) {
if requires_gds(window[0], window[1]) && !added_backends.contains(&"GDS_MT") {
let _ = agent.add_backend("GDS_MT");
break;
}
}
Ok(agent)
}
#[rstest]
#[tokio::test]
async fn test_p2p(
#[values(LayoutType::FC, LayoutType::LW)] src_layout: LayoutType,
#[values(
StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(0),
StorageKind::Disk(0)
)]
src_kind: StorageKind,
#[values(LayoutType::FC, LayoutType::LW)] dst_layout: LayoutType,
#[values(
StorageKind::System,
StorageKind::Pinned,
StorageKind::Device(0),
StorageKind::Disk(0)
)]
dst_kind: StorageKind,
) -> Result<()> {
// Skip unsupported Device ↔ System transfers (must use Pinned for CUDA)
if is_unsupported_transfer(src_kind, dst_kind) {
eprintln!(
"Skipping unsupported Device ↔ System transfer: src={:?}, dst={:?}",
src_kind, dst_kind
);
return Ok(());
}
// Device ↔ Disk direct transfers require GDS_MT
if requires_gds(src_kind, dst_kind) && !is_nixl_backend_available("GDS_MT") {
eprintln!("Skipping Device ↔ Disk test - GDS_MT backend unavailable");
return Ok(());
}
use crate::transfer::{BounceBufferInternal, executor::TransferOptionsInternal};
let agent = build_agent_for_kinds(&[src_kind, dst_kind])?;
let src = build_layout(agent.clone(), src_layout, src_kind, 4);
let dst = build_layout(agent.clone(), dst_layout, dst_kind, 4);
let bounce_layout = build_layout(agent.clone(), LayoutType::FC, StorageKind::Pinned, 4);
let bounce_buffer_spec = BounceBufferInternal::from_layout(bounce_layout, vec![0, 1]);
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
let checksums = fill_and_checksum(&src, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, None).unwrap();
let options = TransferOptionsInternal::builder()
.bounce_buffer(bounce_buffer_spec)
.build()?;
let notification =
execute_transfer(&src, &dst, &src_blocks, &dst_blocks, options, ctx.context())?;
notification.await?;
verify_checksums_by_position(&checksums, &src_blocks, &dst, &dst_blocks)?;
Ok(())
}
#[rstest]
#[tokio::test]
async fn test_roundtrip(
#[values(LayoutType::FC, LayoutType::LW)] src_layout: LayoutType,
#[values(StorageKind::System, StorageKind::Pinned, StorageKind::Device(0))]
src_kind: StorageKind,
#[values(LayoutType::FC, LayoutType::LW)] inter_layout: LayoutType,
#[values(StorageKind::System, StorageKind::Pinned, StorageKind::Device(0))]
inter_kind: StorageKind,
#[values(LayoutType::FC, LayoutType::LW)] dst_layout: LayoutType,
#[values(StorageKind::System, StorageKind::Pinned, StorageKind::Device(0))]
dst_kind: StorageKind,
) -> Result<()> {
// Skip unsupported Device ↔ System transfers (must use Pinned for CUDA)
if is_unsupported_transfer(src_kind, inter_kind)
|| is_unsupported_transfer(inter_kind, dst_kind)
{
eprintln!(
"Skipping unsupported Device ↔ System transfer: src={:?}, inter={:?}, dst={:?}",
src_kind, inter_kind, dst_kind
);
return Ok(());
}
use crate::transfer::executor::TransferOptionsInternal;
let agent = build_agent_for_kinds(&[src_kind, inter_kind, dst_kind])?;
// Create layouts: source pinned, device intermediate, destination pinned
let src = build_layout(agent.clone(), src_layout, src_kind, 4);
let device = build_layout(agent.clone(), inter_layout, inter_kind, 4);
let dst = build_layout(agent.clone(), dst_layout, dst_kind, 4);
let src_blocks = vec![0, 1];
let device_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
// Fill source and compute checksums
let checksums = fill_and_checksum(&src, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, None).unwrap();
// Transfer: Pinned[0,1] -> Device[0,1]
let notification = execute_transfer(
&src,
&device,
&src_blocks,
&device_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
// Transfer: Device[0,1] -> Pinned[2,3]
let notification = execute_transfer(
&device,
&dst,
&device_blocks,
&dst_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
// Verify checksums match
verify_checksums_by_position(&checksums, &src_blocks, &dst, &dst_blocks)?;
Ok(())
}
#[cfg(feature = "testing-nixl-gds")]
#[rstest]
#[case(StorageKind::Device(0), StorageKind::Disk(0))]
#[case(StorageKind::Disk(0), StorageKind::Device(0))]
#[tokio::test]
async fn test_gds(
#[case] src_kind: StorageKind,
#[values(LayoutType::FC, LayoutType::LW)] src_layout: LayoutType,
#[case] dst_kind: StorageKind,
#[values(LayoutType::FC, LayoutType::LW)] dst_layout: LayoutType,
) -> Result<()> {
let capabilities = TransferCapabilities::default().with_gds_if_supported();
if !capabilities.allow_gds {
println!("System does not support GDS. Skipping test.");
return Ok(());
}
let agent = build_agent_for_kinds(&[src_kind, dst_kind])?;
let src = build_layout(agent.clone(), src_layout, src_kind, 4);
let dst = build_layout(agent.clone(), dst_layout, dst_kind, 4);
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
let checksums = fill_and_checksum(&src, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, Some(capabilities)).unwrap();
let notification = execute_transfer(
&src,
&dst,
&src_blocks,
&dst_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
verify_checksums_by_position(&checksums, &src_blocks, &dst, &dst_blocks)?;
Ok(())
}
#[rstest]
#[case(1024)]
#[case(2048)]
#[case(4096)]
#[case(8192)]
#[case(16384)]
#[tokio::test]
async fn test_large_block_counts(#[case] block_count: usize) {
let agent = create_test_agent(&format!("test_large_block_counts_{}", block_count));
let src = create_fc_layout(agent.clone(), StorageKind::Pinned, block_count);
let device = create_fc_layout(agent.clone(), StorageKind::Device(0), block_count);
let src_blocks = (0..block_count).collect::<Vec<_>>();
let device_blocks = (0..block_count).collect::<Vec<_>>();
let ctx = create_transfer_context(agent, None).unwrap();
let notification = execute_transfer(
&src,
&device,
&src_blocks,
&device_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)
.unwrap();
notification.await.unwrap();
}
// ============================================================================
// Parameterized Bounce Tests with Guard Block Validation
// ============================================================================
/// Test bounce transfers with guard block validation.
///
/// This test validates that:
/// 1. Data can be transferred: host[src_blocks] → bounce[src_blocks] → host[dst_blocks]
/// 2. Guard blocks adjacent to dst_blocks remain unchanged (no memory corruption)
/// 3. Works correctly with different storage types, layouts, and transfer modes
///
/// Test pattern (6 blocks total):
/// - Source blocks: [0, 1]
/// - Destination blocks: [3, 4]
/// - Guard blocks: [2, 5] (adjacent to destination, should remain unchanged)
#[rstest]
// Storage combinations (host, bounce)
#[case(StorageKind::System, StorageKind::Pinned, "sys_pin")]
#[case(StorageKind::Pinned, StorageKind::System, "pin_sys")]
#[case(StorageKind::Pinned, StorageKind::Device(0), "pin_dev")]
#[tokio::test]
async fn test_bounce_with_guards_fc_fc_full(
#[case] host_storage: StorageKind,
#[case] bounce_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_bounce_with_guards_impl(
host_storage,
bounce_storage,
LayoutKind::FC,
LayoutKind::FC,
TransferMode::FullBlocks,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::System, StorageKind::Pinned, "sys_pin")]
#[case(StorageKind::Pinned, StorageKind::System, "pin_sys")]
#[case(StorageKind::Pinned, StorageKind::Device(0), "pin_dev")]
#[tokio::test]
async fn test_bounce_with_guards_fc_lw_full(
#[case] host_storage: StorageKind,
#[case] bounce_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_bounce_with_guards_impl(
host_storage,
bounce_storage,
LayoutKind::FC,
LayoutKind::LW,
TransferMode::FullBlocks,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::System, StorageKind::Pinned, "sys_pin")]
#[case(StorageKind::Pinned, StorageKind::System, "pin_sys")]
#[case(StorageKind::Pinned, StorageKind::Device(0), "pin_dev")]
#[tokio::test]
async fn test_bounce_with_guards_lw_fc_full(
#[case] host_storage: StorageKind,
#[case] bounce_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_bounce_with_guards_impl(
host_storage,
bounce_storage,
LayoutKind::LW,
LayoutKind::FC,
TransferMode::FullBlocks,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::System, StorageKind::Pinned, "sys_pin")]
#[case(StorageKind::Pinned, StorageKind::System, "pin_sys")]
#[case(StorageKind::Pinned, StorageKind::Device(0), "pin_dev")]
#[tokio::test]
async fn test_bounce_with_guards_lw_lw_full(
#[case] host_storage: StorageKind,
#[case] bounce_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_bounce_with_guards_impl(
host_storage,
bounce_storage,
LayoutKind::LW,
LayoutKind::LW,
TransferMode::FullBlocks,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::Pinned, StorageKind::Device(0), "pin_dev")]
#[tokio::test]
async fn test_bounce_with_guards_fc_fc_layer0(
#[case] host_storage: StorageKind,
#[case] bounce_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_bounce_with_guards_impl(
host_storage,
bounce_storage,
LayoutKind::FC,
LayoutKind::FC,
TransferMode::FirstLayerOnly,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::Pinned, StorageKind::Device(0), "pin_dev")]
#[tokio::test]
async fn test_bounce_with_guards_lw_lw_layer0(
#[case] host_storage: StorageKind,
#[case] bounce_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_bounce_with_guards_impl(
host_storage,
bounce_storage,
LayoutKind::LW,
LayoutKind::LW,
TransferMode::FirstLayerOnly,
name_suffix,
)
.await
.unwrap();
}
/// Implementation helper for bounce tests with guard blocks.
async fn test_bounce_with_guards_impl(
host_storage: StorageKind,
bounce_storage: StorageKind,
host_layout: LayoutKind,
bounce_layout: LayoutKind,
mode: TransferMode,
name_suffix: &str,
) -> Result<()> {
let num_blocks = 6;
let test_name = format!(
"bounce_{}_{:?}_{:?}_{}_{}",
name_suffix,
host_layout,
bounce_layout,
mode.suffix(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis()
);
let agent = create_test_agent(&test_name);
// Create layouts
let host = create_layout(
agent.clone(),
LayoutSpec::new(host_layout, host_storage),
num_blocks,
);
let bounce = create_layout(
agent.clone(),
LayoutSpec::new(bounce_layout, bounce_storage),
num_blocks,
);
// Block assignments:
// - Transfer: host[0,1] → bounce[0,1] → host[3,4]
// - Guards: host[2,5] (should remain unchanged)
let src_blocks = vec![0, 1];
let dst_blocks = vec![3, 4];
let guard_blocks = vec![2, 5];
// Setup: Fill source blocks and guard blocks
let src_checksums =
fill_and_checksum_with_mode(&host, &src_blocks, FillPattern::Sequential, mode)?;
let guard_checksums = create_guard_blocks(&host, &guard_blocks, FillPattern::Constant(0xFF))?;
let ctx = create_transfer_context(agent, None)?;
// Execute bounce: host[0,1] → bounce[0,1]
let mut options_builder = TransferOptionsInternal::builder();
if let Some(range) = mode.layer_range() {
options_builder = options_builder.layer_range(range);
}
let notification = execute_transfer(
&host,
&bounce,
&src_blocks,
&src_blocks,
options_builder.build()?,
ctx.context(),
)?;
notification.await?;
// Execute bounce: bounce[0,1] → host[3,4]
let mut options_builder = TransferOptionsInternal::builder();
if let Some(range) = mode.layer_range() {
options_builder = options_builder.layer_range(range);
}
let notification = execute_transfer(
&bounce,
&host,
&src_blocks,
&dst_blocks,
options_builder.build()?,
ctx.context(),
)?;
notification.await?;
// Verify: Data integrity + guards unchanged
verify_checksums_by_position_with_mode(&src_checksums, &src_blocks, &host, &dst_blocks, mode)?;
verify_guard_blocks_unchanged(&host, &guard_blocks, &guard_checksums)?;
Ok(())
}
// ============================================================================
// Parameterized Direct Transfer Tests
// ============================================================================
/// Test direct transfers with parameterization over storage, layout, and transfer mode.
///
/// This demonstrates the DRY parameterized approach that can replace the 18 individual
/// tests above (System<=>System, Pinned<=>Pinned, cross-type, etc).
///
/// Note: Only tests System<=>System, Pinned<=>Pinned, and System<=>Pinned since we can only
/// fill/checksum System and Pinned storage. For Device tests, use bounce tests instead.
#[rstest]
// Storage combinations (only fillable storage types)
#[case(StorageKind::System, StorageKind::System, "sys_sys")]
#[case(StorageKind::Pinned, StorageKind::Pinned, "pin_pin")]
#[case(StorageKind::System, StorageKind::Pinned, "sys_pin")]
#[case(StorageKind::Pinned, StorageKind::System, "pin_sys")]
#[tokio::test]
async fn test_direct_transfer_fc_fc_full(
#[case] src_storage: StorageKind,
#[case] dst_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_direct_transfer_impl(
src_storage,
dst_storage,
LayoutKind::FC,
LayoutKind::FC,
TransferMode::FullBlocks,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::System, StorageKind::Pinned, "sys_pin")]
#[case(StorageKind::Pinned, StorageKind::System, "pin_sys")]
#[tokio::test]
async fn test_direct_transfer_fc_lw_layer0(
#[case] src_storage: StorageKind,
#[case] dst_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_direct_transfer_impl(
src_storage,
dst_storage,
LayoutKind::FC,
LayoutKind::LW,
TransferMode::FirstLayerOnly,
name_suffix,
)
.await
.unwrap();
}
#[rstest]
#[case(StorageKind::Pinned, StorageKind::Pinned, "pin_pin")]
#[tokio::test]
async fn test_direct_transfer_lw_lw_layer1(
#[case] src_storage: StorageKind,
#[case] dst_storage: StorageKind,
#[case] name_suffix: &str,
) {
test_direct_transfer_impl(
src_storage,
dst_storage,
LayoutKind::LW,
LayoutKind::LW,
TransferMode::SecondLayerOnly,
name_suffix,
)
.await
.unwrap();
}
/// Implementation helper for direct transfer tests.
async fn test_direct_transfer_impl(
src_storage: StorageKind,
dst_storage: StorageKind,
src_layout: LayoutKind,
dst_layout: LayoutKind,
mode: TransferMode,
name_suffix: &str,
) -> Result<()> {
let num_blocks = 4;
let test_name = format!(
"direct_{}_{:?}_{:?}_{}_{}",
name_suffix,
src_layout,
dst_layout,
mode.suffix(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis()
);
let agent = create_test_agent(&test_name);
// Create layouts
let src = create_layout(
agent.clone(),
LayoutSpec::new(src_layout, src_storage),
num_blocks,
);
let dst = create_layout(
agent.clone(),
LayoutSpec::new(dst_layout, dst_storage),
num_blocks,
);
// Transfer src[0,1] -> dst[2,3]
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
// Fill source and compute checksums
let src_checksums =
fill_and_checksum_with_mode(&src, &src_blocks, FillPattern::Sequential, mode)?;
let ctx = create_transfer_context(agent, None)?;
// Execute transfer
let mut options_builder = TransferOptionsInternal::builder();
if let Some(range) = mode.layer_range() {
options_builder = options_builder.layer_range(range);
}
let notification = execute_transfer(
&src,
&dst,
&src_blocks,
&dst_blocks,
options_builder.build()?,
ctx.context(),
)?;
notification.await?;
// Verify data integrity
verify_checksums_by_position_with_mode(&src_checksums, &src_blocks, &dst, &dst_blocks, mode)?;
Ok(())
}
// ============================================================================
// Layout Compatibility Helper Tests
// ============================================================================
#[test]
fn test_validate_layout_compatibility_same_layout() {
let agent = create_test_agent("test_compat_same");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_fc_layout(agent.clone(), StorageKind::System, 4);
// Both FC layouts with Unknown KvBlockLayout - should be compatible
assert!(validate_layout_compatibility(&src, &dst).is_ok());
}
#[test]
fn test_validate_layout_compatibility_fc_lw_same_block_layout() {
let agent = create_test_agent("test_compat_fc_lw");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_lw_layout(agent.clone(), StorageKind::System, 4);
// Both have Unknown/Unknown-derived KvBlockLayout - should be compatible
// (Unknown→Unknown returns false for requires_transform)
assert!(validate_layout_compatibility(&src, &dst).is_ok());
}
#[test]
fn test_can_use_whole_block_fc_fc_full_block() {
let agent = create_test_agent("test_whole_block_fc_fc");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_fc_layout(agent.clone(), StorageKind::System, 4);
// Both FC + full block transfer = should use whole-block
assert!(can_use_whole_block_transfer(&src, &dst, None));
}
#[test]
fn test_can_use_whole_block_fc_fc_full_range() {
let agent = create_test_agent("test_whole_block_fc_fc_range");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_fc_layout(agent.clone(), StorageKind::System, 4);
// Both FC + full range (0..num_layers) = should use whole-block
let full_range = 0..src.layout().num_layers();
assert!(can_use_whole_block_transfer(&src, &dst, Some(&full_range)));
}
#[test]
fn test_can_use_whole_block_fc_fc_partial_layer() {
let agent = create_test_agent("test_whole_block_partial");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_fc_layout(agent.clone(), StorageKind::System, 4);
// Partial layer transfer = should NOT use whole-block
let partial_range = 0..1;
assert!(!can_use_whole_block_transfer(
&src,
&dst,
Some(&partial_range)
));
}
#[test]
fn test_can_use_whole_block_fc_lw() {
let agent = create_test_agent("test_whole_block_fc_lw");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_lw_layout(agent.clone(), StorageKind::System, 4);
// FC + LW = should NOT use whole-block (dst is not fully contiguous)
assert!(!can_use_whole_block_transfer(&src, &dst, None));
}
#[test]
fn test_can_use_whole_block_lw_fc() {
let agent = create_test_agent("test_whole_block_lw_fc");
let src = create_lw_layout(agent.clone(), StorageKind::System, 4);
let dst = create_fc_layout(agent.clone(), StorageKind::System, 4);
// LW + FC = should NOT use whole-block (src is not fully contiguous)
assert!(!can_use_whole_block_transfer(&src, &dst, None));
}
#[test]
fn test_can_use_whole_block_lw_lw() {
let agent = create_test_agent("test_whole_block_lw_lw");
let src = create_lw_layout(agent.clone(), StorageKind::System, 4);
let dst = create_lw_layout(agent.clone(), StorageKind::System, 4);
// LW + LW = should NOT use whole-block (neither is fully contiguous)
assert!(!can_use_whole_block_transfer(&src, &dst, None));
}
// ============================================================================
// Whole-Block Transfer Integration Tests
// ============================================================================
/// Test that FC→FC transfers with full blocks use the whole-block path.
///
/// This test verifies data integrity for FC→FC transfers that should use
/// the optimized whole-block memcpy path.
#[rstest]
#[case(StorageKind::System, StorageKind::System)]
#[case(StorageKind::System, StorageKind::Pinned)]
#[case(StorageKind::Pinned, StorageKind::Pinned)]
#[tokio::test]
async fn test_whole_block_transfer_fc_fc(
#[case] src_storage: StorageKind,
#[case] dst_storage: StorageKind,
) -> Result<()> {
let agent = create_test_agent("test_whole_block_fc_fc_transfer");
let src = create_fc_layout(agent.clone(), src_storage, 4);
let dst = create_fc_layout(agent.clone(), dst_storage, 4);
// Verify this should use whole-block path
assert!(can_use_whole_block_transfer(&src, &dst, None));
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
let checksums = fill_and_checksum(&src, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, None)?;
let notification = execute_transfer(
&src,
&dst,
&src_blocks,
&dst_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
verify_checksums_by_position(&checksums, &src_blocks, &dst, &dst_blocks)?;
Ok(())
}
/// Test that FC→LW transfers fall back to layer-wise path.
///
/// This test verifies data integrity for FC→LW transfers that should use
/// the layer-wise path (not whole-block).
#[rstest]
#[case(StorageKind::System, StorageKind::System)]
#[case(StorageKind::Pinned, StorageKind::Pinned)]
#[tokio::test]
async fn test_layer_wise_transfer_fc_lw(
#[case] src_storage: StorageKind,
#[case] dst_storage: StorageKind,
) -> Result<()> {
let agent = create_test_agent("test_layer_wise_fc_lw_transfer");
let src = create_fc_layout(agent.clone(), src_storage, 4);
let dst = create_lw_layout(agent.clone(), dst_storage, 4);
// Verify this should NOT use whole-block path
assert!(!can_use_whole_block_transfer(&src, &dst, None));
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
let checksums = fill_and_checksum(&src, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, None)?;
let notification = execute_transfer(
&src,
&dst,
&src_blocks,
&dst_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
verify_checksums_by_position(&checksums, &src_blocks, &dst, &dst_blocks)?;
Ok(())
}
/// Test partial layer transfer uses layer-wise path even for FC→FC.
#[tokio::test]
async fn test_partial_layer_transfer_uses_layer_wise() -> Result<()> {
let agent = create_test_agent("test_partial_layer");
let src = create_fc_layout(agent.clone(), StorageKind::System, 4);
let dst = create_fc_layout(agent.clone(), StorageKind::Pinned, 4);
// Verify partial transfer should NOT use whole-block path
let partial_range = 0..1;
assert!(!can_use_whole_block_transfer(
&src,
&dst,
Some(&partial_range)
));
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
// Fill source with sequential pattern for layer 0 only
let checksums = fill_and_checksum_with_mode(
&src,
&src_blocks,
FillPattern::Sequential,
TransferMode::FirstLayerOnly,
)?;
let ctx = create_transfer_context(agent, None)?;
let options = TransferOptionsInternal::builder()
.layer_range(partial_range)
.build()?;
let notification =
execute_transfer(&src, &dst, &src_blocks, &dst_blocks, options, ctx.context())?;
notification.await?;
verify_checksums_by_position_with_mode(
&checksums,
&src_blocks,
&dst,
&dst_blocks,
TransferMode::FirstLayerOnly,
)?;
Ok(())
}
// ============================================================================
// Transfer Coverage Gap Tests
// ============================================================================
/// Test that transferring layer 0 and layer 1 independently produces the same
/// result as a full-block transfer.
///
/// `test_partial_layer_transfer_uses_layer_wise` only transfers layer 0. This test
/// verifies that layer 0 + layer 1 transferred independently compose to the same
/// result as transferring all layers at once.
#[rstest]
#[case(LayoutKind::FC, LayoutKind::FC)]
#[case(LayoutKind::FC, LayoutKind::LW)]
#[case(LayoutKind::LW, LayoutKind::FC)]
#[case(LayoutKind::LW, LayoutKind::LW)]
#[tokio::test]
async fn test_layer_composition_equals_full_block(
#[case] src_kind: LayoutKind,
#[case] dst_kind: LayoutKind,
) -> Result<()> {
let agent = create_test_agent("test_layer_composition");
let src = create_layout(
agent.clone(),
LayoutSpec::new(src_kind, StorageKind::Pinned),
4,
);
let dst_full = create_layout(
agent.clone(),
LayoutSpec::new(dst_kind, StorageKind::Pinned),
4,
);
let dst_layered = create_layout(
agent.clone(),
LayoutSpec::new(dst_kind, StorageKind::Pinned),
4,
);
let src_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
// Fill source blocks with sequential pattern (all layers)
fill_and_checksum(&src, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, None)?;
// Full-block transfer: src[0,1] → dst_full[2,3]
let notification = execute_transfer(
&src,
&dst_full,
&src_blocks,
&dst_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
// Layer-wise transfers: src[0,1] → dst_layered[2,3] layer by layer
let options_layer0 = TransferOptionsInternal::builder()
.layer_range(0..1)
.build()?;
let notification = execute_transfer(
&src,
&dst_layered,
&src_blocks,
&dst_blocks,
options_layer0,
ctx.context(),
)?;
notification.await?;
let options_layer1 = TransferOptionsInternal::builder()
.layer_range(1..2)
.build()?;
let notification = execute_transfer(
&src,
&dst_layered,
&src_blocks,
&dst_blocks,
options_layer1,
ctx.context(),
)?;
notification.await?;
// Compute full-block checksums on both destinations
let checksums_full = compute_block_checksums(&dst_full, &dst_blocks)?;
let checksums_layered = compute_block_checksums(&dst_layered, &dst_blocks)?;
// Layer-wise composition must equal full-block transfer
for &block_id in &dst_blocks {
assert_eq!(
checksums_full[&block_id], checksums_layered[&block_id],
"Block {}: full-block checksum ({}) != layer-composed checksum ({})",
block_id, checksums_full[&block_id], checksums_layered[&block_id],
);
}
Ok(())
}
/// Test that FC↔LW CUDA transfers through Pinned↔Device correctly use the
/// vectorized path and preserve data integrity through a roundtrip.
///
/// Existing tests cover FC↔LW via memcpy (System/Pinned) and via `test_p2p`.
/// This test explicitly asserts that CUDA FC↔LW transfers go through the
/// vectorized path (not whole-block) and verifies roundtrip integrity.
#[rstest]
#[case(LayoutKind::FC, LayoutKind::LW)]
#[case(LayoutKind::LW, LayoutKind::FC)]
#[tokio::test]
async fn test_cuda_fc_lw_roundtrip_uses_vectorized(
#[case] host_kind: LayoutKind,
#[case] device_kind: LayoutKind,
) -> Result<()> {
let agent = build_agent_for_kinds(&[StorageKind::Pinned, StorageKind::Device(0)])?;
let host = create_layout(
agent.clone(),
LayoutSpec::new(host_kind, StorageKind::Pinned),
4,
);
let device = create_layout(
agent.clone(),
LayoutSpec::new(device_kind, StorageKind::Device(0)),
4,
);
// Confirm vectorized path will be used (not whole-block)
assert!(
!can_use_whole_block_transfer(&host, &device, None),
"FC↔LW across Pinned↔Device should use vectorized path, not whole-block"
);
let src_blocks = vec![0, 1];
let device_blocks = vec![0, 1];
let dst_blocks = vec![2, 3];
// Fill host source blocks
let checksums = fill_and_checksum(&host, &src_blocks, FillPattern::Sequential)?;
let ctx = create_transfer_context(agent, None)?;
// H2D: host[0,1] → device[0,1]
let notification = execute_transfer(
&host,
&device,
&src_blocks,
&device_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
// D2H: device[0,1] → host[2,3]
let notification = execute_transfer(
&device,
&host,
&device_blocks,
&dst_blocks,
TransferOptionsInternal::default(),
ctx.context(),
)?;
notification.await?;
// Verify roundtrip: host[0,1] == host[2,3]
verify_checksums_by_position(&checksums, &src_blocks, &host, &dst_blocks)?;
Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Comprehensive transfer tests for verifying data integrity across storage types and layout configurations.
mod local_transfers;
/// Skip test if stub kernels are in use (no real CUDA available).
///
/// Call this at the start of any test that requires real CUDA operations.
/// When stubs are in use, the test will print a message and return early.
///
/// # Example
/// ```ignore
/// #[test]
/// fn my_cuda_test() {
/// skip_if_stubs!();
/// // ... test code that requires CUDA ...
/// }
/// ```
#[allow(unused_macros)]
macro_rules! skip_if_stubs {
() => {
if kvbm_kernels::is_using_stubs() {
eprintln!(
"Skipping test '{}': stub kernels in use (no real CUDA)",
module_path!()
);
return;
}
};
}
/// Check if any of the storage kinds require CUDA, and skip if stubs are in use.
///
/// Call this at the start of parameterized tests that may or may not use Device storage.
#[allow(unused_macros)]
macro_rules! skip_if_stubs_and_device {
($($kind:expr),+ $(,)?) => {
if kvbm_kernels::is_using_stubs() {
let needs_cuda = false $(|| matches!($kind, StorageKind::Device(_)))+;
if needs_cuda {
eprintln!(
"Skipping test '{}': stub kernels in use and test requires Device storage",
module_path!()
);
return Ok(());
}
}
};
}
// Make the macros available to submodules
#[allow(unused_imports)]
pub(crate) use skip_if_stubs;
#[allow(unused_imports)]
pub(crate) use skip_if_stubs_and_device;
use super::{
BlockChecksum, FillPattern, NixlAgent, PhysicalLayout, StorageKind, TransferCapabilities,
compute_block_checksums, compute_layer_checksums, fill_blocks, fill_layers,
};
use crate::{
BlockId,
layout::{
BlockDimension, LayoutConfig,
builder::{HasConfig, NoLayout, NoMemory, PhysicalLayoutBuilder},
},
};
use anyhow::Result;
use cudarc::driver::sys::CUdevice_attribute_enum;
use cudarc::driver::{CudaContext, CudaStream, LaunchConfig, PushKernelArg};
use cudarc::nvrtc::{CompileOptions, compile_ptx_with_opts};
use std::collections::HashMap;
use std::ops::Range;
use std::sync::{Arc, OnceLock};
use std::time::{Duration, Instant};
/// Layout kind for parameterized testing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LayoutKind {
/// Fully contiguous layout
FC,
/// Layer-wise (layer-separate) layout
LW,
}
/// Storage and layout specification for creating test layouts.
#[derive(Debug, Clone, Copy)]
pub struct LayoutSpec {
pub kind: LayoutKind,
pub storage: StorageKind,
}
impl LayoutSpec {
pub fn new(kind: LayoutKind, storage: StorageKind) -> Self {
Self { kind, storage }
}
}
/// Transfer mode for parameterized testing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferMode {
/// Transfer entire blocks (all layers)
FullBlocks,
/// Transfer only the first layer
FirstLayerOnly,
/// Transfer only the second layer
SecondLayerOnly,
}
impl TransferMode {
/// Convert to optional layer range for execute_transfer.
pub fn layer_range(&self) -> Option<Range<usize>> {
match self {
TransferMode::FullBlocks => None,
TransferMode::FirstLayerOnly => Some(0..1),
TransferMode::SecondLayerOnly => Some(1..2),
}
}
/// Get a descriptive suffix for test names.
pub fn suffix(&self) -> &'static str {
match self {
TransferMode::FullBlocks => "full",
TransferMode::FirstLayerOnly => "layer0",
TransferMode::SecondLayerOnly => "layer1",
}
}
}
/// Standard layout configuration for all tests.
pub fn standard_config(num_blocks: usize) -> LayoutConfig {
LayoutConfig::builder()
.num_blocks(num_blocks)
.num_layers(2)
.outer_dim(2)
.page_size(16)
.inner_dim(128)
.dtype_width_bytes(2)
.build()
.unwrap()
}
/// Helper function for creating a PhysicalLayout builder with standard config.
///
/// This is used by other test modules (fill, checksum, validation) for backwards compatibility.
pub fn builder(num_blocks: usize) -> PhysicalLayoutBuilder<HasConfig, NoLayout, NoMemory> {
let agent = create_test_agent("test_agent");
let config = standard_config(num_blocks);
PhysicalLayout::builder(agent).with_config(config)
}
/// Create a test agent with no backends.
///
/// Use this for tests that don't require specific NIXL backends.
pub fn create_test_agent(name: &str) -> NixlAgent {
NixlAgent::new(name).expect("Failed to create agent")
}
/// Create a test agent with specific backends (strict - all must succeed).
#[expect(dead_code)]
pub fn create_test_agent_with_backends(name: &str, backends: &[&str]) -> Result<NixlAgent> {
NixlAgent::with_backends(name, backends)
}
/// Create a fully contiguous physical layout with the specified storage type.
pub fn create_fc_layout(
agent: NixlAgent,
storage_kind: StorageKind,
num_blocks: usize,
) -> PhysicalLayout {
let config = standard_config(num_blocks);
let builder = PhysicalLayout::builder(agent)
.with_config(config)
.fully_contiguous();
match storage_kind {
StorageKind::System => builder.allocate_system().build().unwrap(),
StorageKind::Pinned => builder.allocate_pinned(None).build().unwrap(),
StorageKind::Device(device_id) => builder.allocate_device(device_id).build().unwrap(),
StorageKind::Disk(_) => builder.allocate_disk(None).build().unwrap(),
}
}
/// Create a layer-separate physical layout with the specified storage type.
pub fn create_lw_layout(
agent: NixlAgent,
storage_kind: StorageKind,
num_blocks: usize,
) -> PhysicalLayout {
let config = standard_config(num_blocks);
let builder = PhysicalLayout::builder(agent)
.with_config(config)
.layer_separate(BlockDimension::BlockIsFirstDim);
match storage_kind {
StorageKind::System => builder.allocate_system().build().unwrap(),
StorageKind::Pinned => builder.allocate_pinned(None).build().unwrap(),
StorageKind::Device(device_id) => builder.allocate_device(device_id).build().unwrap(),
StorageKind::Disk(_) => builder.allocate_disk(None).build().unwrap(),
}
}
/// Create a physical layout based on the specification.
///
/// This is a DRY helper that dispatches to create_fc_layout or create_lw_layout
/// based on the layout kind in the spec.
pub fn create_layout(agent: NixlAgent, spec: LayoutSpec, num_blocks: usize) -> PhysicalLayout {
match spec.kind {
LayoutKind::FC => create_fc_layout(agent, spec.storage, num_blocks),
LayoutKind::LW => create_lw_layout(agent, spec.storage, num_blocks),
}
}
/// Create a transport manager for testing with the specified agent.
///
/// Note: The agent should already have backends configured. Use `create_test_agent`
/// or `build_agent_with_backends` to create properly configured agents.
pub fn create_transfer_context(
agent: NixlAgent,
capabilities: Option<TransferCapabilities>,
) -> Result<crate::manager::TransferManager> {
crate::manager::TransferManager::builder()
.capabilities(capabilities.unwrap_or_default())
.nixl_agent(agent)
.cuda_device_id(0)
.build()
}
/// Fill blocks and compute checksums.
///
/// This can only be called on System or Pinned layouts.
pub fn fill_and_checksum(
layout: &PhysicalLayout,
block_ids: &[BlockId],
pattern: FillPattern,
) -> Result<HashMap<BlockId, BlockChecksum>> {
fill_blocks(layout, block_ids, pattern)?;
compute_block_checksums(layout, block_ids)
}
/// Fill blocks or layers based on transfer mode and compute checksums.
///
/// This is a mode-aware version of fill_and_checksum that handles both
/// full block transfers and layer-wise transfers.
pub fn fill_and_checksum_with_mode(
layout: &PhysicalLayout,
block_ids: &[BlockId],
pattern: FillPattern,
mode: TransferMode,
) -> Result<HashMap<BlockId, BlockChecksum>> {
match mode {
TransferMode::FullBlocks => {
fill_blocks(layout, block_ids, pattern)?;
compute_block_checksums(layout, block_ids)
}
TransferMode::FirstLayerOnly => {
fill_layers(layout, block_ids, 0..1, pattern)?;
compute_layer_checksums(layout, block_ids, 0..1)
}
TransferMode::SecondLayerOnly => {
fill_layers(layout, block_ids, 1..2, pattern)?;
compute_layer_checksums(layout, block_ids, 1..2)
}
}
}
/// Verify that destination block checksums match the expected source checksums.
///
/// This function compares checksums in order, assuming the source and destination
/// block arrays have a 1:1 correspondence (src[i] was transferred to dst[i]).
pub fn verify_checksums_by_position(
src_checksums: &HashMap<BlockId, BlockChecksum>,
src_block_ids: &[BlockId],
dst_layout: &PhysicalLayout,
dst_block_ids: &[BlockId],
) -> Result<()> {
assert_eq!(
src_block_ids.len(),
dst_block_ids.len(),
"Source and destination block arrays must have same length"
);
let dst_checksums = compute_block_checksums(dst_layout, dst_block_ids)?;
for (src_id, dst_id) in src_block_ids.iter().zip(dst_block_ids.iter()) {
let src_checksum = src_checksums
.get(src_id)
.unwrap_or_else(|| panic!("Missing source checksum for block {}", src_id));
let dst_checksum = dst_checksums
.get(dst_id)
.unwrap_or_else(|| panic!("Missing destination checksum for block {}", dst_id));
assert_eq!(
src_checksum, dst_checksum,
"Checksum mismatch: src[{}] != dst[{}]: {} != {}",
src_id, dst_id, src_checksum, dst_checksum
);
}
Ok(())
}
/// Verify checksums with transfer mode awareness.
///
/// This is a mode-aware version that handles both full block and layer-wise verification.
pub fn verify_checksums_by_position_with_mode(
src_checksums: &HashMap<BlockId, BlockChecksum>,
src_block_ids: &[BlockId],
dst_layout: &PhysicalLayout,
dst_block_ids: &[BlockId],
mode: TransferMode,
) -> Result<()> {
assert_eq!(
src_block_ids.len(),
dst_block_ids.len(),
"Source and destination block arrays must have same length"
);
let dst_checksums = match mode {
TransferMode::FullBlocks => compute_block_checksums(dst_layout, dst_block_ids)?,
TransferMode::FirstLayerOnly => compute_layer_checksums(dst_layout, dst_block_ids, 0..1)?,
TransferMode::SecondLayerOnly => compute_layer_checksums(dst_layout, dst_block_ids, 1..2)?,
};
for (src_id, dst_id) in src_block_ids.iter().zip(dst_block_ids.iter()) {
let src_checksum = src_checksums
.get(src_id)
.unwrap_or_else(|| panic!("Missing source checksum for block {}", src_id));
let dst_checksum = dst_checksums
.get(dst_id)
.unwrap_or_else(|| panic!("Missing destination checksum for block {}", dst_id));
assert_eq!(
src_checksum, dst_checksum,
"Checksum mismatch (mode={:?}): src[{}] != dst[{}]: {} != {}",
mode, src_id, dst_id, src_checksum, dst_checksum
);
}
Ok(())
}
/// Fill guard blocks and return their checksums for later verification.
///
/// Guard blocks are blocks adjacent to transfer destinations that should
/// remain unchanged during transfers. This function fills them with a
/// distinctive pattern and returns their checksums for later validation.
///
/// # Arguments
/// * `layout` - The physical layout containing the guard blocks
/// * `guard_block_ids` - Block IDs to use as guards
/// * `pattern` - Fill pattern for guard blocks (typically a constant like 0xFF)
///
/// # Returns
/// A map of block ID to checksum for all guard blocks
pub fn create_guard_blocks(
layout: &PhysicalLayout,
guard_block_ids: &[usize],
pattern: FillPattern,
) -> Result<HashMap<usize, BlockChecksum>> {
fill_blocks(layout, guard_block_ids, pattern)?;
compute_block_checksums(layout, guard_block_ids)
}
/// Verify that guard blocks remain unchanged after transfers.
///
/// This function compares the current checksums of guard blocks against
/// their expected values. Any mismatch indicates memory corruption or
/// unintended overwrites during transfer operations.
///
/// # Arguments
/// * `layout` - The physical layout containing the guard blocks
/// * `guard_block_ids` - Block IDs to verify
/// * `expected_checksums` - Expected checksums from create_guard_blocks
///
/// # Errors
/// Returns an error if any guard block checksum has changed
pub fn verify_guard_blocks_unchanged(
layout: &PhysicalLayout,
guard_block_ids: &[usize],
expected_checksums: &HashMap<usize, BlockChecksum>,
) -> Result<()> {
let current_checksums = compute_block_checksums(layout, guard_block_ids)?;
for &block_id in guard_block_ids {
let expected = expected_checksums
.get(&block_id)
.unwrap_or_else(|| panic!("Missing expected checksum for guard block {}", block_id));
let current = current_checksums
.get(&block_id)
.unwrap_or_else(|| panic!("Missing current checksum for guard block {}", block_id));
if expected != current {
return Err(anyhow::anyhow!(
"Guard block {} was modified during transfer! Expected: {}, Got: {}",
block_id,
expected,
current
));
}
}
Ok(())
}
/// CUDA sleep kernel source code.
const SLEEP_KERNEL_SRC: &str = r#"
extern "C" __global__ void sleep_kernel(unsigned long long min_cycles) {
const unsigned long long start = clock64();
while ((clock64() - start) < min_cycles) {
asm volatile("");
}
}
"#;
/// A reusable CUDA sleep utility for tests.
///
/// This struct provides a simple interface to execute GPU sleep operations
/// with calibrated timing. It compiles the sleep kernel once per CUDA context
/// and caches the calibration for reuse.
///
/// The calibration is conservative (prefers longer sleep durations over shorter)
/// to ensure minimum sleep times are met.
pub struct CudaSleep {
function: cudarc::driver::CudaFunction,
cycles_per_ms: f64,
}
impl CudaSleep {
/// Get or create a CudaSleep instance for the given CUDA context.
///
/// This function uses lazy initialization and caches instances per device ID.
/// The first call for each device will compile the kernel and run calibration.
///
/// # Arguments
/// * `cuda_ctx` - The CUDA context to use
///
/// # Returns
/// A shared reference to the CudaSleep instance for this context's device.
pub fn for_context(cuda_ctx: &Arc<CudaContext>) -> Result<Arc<Self>> {
static INSTANCES: OnceLock<parking_lot::Mutex<HashMap<usize, Arc<CudaSleep>>>> =
OnceLock::new();
let instances = INSTANCES.get_or_init(|| parking_lot::Mutex::new(HashMap::new()));
let device_ordinal = cuda_ctx.ordinal();
// Fast path: check if instance already exists
{
let instances_guard = instances.lock();
if let Some(instance) = instances_guard.get(&device_ordinal) {
return Ok(Arc::clone(instance));
}
}
// Slow path: create new instance with calibration
let instance = Arc::new(Self::new(cuda_ctx)?);
// Store in cache
let mut instances_guard = instances.lock();
instances_guard
.entry(device_ordinal)
.or_insert_with(|| Arc::clone(&instance));
Ok(instance)
}
/// Create a new CudaSleep instance with calibration.
///
/// This compiles the sleep kernel and runs a calibration loop to determine
/// the relationship between clock cycles and wall-clock time.
fn new(cuda_ctx: &Arc<CudaContext>) -> Result<Self> {
// Get device compute capability
let major = cuda_ctx
.attribute(CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR)?;
let minor = cuda_ctx
.attribute(CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)?;
// Compile PTX for this device
let mut compile_opts = CompileOptions {
name: Some("sleep_kernel.cu".into()),
..Default::default()
};
compile_opts
.options
.push(format!("--gpu-architecture=compute_{}{}", major, minor));
let ptx = compile_ptx_with_opts(SLEEP_KERNEL_SRC, compile_opts)?;
let module = cuda_ctx.load_module(ptx)?;
let function = module.load_function("sleep_kernel")?;
// Get device clock rate
let clock_rate_khz =
cuda_ctx.attribute(CUdevice_attribute_enum::CU_DEVICE_ATTRIBUTE_CLOCK_RATE)? as u64;
// Create a temporary stream for calibration
let stream = cuda_ctx.new_stream()?;
// Warm up to absorb JIT overhead
let warm_cycles = clock_rate_khz.saturating_mul(10).max(1);
Self::launch_kernel(&function, &stream, warm_cycles)?;
stream.synchronize()?;
// Run calibration loop
let desired_delay = Duration::from_millis(600);
let mut target_cycles = clock_rate_khz.saturating_mul(50).max(1); // ~50ms starting point
let mut actual_duration = Duration::ZERO;
for _ in 0..8 {
let start = Instant::now();
Self::launch_kernel(&function, &stream, target_cycles)?;
stream.synchronize()?;
actual_duration = start.elapsed();
if actual_duration >= desired_delay {
break;
}
target_cycles = target_cycles.saturating_mul(2);
}
// Calculate cycles per millisecond with conservative 20% margin
// (prefer longer sleeps over shorter)
let cycles_per_ms = if actual_duration.as_millis() > 0 {
(target_cycles as f64 / actual_duration.as_millis() as f64) * 1.2
} else {
clock_rate_khz as f64 // Fallback to clock rate
};
Ok(Self {
function,
cycles_per_ms,
})
}
/// Launch the sleep kernel with the specified number of cycles.
fn launch_kernel(
function: &cudarc::driver::CudaFunction,
stream: &Arc<CudaStream>,
cycles: u64,
) -> Result<()> {
let launch_cfg = LaunchConfig {
grid_dim: (1, 1, 1),
block_dim: (1, 1, 1),
shared_mem_bytes: 0,
};
let mut launch = stream.launch_builder(function);
unsafe {
launch.arg(&cycles);
launch.launch(launch_cfg)?;
}
Ok(())
}
/// Launch a sleep operation on the given stream.
///
/// This queues a GPU kernel that will sleep for approximately the specified
/// duration. The sleep is conservative and may take longer than requested.
///
/// # Arguments
/// * `duration` - The minimum duration to sleep
/// * `stream` - The CUDA stream to launch the kernel on
///
/// # Returns
/// Ok(()) if the kernel was successfully queued
pub fn launch(&self, duration: Duration, stream: &Arc<CudaStream>) -> Result<()> {
let target_cycles = (duration.as_millis() as f64 * self.cycles_per_ms) as u64;
Self::launch_kernel(&self.function, stream, target_cycles)
}
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Block ID validation for transfers.
//!
//! This module provides validation functions to ensure block transfers are safe and correct.
use crate::BlockId;
use super::PhysicalLayout;
use std::collections::HashSet;
use thiserror::Error;
/// Validation errors for block transfers.
#[derive(Debug, Error, PartialEq)]
pub enum BlockValidationError {
/// Destination block IDs contain duplicates.
#[error("Destination block IDs are not unique: duplicates = {duplicates:?}")]
DuplicateDestinationBlocks { duplicates: Vec<BlockId> },
/// Source and destination blocks overlap when using the same layout.
#[error("Source and destination blocks overlap (same layout): overlapping = {overlapping:?}")]
OverlappingBlocks { overlapping: Vec<BlockId> },
/// Lists have mismatched lengths.
#[error(
"Block ID lists have mismatched lengths: src={src_len}, dst={dst_len}, bounce={bounce_len:?}"
)]
LengthMismatch {
src_len: usize,
dst_len: usize,
bounce_len: Option<usize>,
},
/// Block ID is out of range for the layout.
#[error("Block ID {block_id} out of range for {layout_name} (max={max})")]
BlockOutOfRange {
block_id: BlockId,
layout_name: &'static str,
max: usize,
},
/// Bounce block IDs contain duplicates.
#[error("Bounce block IDs are not unique: duplicates = {duplicates:?}")]
DuplicateBounceBlocks { duplicates: Vec<BlockId> },
}
/// Validate that destination block IDs are unique (no duplicates).
///
/// # Arguments
/// * `dst_block_ids` - Destination block IDs
///
/// # Returns
/// Ok(()) if unique, Err with duplicate IDs otherwise
pub fn validate_dst_unique(dst_block_ids: &[BlockId]) -> Result<(), BlockValidationError> {
let mut seen = HashSet::new();
let mut duplicates = Vec::new();
for &id in dst_block_ids {
if !seen.insert(id) && !duplicates.contains(&id) {
duplicates.push(id);
}
}
if duplicates.is_empty() {
Ok(())
} else {
Err(BlockValidationError::DuplicateDestinationBlocks { duplicates })
}
}
/// Validate that bounce block IDs are unique (no duplicates).
pub fn validate_bounce_unique(bounce_block_ids: &[BlockId]) -> Result<(), BlockValidationError> {
let mut seen = HashSet::new();
let mut duplicates = Vec::new();
for &id in bounce_block_ids {
if !seen.insert(id) && !duplicates.contains(&id) {
duplicates.push(id);
}
}
if duplicates.is_empty() {
Ok(())
} else {
Err(BlockValidationError::DuplicateBounceBlocks { duplicates })
}
}
/// Check if two layouts are the same by comparing their Arc pointers.
///
/// This is a conservative check - if pointers differ, layouts might still be the same
/// but we treat them as different to avoid false positives in disjoint validation.
fn are_same_layout(layout1: &PhysicalLayout, layout2: &PhysicalLayout) -> bool {
// Compare Arc pointer addresses
std::ptr::eq(
std::sync::Arc::as_ptr(layout1.layout()),
std::sync::Arc::as_ptr(layout2.layout()),
)
}
/// Validate that src and dst block IDs are disjoint when using the same layout.
///
/// Only enforced in debug mode when src and dst point to the same layout.
///
/// # Arguments
/// * `src_block_ids` - Source block IDs
/// * `dst_block_ids` - Destination block IDs
/// * `src_layout` - Source physical layout
/// * `dst_layout` - Destination physical layout
#[cfg(debug_assertions)]
pub fn validate_disjoint_same_layout(
src_block_ids: &[BlockId],
dst_block_ids: &[BlockId],
src_layout: &PhysicalLayout,
dst_layout: &PhysicalLayout,
) -> Result<(), BlockValidationError> {
// Only check if same layout
if !are_same_layout(src_layout, dst_layout) {
return Ok(());
}
let src_set: HashSet<_> = src_block_ids.iter().copied().collect();
let overlapping: Vec<_> = dst_block_ids
.iter()
.filter(|id| src_set.contains(id))
.copied()
.collect();
if overlapping.is_empty() {
Ok(())
} else {
Err(BlockValidationError::OverlappingBlocks { overlapping })
}
}
/// Validate block IDs are in range for a layout.
#[cfg(debug_assertions)]
pub fn validate_block_ids_in_range(
block_ids: &[BlockId],
layout: &PhysicalLayout,
layout_name: &'static str,
) -> Result<(), BlockValidationError> {
let max_blocks = layout.layout().config().num_blocks;
for &block_id in block_ids {
if block_id >= max_blocks as BlockId {
return Err(BlockValidationError::BlockOutOfRange {
block_id,
layout_name,
max: max_blocks,
});
}
}
Ok(())
}
/// Full validation for block transfer (debug mode).
///
/// Validates:
/// - List lengths match
/// - Destination IDs are unique
/// - Bounce IDs are unique (if provided)
/// - Source and destination are disjoint (if same layout)
/// - All block IDs are in range for their respective layouts
#[cfg(debug_assertions)]
pub fn validate_block_transfer(
src_block_ids: &[BlockId],
dst_block_ids: &[BlockId],
bounce_block_ids: Option<&[BlockId]>,
src_layout: &PhysicalLayout,
dst_layout: &PhysicalLayout,
bounce_layout: Option<&PhysicalLayout>,
) -> Result<(), BlockValidationError> {
// Validate lengths
if src_block_ids.len() != dst_block_ids.len() {
return Err(BlockValidationError::LengthMismatch {
src_len: src_block_ids.len(),
dst_len: dst_block_ids.len(),
bounce_len: bounce_block_ids.map(|ids| ids.len()),
});
}
if let Some(bounce_ids) = bounce_block_ids
&& bounce_ids.len() != src_block_ids.len()
{
return Err(BlockValidationError::LengthMismatch {
src_len: src_block_ids.len(),
dst_len: dst_block_ids.len(),
bounce_len: Some(bounce_ids.len()),
});
}
#[cfg(debug_assertions)]
{
// Validate destination uniqueness
validate_dst_unique(dst_block_ids)?;
// Validate bounce uniqueness if provided
if let Some(bounce_ids) = bounce_block_ids {
validate_bounce_unique(bounce_ids)?;
}
// Validate disjoint if same layout
validate_disjoint_same_layout(src_block_ids, dst_block_ids, src_layout, dst_layout)?;
// Validate block IDs in range
validate_block_ids_in_range(src_block_ids, src_layout, "source")?;
validate_block_ids_in_range(dst_block_ids, dst_layout, "destination")?;
if let (Some(bounce_ids), Some(bounce_layout)) = (bounce_block_ids, bounce_layout) {
validate_block_ids_in_range(bounce_ids, bounce_layout, "bounce")?;
}
}
Ok(())
}
/// Minimal validation for block transfer (release mode).
///
/// Only validates:
/// - List lengths match
/// - Destination IDs are unique
#[cfg(not(debug_assertions))]
pub fn validate_block_transfer(
src_block_ids: &[BlockId],
dst_block_ids: &[BlockId],
bounce_block_ids: Option<&[BlockId]>,
_src_layout: &PhysicalLayout,
_dst_layout: &PhysicalLayout,
_bounce_layout: Option<&PhysicalLayout>,
) -> Result<(), BlockValidationError> {
// Validate lengths
if src_block_ids.len() != dst_block_ids.len() {
return Err(BlockValidationError::LengthMismatch {
src_len: src_block_ids.len(),
dst_len: dst_block_ids.len(),
bounce_len: bounce_block_ids.map(|ids| ids.len()),
});
}
if let Some(bounce_ids) = bounce_block_ids {
if bounce_ids.len() != src_block_ids.len() {
return Err(BlockValidationError::LengthMismatch {
src_len: src_block_ids.len(),
dst_len: dst_block_ids.len(),
bounce_len: Some(bounce_ids.len()),
});
}
}
// Validate destination uniqueness
validate_dst_unique(dst_block_ids)?;
Ok(())
}
#[cfg(all(test, feature = "testing-kvbm"))]
mod tests {
use super::super::tests::*;
use super::*;
#[test]
fn test_dst_unique_valid() {
let ids = vec![0, 1, 2, 3, 4];
assert!(validate_dst_unique(&ids).is_ok());
}
#[test]
fn test_dst_unique_duplicate() {
let ids = vec![0, 1, 2, 1, 3];
let result = validate_dst_unique(&ids);
assert!(result.is_err());
match result.unwrap_err() {
BlockValidationError::DuplicateDestinationBlocks { duplicates } => {
assert_eq!(duplicates, vec![1]);
}
_ => panic!("Wrong error type"),
}
}
#[test]
fn test_dst_unique_multiple_duplicates() {
let ids = vec![0, 1, 2, 1, 3, 2];
let result = validate_dst_unique(&ids);
assert!(result.is_err());
match result.unwrap_err() {
BlockValidationError::DuplicateDestinationBlocks { duplicates } => {
assert!(duplicates.contains(&1));
assert!(duplicates.contains(&2));
}
_ => panic!("Wrong error type"),
}
}
#[test]
#[cfg(debug_assertions)]
fn test_disjoint_same_layout_valid() {
let physical = builder(2)
.fully_contiguous()
.allocate_system()
.build()
.unwrap();
let src_ids = vec![0, 1, 2];
let dst_ids = vec![5, 6, 7];
assert!(validate_disjoint_same_layout(&src_ids, &dst_ids, &physical, &physical).is_ok());
}
#[test]
#[cfg(debug_assertions)]
fn test_disjoint_same_layout_overlap() {
let physical = builder(2)
.fully_contiguous()
.allocate_system()
.build()
.unwrap();
let src_ids = vec![0, 1, 2];
let dst_ids = vec![2, 3, 4]; // 2 overlaps
let result = validate_disjoint_same_layout(&src_ids, &dst_ids, &physical, &physical);
assert!(result.is_err());
match result.unwrap_err() {
BlockValidationError::OverlappingBlocks { overlapping } => {
assert_eq!(overlapping, vec![2]);
}
_ => panic!("Wrong error type"),
}
}
#[test]
fn test_disjoint_different_layouts_ok() {
let physical1 = builder(2)
.fully_contiguous()
.allocate_system()
.build()
.unwrap();
let physical2 = builder(2)
.fully_contiguous()
.allocate_system()
.build()
.unwrap();
let src_ids = vec![0, 1, 2];
let dst_ids = vec![0, 1, 2]; // Same IDs but different layouts
// Should be OK since different layouts
#[cfg(debug_assertions)]
assert!(validate_disjoint_same_layout(&src_ids, &dst_ids, &physical1, &physical2).is_ok());
}
#[test]
fn test_length_mismatch() {
let physical1 = builder(2)
.fully_contiguous()
.allocate_system()
.build()
.unwrap();
let physical2 = builder(2)
.fully_contiguous()
.allocate_system()
.build()
.unwrap();
let src_ids = vec![0, 1, 2];
let dst_ids = vec![5, 6]; // Different length
let result =
validate_block_transfer(&src_ids, &dst_ids, None, &physical1, &physical2, None);
assert!(result.is_err());
match result.unwrap_err() {
BlockValidationError::LengthMismatch {
src_len,
dst_len,
bounce_len,
} => {
assert_eq!(src_len, 3);
assert_eq!(dst_len, 2);
assert_eq!(bounce_len, None);
}
_ => panic!("Wrong error type"),
}
}
// #[test]
// #[cfg(debug_assertions)]
// fn test_block_out_of_range() {
// let (_layout, physical) = create_test_layout(5); // Only 5 blocks
// let src_ids = vec![0, 1, 2];
// let dst_ids = vec![3, 4, 10]; // 10 is out of range
// let result = validate_block_ids_in_range(&dst_ids, &physical, "destination");
// assert!(result.is_err());
// match result.unwrap_err() {
// BlockValidationError::BlockOutOfRange {
// block_id,
// layout_name,
// max,
// } => {
// assert_eq!(block_id, 10);
// assert_eq!(layout_name, "destination");
// assert_eq!(max, 5);
// }
// _ => panic!("Wrong error type"),
// }
// }
// #[test]
// fn test_bounce_length_mismatch() {
// let (_layout1, physical1) = create_test_layout(10);
// let (_layout2, physical2) = create_test_layout(10);
// let (_layout3, physical3) = create_test_layout(10);
// let src_ids = vec![0, 1, 2];
// let dst_ids = vec![5, 6, 7];
// let bounce_ids = vec![8, 9]; // Wrong length
// let result = validate_block_transfer(
// &src_ids,
// &dst_ids,
// Some(&bounce_ids),
// &physical1,
// &physical2,
// Some(&physical3),
// );
// assert!(result.is_err());
// match result.unwrap_err() {
// BlockValidationError::LengthMismatch {
// src_len,
// dst_len,
// bounce_len,
// } => {
// assert_eq!(src_len, 3);
// assert_eq!(dst_len, 3);
// assert_eq!(bounce_len, Some(2));
// }
// _ => panic!("Wrong error type"),
// }
// }
// #[test]
// fn test_full_validation_success() {
// let (_layout1, physical1) = create_test_layout(10);
// let (_layout2, physical2) = create_test_layout(10);
// let (_layout3, physical3) = create_test_layout(10);
// let src_ids = vec![0, 1, 2];
// let dst_ids = vec![5, 6, 7];
// let bounce_ids = vec![8, 9, 3];
// assert!(
// validate_block_transfer(
// &src_ids,
// &dst_ids,
// Some(&bounce_ids),
// &physical1,
// &physical2,
// Some(&physical3),
// )
// .is_ok()
// );
// }
}
......@@ -15,7 +15,8 @@ pub use agent::NixlAgent;
pub use config::NixlBackendConfig;
pub use nixl_sys::{
Agent, MemType, NotificationMap, OptArgs, RegistrationHandle, XferDescList, XferOp, XferRequest,
Agent, MemType, NotificationMap, OptArgs, RegistrationHandle, XferDescList, XferOp,
XferRequest, is_stub,
};
pub use serde::{Deserialize, Serialize};
......
......@@ -8,7 +8,7 @@
//! - `NixlBackendConfig`: Configuration for NIXL backends from environment variables
use anyhow::Result;
use nixl_sys::Agent;
use nixl_sys::{Agent, is_stub};
use std::collections::{HashMap, HashSet};
use crate::nixl::NixlBackendConfig;
......@@ -34,6 +34,9 @@ pub struct NixlAgent {
impl NixlAgent {
/// Create a NIXL agent without any backends.
pub fn new(name: &str) -> Result<Self> {
if is_stub() {
return Err(anyhow::anyhow!("NIXL is not supported in stub mode"));
}
let agent = Agent::new(name)?;
Ok(Self {
......
......@@ -412,9 +412,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
version = "0.4.43"
version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
dependencies = [
"iana-time-zone",
"num-traits",
......@@ -682,9 +682,9 @@ dependencies = [
[[package]]
name = "deranged"
version = "0.5.6"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4"
checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
dependencies = [
"powerfmt",
"serde_core",
......@@ -1687,9 +1687,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.86"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d36139f1c97c42c0c86a411910b04e48d4939a0376e6e0f989420cbdee0120e5"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
dependencies = [
"once_cell",
"wasm-bindgen",
......@@ -1911,20 +1911,21 @@ checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "libredox"
version = "0.1.12"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616"
checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
dependencies = [
"bitflags 2.11.0",
"libc",
"redox_syscall 0.7.1",
"plain",
"redox_syscall 0.7.3",
]
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]]
name = "litemap"
......@@ -2411,18 +2412,18 @@ dependencies = [
[[package]]
name = "pin-project"
version = "1.1.10"
version = "1.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.10"
version = "1.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
dependencies = [
"proc-macro2",
"quote",
......@@ -2431,9 +2432,9 @@ dependencies = [
[[package]]
name = "pin-project-lite"
version = "0.2.16"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "pin-utils"
......@@ -2457,6 +2458,12 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plain"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "portable-atomic"
version = "1.13.1"
......@@ -2631,9 +2638,9 @@ dependencies = [
[[package]]
name = "pulldown-cmark"
version = "0.13.0"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0"
checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6"
dependencies = [
"bitflags 2.11.0",
"memchr",
......@@ -2809,9 +2816,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.7.1"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b"
checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16"
dependencies = [
"bitflags 2.11.0",
]
......@@ -2861,9 +2868,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.8.9"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
......@@ -2957,9 +2964,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "1.1.3"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
"bitflags 2.11.0",
"errno",
......@@ -2970,9 +2977,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.23.36"
version = "0.23.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
dependencies = [
"aws-lc-rs",
"log",
......@@ -3483,9 +3490,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "tempfile"
version = "3.25.0"
version = "3.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
dependencies = [
"fastrand",
"getrandom 0.4.1",
......@@ -4141,9 +4148,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.109"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ff9c7baef35ac3c0e17d8bfc9ad75eb62f85a2f02bccc906699dadb0aa9c622"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if 1.0.4",
"once_cell",
......@@ -4154,9 +4161,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.59"
version = "0.4.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24699cd39db9966cf6e2ef10d2f72779c961ad905911f395ea201c3ec9f545d"
checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
dependencies = [
"cfg-if 1.0.4",
"futures-util",
......@@ -4168,9 +4175,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.109"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39455e84ad887a0bbc93c116d72403f1bb0a39e37dd6f235a43e2128a0c7f1fd"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
......@@ -4178,9 +4185,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.109"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff4761f60b0b51fd13fec8764167b7bbcc34498ce3e52805fe1db6f2d56b6d6"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
dependencies = [
"bumpalo",
"proc-macro2",
......@@ -4191,9 +4198,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.109"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc6a171c53d98021a93a474c4a4579d76ba97f9517d871bc12e27640f218b6dd"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
dependencies = [
"unicode-ident",
]
......@@ -4247,9 +4254,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.86"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "668fa5d00434e890a452ab060d24e3904d1be93f7bb01b70e5603baa2b8ab23b"
checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
dependencies = [
"js-sys",
"wasm-bindgen",
......@@ -4757,18 +4764,18 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.39"
version = "0.8.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a"
checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.39"
version = "0.8.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517"
checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953"
dependencies = [
"proc-macro2",
"quote",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment