mod.rs

// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Decoupled layout system for block management.
//!
//! This module provides a simplified layout abstraction that:
//! - Maps block IDs to physical memory regions (address + size)
//! - Decouples memory regions from storage type information
//! - Specifies allocation requirements without performing allocation
//! - Uses trait objects for memory ownership

pub(crate) mod builder;

mod config;
mod fully_contiguous;
mod kv_block_layout;
mod layer_separate;
mod physical;
mod serialize;
mod validation;

#[cfg(all(test, feature = "testing-kvbm"))]
pub(super) mod tests;

// #[cfg(test)]
// mod integration_tests;

pub use builder::PhysicalLayoutBuilder;
pub use config::{BlockDimension, LayoutConfig};
pub(crate) use fully_contiguous::FullyContiguousLayout;
pub use kv_block_layout::{BlockDim, KvBlockLayout};
pub(crate) use layer_separate::LayerSeparateLayout;
pub use physical::NixlMetadata;
pub use physical::PhysicalLayout;
pub(crate) use serialize::LayoutDescriptor;
pub use serialize::{BlockFormat, FullyContiguousDetails, LayerSeparateDetails, LayoutTypeDetails};

// mod registration;
// pub use registration::{RegisteredLayout, RegisteredStorageMetadata, RegistrationManager};

use anyhow::Result;
use serde::{Deserialize, Serialize};

pub(crate) use dynamo_memory::MemoryDescriptor;
pub use dynamo_memory::{Buffer, MemoryRegion};

/// Core layout trait for mapping block IDs to memory regions.
///
/// Layouts specify how KV cache blocks are organized in memory without
/// performing allocation themselves. They provide:
/// - Memory region lookup for specific blocks
/// - Allocation requirements for external allocators
/// - Metadata about block organization
pub trait Layout: Send + Sync + std::fmt::Debug {
    /// Get the configuration for this layout.
    fn config(&self) -> &LayoutConfig;

    /// Get the root memory regions backing this layout.
    ///
    /// These regions correspond to the concrete allocations that store the layout's data.
    /// Implementations that derive memory procedurally can return an empty slice.
    fn memory_regions(&self) -> &[Buffer];

    /// Get memory regions for a specific block_id, layer_id, outer_id.
    ///
    /// Returns a [MemoryRegion] for the continuous region specified by the given block_id,
    /// layer_id, outer_id.
    ///
    /// # Arguments
    /// * `block_id` - The ID of the block to query (0..num_blocks)
    /// * `layer_id` - The ID of the layer to query (0..num_layers)
    /// * `outer_id` - The ID of the outer dimension to query (0..outer_dim)
    fn memory_region(
        &self,
        block_id: usize,
        layer_id: usize,
        outer_id: usize,
    ) -> Result<MemoryRegion>;

    /// Get the allocation requirements for this layout.
    ///
    /// Returns a vector of allocation sizes needed to back this layout.
    /// For fully contiguous layouts, this will be a single size.
    /// For layer-separate layouts, this will contain one size per layer.
    ///
    /// # Returns
    /// Vector of allocation sizes in bytes.
    fn required_allocations(&self) -> Vec<usize>;

    /// Check if this layout uses fully contiguous memory.
    ///
    /// Fully contiguous layouts have all blocks in a single allocation,
    /// which enables certain optimizations.
    fn is_fully_contiguous(&self) -> bool;

    /// Get the total number of blocks in this layout.
    fn num_blocks(&self) -> usize;

    /// Get the number of layers per block.
    fn num_layers(&self) -> usize;

    /// Get the outer dimension size.
    ///
    /// In typical KV cache layouts, this is often 2 (for K and V),
    /// but can be 1 for architectures like MLA.
    fn outer_dim(&self) -> usize;

    /// Get the page size (often corresponds to block size in tokens).
    fn page_size(&self) -> usize;

    /// Get the inner dimension size.
    ///
    /// This is typically the hidden size divided by tensor parallel size.
    fn inner_dim(&self) -> usize;

    /// Get the data type width in bytes.
    fn dtype_width_bytes(&self) -> usize;

    /// Get serialization details for this layout type.
    ///
    /// This provides the layout-type-specific information needed to serialize
    /// and reconstruct the layout on a remote node.
    fn serialization_details(&self) -> serialize::LayoutTypeDetails;

    /// Get the KV block layout describing how dimensions are permuted within blocks.
    ///
    /// Returns the internal tensor ordering for blocks in this layout.
    /// For layer-separate layouts, this describes the inner tensor format.
    /// For fully contiguous layouts, this describes the full block format.
    fn block_layout(&self) -> KvBlockLayout;
}

/// Inner shape format for tensor layout
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) enum InnerShape {
    /// Unknown shape - fallback when we can't determine the format
    Unknown,
    /// NHD format: [block_size, num_heads, head_dim]
    /// Common for attention layers where N=tokens, H=heads, D=dimension
    NHD,
    /// HND format: [num_heads, block_size, head_dim]
    /// Alternative layout with heads first
    HND,
}

/// Trait for layouts that provide contiguous per-block memory regions.
///
/// This trait enables direct access to entire blocks as contiguous memory,
/// without requiring layer/outer indexing. It is implemented by
/// [`FullyContiguousLayout`] but NOT by [`LayerSeparateLayout`] (which
/// stores each layer separately).
///
/// Use this trait when you need to:
/// - Access raw block memory for transformation kernels
/// - Reinterpret block memory under different [`KvBlockLayout`] formats
/// - Perform whole-block operations without layer decomposition
pub trait ContiguousBlockLayout: Send + Sync + std::fmt::Debug {
    /// Get the total number of blocks in this layout.
    fn num_blocks(&self) -> usize;

    /// Get the size of each block in bytes.
    fn bytes_per_block(&self) -> usize;

    /// Get the contiguous memory region for a specific block.
    ///
    /// # Arguments
    /// * `block_id` - The ID of the block to query (0..num_blocks)
    ///
    /// # Returns
    /// A [`MemoryRegion`] covering the entire block's memory.
    ///
    /// # Errors
    /// Returns an error if `block_id` is out of range.
    fn raw_block(&self, block_id: usize) -> Result<MemoryRegion>;

    /// Get the KV block layout for this contiguous layout.
    fn block_layout(&self) -> KvBlockLayout;
}