mod.rs 2.67 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! KV RadixTree
//!
//! This module implements a key-value (KV) store using a Radix Tree structure to efficiently manage and retrieve data blocks.
//! It is designed to support LLM (Large Language Model) inference by re-using a global KV cache.
//!
//! # Overview
//!
//! The main components of this module include:
//!
//! - **Radix Tree Structure**:
//!   - The `RadixTree` struct represents the main data structure, with nodes (`RadixBlock`) containing children and associated worker IDs.
//!   - It allows efficient storage and retrieval of data blocks based on their hashes.
//!
//! - **Event Handling**:
//!   - The `RouterEvent` struct represents events emitted by LLM workers, which can be applied to the Radix Tree to update its state.
//!   - The `KvIndexer` struct manages these events and match requests asynchronously using Tokio channels.
//!
//! - **Hash Computation**:
//!   - Functions like `compute_block_hash` and `compute_block_hash_for_seq` compute hashes for data blocks and sequences of tokens, facilitating quick lookups.
//!
//! - **Concurrency and Asynchronous Operations**:
//!   - The `KvIndexer` uses a single-threaded Tokio runtime to handle events and match requests concurrently, ensuring efficient processing without blocking.
//!
//! - **Match Requests**:
//!   - The `MatchRequest` struct represents requests to find matches in the Radix Tree, returning overlap scores indicating the best matches.
//!
//! # Purpose
//!
//! This module provides a scalable and efficient way to manage and retrieve data blocks for LLM inference, leveraging a global KV cache to optimize performance.

34
35
mod branch_sharded;

36
37
38
39
40
41
42
43
44
fn warn_on_unit_block_size(indexer_type: &'static str, kv_block_size: u32) {
    if kv_block_size == 1 {
        tracing::warn!(
            indexer_type,
            kv_block_size,
            "block_size=1 is supported for KV indexers, but consider avoiding it because KV events may saturate network bandwidth",
        );
    }
}
45
46
mod kv_indexer;
mod local;
47
mod lower_tier;
48
49
50
51
52
53
mod metrics;
mod thread_pool;
mod traits;
mod types;

pub mod concurrent_radix_tree;
54
pub mod concurrent_radix_tree_compressed;
55
56
57
58
59
60
61
62
pub mod positional;
pub mod pruning;
pub mod radix_tree;

#[cfg(test)]
mod tests;

// Re-export everything that was public in the old single-file module.
63
pub use branch_sharded::*;
64
65
pub use kv_indexer::*;
pub use local::*;
66
pub use lower_tier::*;
67
68
69
70
71
72
73
pub use metrics::*;
pub use thread_pool::*;
pub use traits::*;
pub use types::*;

// Re-export RadixTree (was `pub use crate::radix_tree::RadixTree` in old indexer.rs)
pub use radix_tree::RadixTree;