// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! KV Router - Radix tree data structures for LLM KV cache routing.
//!
//! This crate provides the core radix tree implementation and protocols for
//! efficient KV cache lookup and routing in distributed LLM inference systems.

9
mod active_set;
10
pub(crate) mod cleanup;
11

12
13
// Public modules. Several `indexer` submodules and selected items from both
// modules are additionally re-exported at the crate root.
pub mod indexer;
pub mod protocols;
14
pub mod recovery;
15
16
// Public modules. Their `config`, `queue`, `selector`, `multi_worker`, and
// `single` submodules are additionally re-exported at the crate root.
pub mod scheduling;
pub mod sequences;
17
pub mod zmq_wire;
18

19
20
// Backward-compatible re-exports: modules nested under `indexer`,
// `scheduling`, and `sequences` stay reachable at their old top-level paths.
pub use indexer::concurrent_radix_tree;
21
pub use indexer::concurrent_radix_tree_compressed;
22
23
24
25
pub use indexer::positional as nested_map;
pub use indexer::pruning as approx;
pub use indexer::radix_tree;

26
27
28
29
30
// Submodules of `scheduling` keep their names at the crate root, while the
// `sequences` submodules are exposed under the aliases shown here.
pub use scheduling::{config, queue, selector};
pub use sequences::{multi_worker as multi_worker_sequence, single as sequence};
31

32
33
34
// Compiled only when the `standalone-indexer` Cargo feature is enabled.
#[cfg(feature = "standalone-indexer")]
pub mod standalone_indexer;

35
36
37
// Gated behind the same `standalone-indexer` Cargo feature as
// `standalone_indexer`; absent from builds that do not enable it.
#[cfg(feature = "standalone-indexer")]
pub mod standalone_shared_cache;

38
39
// Present only in test builds or when the `bench` Cargo feature is enabled,
// so it is not part of the default public API.
#[cfg(any(test, feature = "bench"))]
pub mod test_utils;
Yan Ru Pei's avatar
Yan Ru Pei committed
40

41
// Re-export key types for convenience
42
pub use self::multi_worker_sequence::{
43
44
45
    ActiveSequencesMultiWorker, SequenceError, SequencePublisher, SequenceRequest,
    SequenceSubscriber,
};
46
47
// `sequence` and `concurrent_radix_tree` resolve through the crate-root module
// re-exports of `sequences::single` and `indexer::concurrent_radix_tree`.
pub use self::sequence::{ActiveSequences, RequestId};
pub use concurrent_radix_tree::ConcurrentRadixTree;
48
pub use concurrent_radix_tree_compressed::ConcurrentRadixTreeCompressed;
49
50
51
52
53
// `config` here is the crate-root re-export of `scheduling::config`; these
// items are flattened so they can be imported directly from the crate root.
pub use config::{
    KvRouterConfig, RouterConfigOverride, RouterPrefillLoadModel, RouterQueuePolicy,
    SharedCacheType,
};
pub use indexer::{
54
55
    BranchShardedIndexer, LowerTierContinuation, LowerTierIndexer, MaybeError, SharedKvCache,
    SyncIndexer, ThreadPoolIndexer,
56
};
Yan Ru Pei's avatar
Yan Ru Pei committed
57
pub use nested_map::PositionalIndexer;
58
pub use protocols::{
59
    KvCacheEventError, LocalBlockHash, OverlapScores, RouterEvent, RouterEventSink,
60
    SharedCacheHits, WorkerConfigLike, WorkerId, compute_block_hash_for_seq,
61
};
62
pub use queue::SchedulerQueue;
63
pub use radix_tree::RadixTree;
64
pub use scheduling::LocalScheduler;
65
pub use scheduling::PrefillLoadEstimator;
66
pub use scheduling::policy::{FcfsPolicy, RouterSchedulingPolicy, SchedulingPolicy, WsptPolicy};
67
68
// Items taken directly from `scheduling`, followed by items taken through
// `selector`, the crate-root re-export of `scheduling::selector`.
pub use scheduling::{KvSchedulerError, PotentialLoad, SchedulingRequest, SchedulingResponse};
pub use selector::{DefaultWorkerSelector, WorkerSelector};