Unverified Commit 36b4208e authored by Yongming Ding's avatar Yongming Ding Committed by GitHub
Browse files

refactor(mocker): replace vllm block manager with kvbm-logical (#8451)


Signed-off-by: Yongming Ding <yongmingd@nvidia.com>
Co-authored-by: Ryan Olson <rolson@nvidia.com>
parent 1dc0975b
...@@ -2511,6 +2511,7 @@ dependencies = [ ...@@ -2511,6 +2511,7 @@ dependencies = [
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-tokens", "dynamo-tokens",
"indicatif 0.18.4", "indicatif 0.18.4",
"kvbm-logical",
"ndarray 0.16.1", "ndarray 0.16.1",
"ndarray-interp", "ndarray-interp",
"ndarray-npy", "ndarray-npy",
......
...@@ -1634,6 +1634,7 @@ dependencies = [ ...@@ -1634,6 +1634,7 @@ dependencies = [
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-tokens", "dynamo-tokens",
"indicatif 0.18.4", "indicatif 0.18.4",
"kvbm-logical",
"ndarray", "ndarray",
"ndarray-interp", "ndarray-interp",
"ndarray-npy", "ndarray-npy",
...@@ -1714,7 +1715,7 @@ dependencies = [ ...@@ -1714,7 +1715,7 @@ dependencies = [
"libc", "libc",
"local-ip-address", "local-ip-address",
"log", "log",
"lru", "lru 0.12.5",
"mio 1.1.1", "mio 1.1.1",
"notify", "notify",
"nuid", "nuid",
...@@ -2112,6 +2113,12 @@ version = "0.1.5" ...@@ -2112,6 +2113,12 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "foldhash"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.2.2" version = "1.2.2"
...@@ -2396,7 +2403,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" ...@@ -2396,7 +2403,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [ dependencies = [
"allocator-api2", "allocator-api2",
"equivalent", "equivalent",
"foldhash", "foldhash 0.1.5",
]
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash 0.2.0",
] ]
[[package]] [[package]]
...@@ -3228,6 +3246,31 @@ dependencies = [ ...@@ -3228,6 +3246,31 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "kvbm-logical"
version = "1.1.0"
dependencies = [
"anyhow",
"async-stream",
"bincode 2.0.1",
"bytes",
"derive_builder",
"dynamo-tokens",
"futures",
"indexmap 2.14.0",
"lru 0.16.4",
"parking_lot",
"prometheus",
"rmp-serde",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
"tracing",
"xxhash-rust",
]
[[package]] [[package]]
name = "kvbm-py3" name = "kvbm-py3"
version = "1.1.0" version = "1.1.0"
...@@ -3389,6 +3432,15 @@ dependencies = [ ...@@ -3389,6 +3432,15 @@ dependencies = [
"hashbrown 0.15.5", "hashbrown 0.15.5",
] ]
[[package]]
name = "lru"
version = "0.16.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39"
dependencies = [
"hashbrown 0.16.1",
]
[[package]] [[package]]
name = "lru-slab" name = "lru-slab"
version = "0.1.2" version = "0.1.2"
...@@ -6628,6 +6680,7 @@ dependencies = [ ...@@ -6628,6 +6680,7 @@ dependencies = [
"futures-core", "futures-core",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",
"tokio-util",
] ]
[[package]] [[package]]
......
...@@ -1649,6 +1649,7 @@ dependencies = [ ...@@ -1649,6 +1649,7 @@ dependencies = [
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-tokens", "dynamo-tokens",
"indicatif 0.18.4", "indicatif 0.18.4",
"kvbm-logical",
"ndarray", "ndarray",
"ndarray-interp", "ndarray-interp",
"ndarray-npy", "ndarray-npy",
...@@ -1761,7 +1762,7 @@ dependencies = [ ...@@ -1761,7 +1762,7 @@ dependencies = [
"libc", "libc",
"local-ip-address", "local-ip-address",
"log", "log",
"lru", "lru 0.12.5",
"mio 1.1.1", "mio 1.1.1",
"notify", "notify",
"nuid", "nuid",
...@@ -2184,6 +2185,12 @@ version = "0.1.5" ...@@ -2184,6 +2185,12 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "foldhash"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.2.2" version = "1.2.2"
...@@ -2468,7 +2475,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" ...@@ -2468,7 +2475,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [ dependencies = [
"allocator-api2", "allocator-api2",
"equivalent", "equivalent",
"foldhash", "foldhash 0.1.5",
]
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash 0.2.0",
] ]
[[package]] [[package]]
...@@ -3300,6 +3318,31 @@ dependencies = [ ...@@ -3300,6 +3318,31 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "kvbm-logical"
version = "1.1.0"
dependencies = [
"anyhow",
"async-stream",
"bincode 2.0.1",
"bytes",
"derive_builder",
"dynamo-tokens",
"futures",
"indexmap 2.14.0",
"lru 0.16.4",
"parking_lot",
"prometheus",
"rmp-serde",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
"tracing",
"xxhash-rust",
]
[[package]] [[package]]
name = "lalrpop-util" name = "lalrpop-util"
version = "0.20.2" version = "0.20.2"
...@@ -3440,6 +3483,15 @@ dependencies = [ ...@@ -3440,6 +3483,15 @@ dependencies = [
"hashbrown 0.15.5", "hashbrown 0.15.5",
] ]
[[package]]
name = "lru"
version = "0.16.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39"
dependencies = [
"hashbrown 0.16.1",
]
[[package]] [[package]]
name = "lru-slab" name = "lru-slab"
version = "0.1.2" version = "0.1.2"
...@@ -6698,6 +6750,7 @@ dependencies = [ ...@@ -6698,6 +6750,7 @@ dependencies = [
"futures-core", "futures-core",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",
"tokio-util",
] ]
[[package]] [[package]]
......
...@@ -78,6 +78,20 @@ impl<T: BlockMetadata> BlockManager<T> { ...@@ -78,6 +78,20 @@ impl<T: BlockMetadata> BlockManager<T> {
/// ///
/// Returns `None` if fewer than `count` blocks are available across both pools. /// Returns `None` if fewer than `count` blocks are available across both pools.
pub fn allocate_blocks(&self, count: usize) -> Option<Vec<MutableBlock<T>>> { pub fn allocate_blocks(&self, count: usize) -> Option<Vec<MutableBlock<T>>> {
self.allocate_blocks_with_evictions(count)
.map(|(blocks, _evicted)| blocks)
}
/// Like [`allocate_blocks`](Self::allocate_blocks) but also reports the
/// [`SequenceHash`] of each block evicted from the inactive pool to
/// satisfy the allocation. Callers maintaining a shadow view of which
/// registrations are alive (e.g. the mocker's router-event bridge) can
/// translate these hashes into cache-invalidation events directly,
/// avoiding an O(N) presence scan over the registry.
pub fn allocate_blocks_with_evictions(
&self,
count: usize,
) -> Option<(Vec<MutableBlock<T>>, Vec<SequenceHash>)> {
let _guard = self.allocate_mutex.lock(); let _guard = self.allocate_mutex.lock();
let from_reset = self.reset_pool.allocate_blocks(count); let from_reset = self.reset_pool.allocate_blocks(count);
let from_reset_count = from_reset.len(); let from_reset_count = from_reset.len();
...@@ -85,7 +99,7 @@ impl<T: BlockMetadata> BlockManager<T> { ...@@ -85,7 +99,7 @@ impl<T: BlockMetadata> BlockManager<T> {
let remaining_needed = count - blocks.len(); let remaining_needed = count - blocks.len();
match self.inactive_pool.allocate_blocks(remaining_needed) { match self.inactive_pool.allocate_blocks(remaining_needed) {
Some(remaining) => { Some((remaining, evicted)) => {
let eviction_count = remaining.len() as u64; let eviction_count = remaining.len() as u64;
blocks.extend(remaining); blocks.extend(remaining);
...@@ -94,7 +108,7 @@ impl<T: BlockMetadata> BlockManager<T> { ...@@ -94,7 +108,7 @@ impl<T: BlockMetadata> BlockManager<T> {
.inc_allocations_from_reset(from_reset_count as u64); .inc_allocations_from_reset(from_reset_count as u64);
self.metrics.inc_evictions(eviction_count); self.metrics.inc_evictions(eviction_count);
Some(blocks) Some((blocks, evicted))
} }
None => None, None => None,
} }
......
...@@ -181,10 +181,18 @@ impl<T: BlockMetadata + Sync> InactivePool<T> { ...@@ -181,10 +181,18 @@ impl<T: BlockMetadata + Sync> InactivePool<T> {
.collect() .collect()
} }
/// Allocate blocks from registered pool, converting them to MutableBlocks for ResetPool /// Allocate blocks from registered pool, converting them to
pub(crate) fn allocate_blocks(&self, count: usize) -> Option<Vec<MutableBlock<T>>> { /// [`MutableBlock`]s for the [`ResetPool`]. Also reports the
/// [`SequenceHash`] of each evicted block so upstream layers can
/// propagate cache-invalidation events without a secondary presence scan.
///
/// Returns `None` if fewer than `count` evictable blocks are available.
pub(crate) fn allocate_blocks(
&self,
count: usize,
) -> Option<(Vec<MutableBlock<T>>, Vec<SequenceHash>)> {
if count == 0 { if count == 0 {
return Some(Vec::new()); return Some((Vec::new(), Vec::new()));
} }
let mut inner = self.inner.write(); let mut inner = self.inner.write();
...@@ -202,15 +210,19 @@ impl<T: BlockMetadata + Sync> InactivePool<T> { ...@@ -202,15 +210,19 @@ impl<T: BlockMetadata + Sync> InactivePool<T> {
} }
} }
let mut mutable_blocks = Vec::with_capacity(count); let mut mutable_blocks = Vec::with_capacity(count);
mutable_blocks.extend(allocated_blocks.into_iter().map(|registered_block| { let mut evicted = Vec::with_capacity(count);
for registered_block in allocated_blocks {
// Capture the identity BEFORE `reset()` drops the
// registration handle and marks the block absent.
evicted.push(registered_block.sequence_hash());
let reset_block = registered_block.reset(); let reset_block = registered_block.reset();
MutableBlock::new( mutable_blocks.push(MutableBlock::new(
reset_block, reset_block,
self.reset_return_fn.clone(), self.reset_return_fn.clone(),
self.metrics.clone(), self.metrics.clone(),
) ));
})); }
Some(mutable_blocks) Some((mutable_blocks, evicted))
} else { } else {
for block in allocated_blocks { for block in allocated_blocks {
inner.backend.insert(block); inner.backend.insert(block);
...@@ -395,17 +407,27 @@ mod tests { ...@@ -395,17 +407,27 @@ mod tests {
fn test_allocate_blocks() { fn test_allocate_blocks() {
let (pool, reset_pool) = create_test_pool(); let (pool, reset_pool) = create_test_pool();
let (block1, _) = create_registered_block::<TestMeta>(1, &tokens_for_id(1)); let (block1, seq_hash1) = create_registered_block::<TestMeta>(1, &tokens_for_id(1));
let (block2, _) = create_registered_block::<TestMeta>(2, &tokens_for_id(2)); let (block2, seq_hash2) = create_registered_block::<TestMeta>(2, &tokens_for_id(2));
let (block3, _) = create_registered_block::<TestMeta>(3, &tokens_for_id(3)); let (block3, seq_hash3) = create_registered_block::<TestMeta>(3, &tokens_for_id(3));
pool.insert(block1); pool.insert(block1);
pool.insert(block2); pool.insert(block2);
pool.insert(block3); pool.insert(block3);
assert_eq!(pool.len(), 3); assert_eq!(pool.len(), 3);
let mutable_blocks = pool.allocate_blocks(1).expect("Should allocate 1 block"); let (mutable_blocks, evicted) = pool.allocate_blocks(1).expect("Should allocate 1 block");
assert_eq!(mutable_blocks.len(), 1); assert_eq!(mutable_blocks.len(), 1);
assert_eq!(
evicted.len(),
1,
"one sequence hash should be reported as evicted"
);
assert!(
[seq_hash1, seq_hash2, seq_hash3].contains(&evicted[0]),
"evicted hash must match one of the inserted blocks; got {:?}",
evicted[0]
);
assert_eq!(pool.len(), 2); assert_eq!(pool.len(), 2);
drop(mutable_blocks); drop(mutable_blocks);
...@@ -414,6 +436,35 @@ mod tests { ...@@ -414,6 +436,35 @@ mod tests {
assert_eq!(reset_pool.available_blocks(), 11); assert_eq!(reset_pool.available_blocks(), 11);
} }
/// Sanity: asking for multiple evictions returns that many distinct hashes,
/// each matching an inserted block.
#[test]
fn test_allocate_blocks_reports_all_evicted_hashes() {
let (pool, _reset_pool) = create_test_pool();
// Build three registered blocks, keeping the sequence hash handed back
// alongside each one so it can be compared with the eviction report.
let (block1, seq_hash1) = create_registered_block::<TestMeta>(1, &tokens_for_id(1));
let (block2, seq_hash2) = create_registered_block::<TestMeta>(2, &tokens_for_id(2));
let (block3, seq_hash3) = create_registered_block::<TestMeta>(3, &tokens_for_id(3));
pool.insert(block1);
pool.insert(block2);
pool.insert(block3);
let inserted = [seq_hash1, seq_hash2, seq_hash3];
// Request as many blocks as were inserted, so all three must be evicted.
let (mutable_blocks, evicted) = pool
.allocate_blocks(3)
.expect("Should allocate all three blocks");
assert_eq!(mutable_blocks.len(), 3);
assert_eq!(evicted.len(), 3);
// Every reported hash must belong to a block this test inserted.
for h in &evicted {
assert!(
inserted.contains(h),
"evicted hash {h:?} not in inserted set"
);
}
// Collapsing into a set catches the same hash being reported twice.
let unique: std::collections::HashSet<_> = evicted.iter().copied().collect();
assert_eq!(unique.len(), 3, "evicted hashes must all be distinct");
}
#[test] #[test]
fn test_allocate_more_than_available_fails() { fn test_allocate_more_than_available_fails() {
let (pool, _reset_pool) = create_test_pool(); let (pool, _reset_pool) = create_test_pool();
......
...@@ -16,6 +16,7 @@ readme = "README.md" ...@@ -16,6 +16,7 @@ readme = "README.md"
# repo # repo
dynamo-kv-router = { workspace = true } dynamo-kv-router = { workspace = true }
dynamo-tokens = { workspace = true } dynamo-tokens = { workspace = true }
kvbm-logical = { workspace = true }
# workspace # workspace
anyhow = { workspace = true } anyhow = { workspace = true }
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use crate::common::evictor::LRUEvictor;
use dynamo_tokens::blocks::UniqueBlock;
use rustc_hash::FxHashMap;
/// Hash-based KV cache with O(1) block lookups, maintaining active (ref-counted) and
/// inactive (LRU-evictable) pools.
pub struct HashCache {
/// Blocks in the active pool, each mapped to its current reference count.
active_blocks: FxHashMap<UniqueBlock, usize>,
/// Blocks in the inactive pool, held by the evictor that picks eviction victims.
inactive_blocks: LRUEvictor<UniqueBlock>,
/// Limit that `is_at_capacity` compares the active + inactive block count against.
max_capacity: usize,
}
impl HashCache {
    /// Create a new, empty `HashCache` with the given maximum block capacity.
    pub fn new(max_capacity: usize) -> Self {
        Self {
            active_blocks: FxHashMap::default(),
            inactive_blocks: LRUEvictor::default(),
            max_capacity,
        }
    }

    /// Get the reference count of an active block, or `None` if the block is
    /// not in the active pool.
    pub fn get_active_ref_count(&self, block: &UniqueBlock) -> Option<usize> {
        self.active_blocks.get(block).copied()
    }

    /// Increment the reference count of an active block. Returns the new count.
    ///
    /// # Panics
    ///
    /// Panics if `block` is not in the active pool.
    pub fn increment_ref(&mut self, block: &UniqueBlock) -> usize {
        let ref_count = self
            .active_blocks
            .get_mut(block)
            .expect("block must be active to increment ref");
        *ref_count += 1;
        *ref_count
    }

    /// Decrement the reference count of an active block. Returns the new count.
    ///
    /// # Panics
    ///
    /// Panics if `block` is not in the active pool, or if its reference count
    /// is already zero. The second check is explicit so that release builds
    /// fail loudly instead of wrapping the count around to `usize::MAX`.
    pub fn decrement_ref(&mut self, block: &UniqueBlock) -> usize {
        let ref_count = self
            .active_blocks
            .get_mut(block)
            .expect("block must be active to decrement ref");
        *ref_count = ref_count
            .checked_sub(1)
            .expect("ref count underflow: active block already has zero references");
        *ref_count
    }

    /// Insert a block into the active pool with the given reference count,
    /// overwriting any count previously stored for the same block.
    pub fn insert_active(&mut self, block: UniqueBlock, ref_count: usize) {
        self.active_blocks.insert(block, ref_count);
    }

    /// Remove a block from the active pool. Returns the reference count, or
    /// `None` if not found.
    pub fn remove_active(&mut self, block: &UniqueBlock) -> Option<usize> {
        self.active_blocks.remove(block)
    }

    /// Check if a block is in the active pool.
    pub fn contains_active(&self, block: &UniqueBlock) -> bool {
        self.active_blocks.contains_key(block)
    }

    /// Insert a block into the inactive pool (LRU order).
    pub fn insert_inactive(&mut self, block: UniqueBlock) {
        self.inactive_blocks.insert(block);
    }

    /// Remove a block from the inactive pool. Returns true if it was found.
    pub fn remove_inactive(&mut self, block: &UniqueBlock) -> bool {
        self.inactive_blocks.remove(block)
    }

    /// Evict the least-recently-used block from the inactive pool, or return
    /// `None` when the inactive pool is empty.
    pub fn evict_inactive(&mut self) -> Option<UniqueBlock> {
        self.inactive_blocks.evict()
    }

    /// Check if a block is in the inactive pool.
    pub fn contains_inactive(&self, block: &UniqueBlock) -> bool {
        self.inactive_blocks.contains(block)
    }

    /// Check if a block exists in either active or inactive pool.
    pub fn contains(&self, block: &UniqueBlock) -> bool {
        self.contains_active(block) || self.contains_inactive(block)
    }

    /// Move block from active to inactive (ref_count reached 0).
    ///
    /// The preconditions (block is active and not already inactive) are
    /// checked with `debug_assert!`, so they are only enforced in builds with
    /// debug assertions enabled.
    pub fn deactivate(&mut self, block: &UniqueBlock) {
        debug_assert!(
            self.active_blocks.contains_key(block),
            "deactivate called on non-active block"
        );
        debug_assert!(
            !self.inactive_blocks.contains(block),
            "deactivate called on already-inactive block"
        );
        self.active_blocks.remove(block);
        self.inactive_blocks.insert(block.clone());
    }

    /// Move block from inactive to active with ref_count=1. Returns true if
    /// found; the cache is left untouched otherwise.
    pub fn reactivate(&mut self, block: &UniqueBlock) -> bool {
        if self.inactive_blocks.remove(block) {
            self.active_blocks.insert(block.clone(), 1);
            true
        } else {
            false
        }
    }

    /// Check if total blocks (active + inactive) has reached max_capacity.
    pub fn is_at_capacity(&self) -> bool {
        self.current_capacity() >= self.max_capacity
    }

    /// Get the number of active blocks.
    pub fn num_active(&self) -> usize {
        self.active_blocks.len()
    }

    /// Get the number of inactive blocks.
    pub fn num_inactive(&self) -> usize {
        self.inactive_blocks.len()
    }

    /// Get the maximum block capacity.
    pub fn max_capacity(&self) -> usize {
        self.max_capacity
    }

    /// Get the current capacity (active + inactive blocks).
    pub fn current_capacity(&self) -> usize {
        self.num_active() + self.num_inactive()
    }

    /// Iterate over active block keys.
    pub fn active_keys(&self) -> impl Iterator<Item = &UniqueBlock> {
        self.active_blocks.keys()
    }

    /// Iterate over inactive block keys.
    pub fn inactive_keys(&self) -> impl Iterator<Item = &UniqueBlock> {
        self.inactive_blocks.keys()
    }

    /// Direct access to active blocks map (for tests that check ref counts).
    pub fn active_blocks(&self) -> &FxHashMap<UniqueBlock, usize> {
        &self.active_blocks
    }
}
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
//! Cache data structures for KV block management. //! Cache data structures for KV block management.
pub mod hash_cache;
pub mod radix_cache; pub mod radix_cache;
pub use hash_cache::HashCache;
pub use radix_cache::RadixCache; pub use radix_cache::RadixCache;
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::cmp::{Eq, Ordering};
use std::collections::BTreeSet;
use std::hash::Hash;
use rustc_hash::FxHashMap;
/// A wrapper for `(T, counter)` whose equality and ordering are both based
/// only on `counter`.
///
/// `Ord` requires that `a.cmp(&b) == Ordering::Equal` holds exactly when
/// `a == b`. Equality is therefore implemented by hand on `counter` alone
/// rather than derived, since a derived impl would also compare `item`.
#[derive(Debug, Clone)]
struct PriorityItem<T> {
    item: T,
    counter: i64,
}

impl<T> PartialEq for PriorityItem<T> {
    /// Two items are equal when their counters are equal; `item` is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.counter == other.counter
    }
}

impl<T> Eq for PriorityItem<T> {}

impl<T: Eq> Ord for PriorityItem<T> {
    /// Orders items by ascending counter.
    fn cmp(&self, other: &Self) -> Ordering {
        self.counter.cmp(&other.counter)
    }
}

impl<T: Eq> PartialOrd for PriorityItem<T> {
    /// Always `Some`, delegating to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
/// An LRU evictor that maintains objects and evicts them based on their
/// priority counter. Lower counter values are evicted first.
#[derive(Debug)]
pub struct LRUEvictor<T: Clone + Eq + Hash> {
/// Every tracked object mapped to the counter it is currently queued under.
free_table: FxHashMap<T, i64>,
/// The same objects ordered by counter; `evict` pops the first (lowest) entry.
priority_queue: BTreeSet<PriorityItem<T>>,
/// Counter handed out by `insert`; incremented before each use.
positive_counter: i64,
/// Counter handed out by `push_front`; decremented before each use.
negative_counter: i64,
}
impl<T: Clone + Eq + Hash> Default for LRUEvictor<T> {
    /// Builds an evictor that tracks no objects and has both counters at zero.
    fn default() -> Self {
        let free_table = FxHashMap::default();
        let priority_queue = BTreeSet::new();
        Self {
            free_table,
            priority_queue,
            positive_counter: 0,
            negative_counter: 0,
        }
    }
}
impl<T: Clone + Eq + Hash> LRUEvictor<T> {
    /// Creates an empty evictor. The threshold argument is accepted but unused.
    pub fn new(_cleanup_threshold: usize) -> Self {
        Default::default()
    }

    /// Iterates over every tracked object, in the lookup table's own order.
    pub fn keys(&self) -> std::collections::hash_map::Keys<'_, T, i64> {
        self.free_table.keys()
    }

    /// Records `object` under `counter` in both the lookup table and the queue.
    fn update(&mut self, object: T, counter: i64) {
        let queued = PriorityItem {
            item: object.clone(),
            counter,
        };
        self.free_table.insert(object, counter);
        self.priority_queue.insert(queued);
    }

    /// Removes the queue entry currently recorded for `object`, if there is
    /// one. The lookup table is left as is; callers overwrite it via `update`.
    fn drop_queue_entry(&mut self, object: &T) {
        if let Some(&stale_counter) = self.free_table.get(object) {
            let stale = PriorityItem {
                item: object.clone(),
                counter: stale_counter,
            };
            self.priority_queue.remove(&stale);
        }
    }

    /// Tracks `object` with a freshly incremented positive counter, replacing
    /// any entry it already had.
    pub fn insert(&mut self, object: T) {
        self.drop_queue_entry(&object);
        self.positive_counter += 1;
        let stamp = self.positive_counter;
        self.update(object, stamp);
    }

    /// Push an object to the front with negative counter (highest priority for eviction)
    pub fn push_front(&mut self, object: T) {
        self.drop_queue_entry(&object);
        self.negative_counter -= 1;
        let stamp = self.negative_counter;
        self.update(object, stamp);
    }

    /// Reports whether `object` is currently tracked.
    pub fn contains(&self, object: &T) -> bool {
        self.free_table.contains_key(object)
    }

    /// Evict an object based on LRU policy (lowest counter value)
    /// Returns the evicted object or None if no objects are available
    pub fn evict(&mut self) -> Option<T> {
        let PriorityItem { item, .. } = self.priority_queue.pop_first()?;
        self.free_table.remove(&item);
        Some(item)
    }

    /// Stops tracking `object`. Returns `true` if it was tracked, `false` otherwise.
    pub fn remove(&mut self, object: &T) -> bool {
        match self.free_table.remove(object) {
            Some(counter) => {
                self.priority_queue.remove(&PriorityItem {
                    item: object.clone(),
                    counter,
                });
                true
            }
            None => false,
        }
    }

    /// Number of tracked objects.
    pub fn len(&self) -> usize {
        self.free_table.len()
    }

    /// Whether no objects are tracked.
    pub fn is_empty(&self) -> bool {
        self.free_table.is_empty()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Re-inserting an object moves it to the back of the eviction order,
    /// while `push_front` moves it to the very front.
    #[test]
    fn test_lru_evictor_eviction_order() {
        // The constructor argument is ignored by the evictor.
        let mut evictor = LRUEvictor::<i32>::new(1);

        // Five first-time insertions, then 1, 4 and 2 are inserted again,
        // which refreshes their counters.
        for value in [4, 3, 2, 1, 5, 1, 4, 2] {
            evictor.insert(value);
        }
        // 4 now gets a negative counter, so it becomes the first victim.
        evictor.push_front(4);

        // Victims must come out in ascending counter order.
        for expected in [4, 3, 5, 1, 2] {
            assert_eq!(evictor.evict(), Some(expected));
        }

        // Nothing is left once every object has been evicted.
        assert_eq!(evictor.evict(), None);
        assert_eq!(evictor.len(), 0);
    }
    // ... existing test_push_front test ...
}
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
//! Shared components used across all engine implementations. //! Shared components used across all engine implementations.
pub mod bootstrap; pub mod bootstrap;
pub mod evictor;
pub mod kv_cache_trace; pub mod kv_cache_trace;
pub mod perf_model; pub mod perf_model;
pub mod protocols; pub mod protocols;
......
...@@ -13,7 +13,24 @@ use validator::Validate; ...@@ -13,7 +13,24 @@ use validator::Validate;
use crate::common::perf_model::PerfModel; use crate::common::perf_model::PerfModel;
use dynamo_kv_router::protocols::KvCacheEvent; use dynamo_kv_router::protocols::KvCacheEvent;
use dynamo_tokens::blocks::UniqueBlock; use dynamo_tokens::blocks::UniqueBlock;
use dynamo_tokens::{BlockHash, SequenceHash, Token}; use dynamo_tokens::{BlockHash, PositionalLineageHash, SequenceHash, Token};
/// Metadata marker type for kvbm-logical blocks in the mocker's G1 pool.
///
/// This is a unit struct: it carries no data of its own.
// NOTE(review): presumably supplied as the block-metadata type parameter of
// the kvbm-logical block manager — confirm at the use sites.
#[derive(Clone, Debug)]
pub struct G1;
/// Eviction strategy for the kvbm-logical inactive pool.
///
/// `Lineage` is the default and matches kvbm-logical's own default — it evicts
/// leaf blocks first, which subsumes the preemption-priority behaviour that the
/// mocker's old `LRUEvictor::push_front` provided.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub enum MockerEvictionBackend {
// NOTE(review): presumably selects a plain least-recently-used backend in
// kvbm-logical — confirm against that crate.
Lru,
// NOTE(review): presumably selects kvbm-logical's multi-level LRU backend —
// confirm against that crate.
MultiLru,
/// Value produced by `MockerEvictionBackend::default()`.
#[default]
Lineage,
}
/// Trait for publishing KV cache events. /// Trait for publishing KV cache events.
/// This abstracts the runtime dependency so mocker components can remain generic. /// This abstracts the runtime dependency so mocker components can remain generic.
...@@ -142,12 +159,20 @@ pub enum MoveBlock { ...@@ -142,12 +159,20 @@ pub enum MoveBlock {
Use( Use(
Vec<UniqueBlock>, Vec<UniqueBlock>,
Vec<BlockHash>, Vec<BlockHash>,
Vec<PositionalLineageHash>,
Option<Vec<Vec<u32>>>, Option<Vec<Vec<u32>>>,
Option<UniqueBlock>, Option<UniqueBlock>,
), ),
Destroy(Vec<UniqueBlock>), Destroy(Vec<UniqueBlock>),
Deref(Vec<UniqueBlock>), Deref(Vec<UniqueBlock>),
Promote(Uuid, SequenceHash, Option<u64>, BlockHash, Option<Vec<u32>>), Promote(
Uuid,
SequenceHash,
Option<u64>,
BlockHash,
PositionalLineageHash,
Option<Vec<u32>>,
),
} }
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
......
...@@ -4,35 +4,41 @@ ...@@ -4,35 +4,41 @@
use crate::common::protocols::MoveBlock; use crate::common::protocols::MoveBlock;
use derive_getters::Getters; use derive_getters::Getters;
use dynamo_tokens::blocks::UniqueBlock; use dynamo_tokens::blocks::UniqueBlock;
use dynamo_tokens::{TokenBlockSequence, Tokens}; use dynamo_tokens::{PositionalLineageHash, TokenBlockSequence, Tokens};
use rand::random; use rand::random;
use validator::Validate; use validator::Validate;
/// Create unique blocks and block hashes from a TokenBlockSequence. /// Create unique blocks, block hashes, and positional-lineage hashes from a
/// [`TokenBlockSequence`].
fn create_sequence_cache( fn create_sequence_cache(
tokens: &TokenBlockSequence, tokens: &TokenBlockSequence,
block_size: usize, block_size: usize,
enable_prefix_caching: bool, enable_prefix_caching: bool,
) -> (Vec<UniqueBlock>, Vec<u64>) { ) -> (Vec<UniqueBlock>, Vec<u64>, Vec<PositionalLineageHash>) {
let mut unique_blocks = Vec::with_capacity(tokens.blocks().len() + 1); let mut unique_blocks = Vec::with_capacity(tokens.blocks().len() + 1);
let mut block_hashes = Vec::with_capacity(tokens.blocks().len()); let mut block_hashes = Vec::with_capacity(tokens.blocks().len());
let mut plhs = Vec::with_capacity(tokens.blocks().len());
for block in tokens.blocks() { for (pos, block) in tokens.blocks().iter().enumerate() {
block_hashes.push(block.block_hash()); block_hashes.push(block.block_hash());
unique_blocks.push({ if enable_prefix_caching {
if enable_prefix_caching { unique_blocks.push(UniqueBlock::FullBlock(block.sequence_hash()));
UniqueBlock::FullBlock(block.sequence_hash()) plhs.push(block.positional_lineage_hash());
} else { } else {
UniqueBlock::FullBlock(random::<u64>()) unique_blocks.push(UniqueBlock::FullBlock(random::<u64>()));
} plhs.push(PositionalLineageHash::new(
}); random::<u64>(),
None,
pos as u64,
));
}
} }
// Only push the partial block if tokens count isn't a multiple of block_size // Only push the partial block if tokens count isn't a multiple of block_size
if !tokens.total_tokens().is_multiple_of(block_size) { if !tokens.total_tokens().is_multiple_of(block_size) {
unique_blocks.push(UniqueBlock::default()); unique_blocks.push(UniqueBlock::default());
} }
(unique_blocks, block_hashes) (unique_blocks, block_hashes, plhs)
} }
/// A sequence that is actively being built, with the ability to add tokens and commit to hashes /// A sequence that is actively being built, with the ability to add tokens and commit to hashes
...@@ -41,6 +47,7 @@ fn create_sequence_cache( ...@@ -41,6 +47,7 @@ fn create_sequence_cache(
pub struct ActiveSequence { pub struct ActiveSequence {
unique_blocks: Vec<UniqueBlock>, unique_blocks: Vec<UniqueBlock>,
block_hashes: Vec<u64>, block_hashes: Vec<u64>,
plhs: Vec<PositionalLineageHash>,
tokens: TokenBlockSequence, tokens: TokenBlockSequence,
...@@ -80,12 +87,13 @@ impl ActiveSequence { ...@@ -80,12 +87,13 @@ impl ActiveSequence {
let num_input_tokens = tokens.len(); let num_input_tokens = tokens.len();
let tokens = Tokens::from(tokens).into_sequence(block_size as u32, Some(1337)); let tokens = Tokens::from(tokens).into_sequence(block_size as u32, Some(1337));
let (unique_blocks, block_hashes) = let (unique_blocks, block_hashes, plhs) =
create_sequence_cache(&tokens, block_size, enable_prefix_caching); create_sequence_cache(&tokens, block_size, enable_prefix_caching);
let seq = Self { let seq = Self {
unique_blocks, unique_blocks,
block_hashes, block_hashes,
plhs,
tokens, tokens,
block_size, block_size,
max_output_tokens, max_output_tokens,
...@@ -132,6 +140,8 @@ impl ActiveSequence { ...@@ -132,6 +140,8 @@ impl ActiveSequence {
let hash_start = prev_blocks.min(self.block_hashes.len()); let hash_start = prev_blocks.min(self.block_hashes.len());
let hash_end = target_blocks.min(self.block_hashes.len()); let hash_end = target_blocks.min(self.block_hashes.len());
let hashes = self.block_hashes[hash_start..hash_end].to_vec(); let hashes = self.block_hashes[hash_start..hash_end].to_vec();
// Cached per-sequence PLHs (stable across calls).
let plhs = self.plhs[hash_start..hash_end].to_vec();
let token_ids = if self.emit_token_ids && hash_start < hash_end { let token_ids = if self.emit_token_ids && hash_start < hash_end {
Some( Some(
...@@ -149,7 +159,17 @@ impl ActiveSequence { ...@@ -149,7 +159,17 @@ impl ActiveSequence {
} else { } else {
None None
}; };
Some(MoveBlock::Use(blocks, hashes, token_ids, parent)) Some(MoveBlock::Use(blocks, hashes, plhs, token_ids, parent))
}
/// Positional lineage hashes for all fully-tokenised blocks in the sequence.
/// Mirrors `block_hashes()` but returns the PLH identity used by kvbm-logical.
pub fn positional_lineage_hashes(&self) -> Vec<PositionalLineageHash> {
self.tokens
.blocks()
.iter()
.map(|block| block.positional_lineage_hash())
.collect()
} }
/// Commit a successful allocation by advancing `num_allocated_tokens`. /// Commit a successful allocation by advancing `num_allocated_tokens`.
...@@ -209,12 +229,22 @@ impl ActiveSequence { ...@@ -209,12 +229,22 @@ impl ActiveSequence {
random::<u64>() random::<u64>()
}; };
let last_block_hash = last_complete.block_hash(); let last_block_hash = last_complete.block_hash();
// Same randomization story as `last_seq_hash`: with prefix caching off,
// two identical prompts must not share blocks, so the PLH we promote
// with must also be unique — otherwise `process_promote`'s
// `match_blocks(&[plh])` lookup would reuse another request's block.
let last_plh = if self.enable_prefix_caching {
last_complete.positional_lineage_hash()
} else {
PositionalLineageHash::new(random::<u64>(), None, self.block_hashes.len() as u64)
};
let promote_token_ids = if self.emit_token_ids { let promote_token_ids = if self.emit_token_ids {
Some(last_complete.tokens().to_vec()) Some(last_complete.tokens().to_vec())
} else { } else {
None None
}; };
self.block_hashes.push(last_block_hash); self.block_hashes.push(last_block_hash);
self.plhs.push(last_plh);
self.unique_blocks.pop(); self.unique_blocks.pop();
// After pop, the last element is the parent block // After pop, the last element is the parent block
...@@ -230,13 +260,20 @@ impl ActiveSequence { ...@@ -230,13 +260,20 @@ impl ActiveSequence {
last_seq_hash, last_seq_hash,
second_to_last_hash, second_to_last_hash,
last_block_hash, last_block_hash,
last_plh,
promote_token_ids, promote_token_ids,
)); ));
} }
let new_partial_block = UniqueBlock::default(); let new_partial_block = UniqueBlock::default();
self.unique_blocks.push(new_partial_block.clone()); self.unique_blocks.push(new_partial_block.clone());
signals.push(MoveBlock::Use(vec![new_partial_block], vec![], None, None)); signals.push(MoveBlock::Use(
vec![new_partial_block],
vec![],
vec![],
None,
None,
));
Some(signals) Some(signals)
} }
......
This diff is collapsed.
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
//! Pluggable KV cache block managers. //! Pluggable KV cache block managers.
pub mod kvbm_backend;
pub mod sglang_backend; pub mod sglang_backend;
pub mod vllm_backend;
pub use kvbm_backend::KvManager;
pub use sglang_backend::SglangKvManager; pub use sglang_backend::SglangKvManager;
pub use vllm_backend::KvManager;
This diff is collapsed.
...@@ -579,11 +579,21 @@ mod live_scheduler { ...@@ -579,11 +579,21 @@ mod live_scheduler {
.build() .build()
.unwrap(); .unwrap();
// Side-channel router indexer: the mocker's emitted KV event stream is
// forwarded in real time into `LocalKvIndexer`, which applies Stored/
// Removed events against its own radix tree. If the mocker ever emits
// an invalid event (dangling parent, re-Stored of a present block, or
// Removed of an unknown block), the indexer's per-status counters tick
// — `assert_no_event_errors()` turns those into a test failure.
let harness = RouterIndexerHarness::new(64, ROUTER_TEST_WORKER_ID);
let (forwarder_sink, forwarder_task) = harness.spawn_forwarder();
let publishers = KvEventPublishers::new(Some(forwarder_sink as _), None);
let scheduler = Scheduler::new( let scheduler = Scheduler::new(
args, args,
0, 0,
Some(output_tx), Some(output_tx),
KvEventPublishers::default(), publishers,
None, None,
FpmPublisher::default(), FpmPublisher::default(),
); );
...@@ -597,6 +607,17 @@ mod live_scheduler { ...@@ -597,6 +607,17 @@ mod live_scheduler {
use_shared_tokens, use_shared_tokens,
) )
.await; .await;
// Stop the scheduler so no new events fire, then drop the forwarder's
// sender by dropping the scheduler → forwarder task drains and exits.
drop(scheduler);
let _ = tokio::time::timeout(Duration::from_secs(2), forwarder_task).await;
harness.flush().await;
harness.assert_no_event_errors();
// NOTE: we do NOT assert `dump_events().is_empty()` here because
// mocker's protocol does not emit router `Removed` events on
// request completion.
harness.shutdown();
} }
#[tokio::test] #[tokio::test]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment