refactor(mocker): replace vllm block manager with kvbm-logical (#8451)

Signed-off-by: Yongming Ding <yongmingd@nvidia.com>
Co-authored-by: Ryan Olson <rolson@nvidia.com>

refactor(mocker): replace vllm block manager with kvbm-logical (#8451)
Signed-off-by: Yongming Ding <yongmingd@nvidia.com>
Co-authored-by: Ryan Olson <rolson@nvidia.com>
36b4208e · Yongming Ding · GitHub · 1dc0975b · 36b4208e · 36b4208e
Unverified Commit 36b4208e authored Apr 22, 2026 by Yongming Ding Committed by GitHub Apr 22, 2026
16 changed files
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2511,6 +2511,7 @@ dependencies = [
 "dynamo-kv-router",
 "dynamo-tokens",
 "indicatif 0.18.4",
+ "kvbm-logical",
 "ndarray 0.16.1",
 "ndarray-interp",
 "ndarray-npy",

--- a/lib/bindings/kvbm/Cargo.lock
+++ b/lib/bindings/kvbm/Cargo.lock
@@ -1634,6 +1634,7 @@ dependencies = [
 "dynamo-kv-router",
 "dynamo-tokens",
 "indicatif 0.18.4",
+ "kvbm-logical",
 "ndarray",
 "ndarray-interp",
 "ndarray-npy",
@@ -1714,7 +1715,7 @@ dependencies = [
 "libc",
 "local-ip-address",
 "log",
- "lru",
+ "lru 0.12.5",
 "mio 1.1.1",
 "notify",
 "nuid",
@@ -2112,6 +2113,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
 [[package]]
 name = "form_urlencoded"
 version = "1.2.2"
@@ -2396,7 +2403,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
 "allocator-api2",
 "equivalent",
- "foldhash",
+ "foldhash 0.1.5",
+]
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash 0.2.0",
 ]
 [[package]]
@@ -3228,6 +3246,31 @@ dependencies = [
 "tracing",
 ]
+[[package]]
+name = "kvbm-logical"
+version = "1.1.0"
+dependencies = [
+ "anyhow",
+ "async-stream",
+ "bincode 2.0.1",
+ "bytes",
+ "derive_builder",
+ "dynamo-tokens",
+ "futures",
+ "indexmap 2.14.0",
+ "lru 0.16.4",
+ "parking_lot",
+ "prometheus",
+ "rmp-serde",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tokio-stream",
+ "tracing",
+ "xxhash-rust",
+]
 [[package]]
 name = "kvbm-py3"
 version = "1.1.0"
@@ -3389,6 +3432,15 @@ dependencies = [
 "hashbrown 0.15.5",
 ]
+[[package]]
+name = "lru"
+version = "0.16.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39"
+dependencies = [
+ "hashbrown 0.16.1",
+]
 [[package]]
 name = "lru-slab"
 version = "0.1.2"
@@ -6628,6 +6680,7 @@ dependencies = [
 "futures-core",
 "pin-project-lite",
 "tokio",
+ "tokio-util",
 ]
 [[package]]

--- a/lib/bindings/python/Cargo.lock
+++ b/lib/bindings/python/Cargo.lock
@@ -1649,6 +1649,7 @@ dependencies = [
 "dynamo-kv-router",
 "dynamo-tokens",
 "indicatif 0.18.4",
+ "kvbm-logical",
 "ndarray",
 "ndarray-interp",
 "ndarray-npy",
@@ -1761,7 +1762,7 @@ dependencies = [
 "libc",
 "local-ip-address",
 "log",
- "lru",
+ "lru 0.12.5",
 "mio 1.1.1",
 "notify",
 "nuid",
@@ -2184,6 +2185,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
 [[package]]
 name = "form_urlencoded"
 version = "1.2.2"
@@ -2468,7 +2475,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
 "allocator-api2",
 "equivalent",
- "foldhash",
+ "foldhash 0.1.5",
+]
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash 0.2.0",
 ]
 [[package]]
@@ -3300,6 +3318,31 @@ dependencies = [
 "tracing",
 ]
+[[package]]
+name = "kvbm-logical"
+version = "1.1.0"
+dependencies = [
+ "anyhow",
+ "async-stream",
+ "bincode 2.0.1",
+ "bytes",
+ "derive_builder",
+ "dynamo-tokens",
+ "futures",
+ "indexmap 2.14.0",
+ "lru 0.16.4",
+ "parking_lot",
+ "prometheus",
+ "rmp-serde",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tokio-stream",
+ "tracing",
+ "xxhash-rust",
+]
 [[package]]
 name = "lalrpop-util"
 version = "0.20.2"
@@ -3440,6 +3483,15 @@ dependencies = [
 "hashbrown 0.15.5",
 ]
+[[package]]
+name = "lru"
+version = "0.16.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39"
+dependencies = [
+ "hashbrown 0.16.1",
+]
 [[package]]
 name = "lru-slab"
 version = "0.1.2"
@@ -6698,6 +6750,7 @@ dependencies = [
 "futures-core",
 "pin-project-lite",
 "tokio",
+ "tokio-util",
 ]
 [[package]]

--- a/lib/kvbm-logical/src/manager/mod.rs
+++ b/lib/kvbm-logical/src/manager/mod.rs
@@ -78,6 +78,20 @@ impl<T: BlockMetadata> BlockManager<T> {
    ///
    /// Returns `None` if fewer than `count` blocks are available across both pools.
    pub fn allocate_blocks(&self, count: usize) -> Option<Vec<MutableBlock<T>>> {
+        self.allocate_blocks_with_evictions(count)
+            .map(|(blocks, _evicted)| blocks)
+    }
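+    /// Like [`allocate_blocks`](Self::allocate_blocks) but also reports the
+    /// [`SequenceHash`] of each block evicted from the inactive pool to
+    /// satisfy the allocation. Callers maintaining a shadow view of which
+    /// registrations are alive (e.g. the mocker's router-event bridge) can
+    /// translate these hashes into cache-invalidation events directly,
+    /// avoiding an O(N) presence scan over the registry.
+    ///
+    /// The returned hash list is exactly what the inactive pool reported for
+    /// the blocks it supplied; blocks served from the reset pool contribute
+    /// no entries.
+    ///
+    /// Returns `None` if the inactive pool cannot supply the blocks still
+    /// needed after the reset pool has been drawn from.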
+    pub fn allocate_blocks_with_evictions(
+        &self,
+        count: usize,
+    ) -> Option<(Vec<MutableBlock<T>>, Vec<SequenceHash>)> {
        let _guard = self.allocate_mutex.lock();
        let from_reset = self.reset_pool.allocate_blocks(count);
        let from_reset_count = from_reset.len();
@@ -85,7 +99,7 @@ impl<T: BlockMetadata> BlockManager<T> {
        let remaining_needed = count - blocks.len();
        match self.inactive_pool.allocate_blocks(remaining_needed) {
-            Some(remaining) => {
+            Some((remaining, evicted)) => {
                let eviction_count = remaining.len() as u64;
                blocks.extend(remaining);
@@ -94,7 +108,7 @@ impl<T: BlockMetadata> BlockManager<T> {
                    .inc_allocations_from_reset(from_reset_count as u64);
                self.metrics.inc_evictions(eviction_count);
-                Some(blocks)
+                Some((blocks, evicted))
            }
            None => None,
        }

--- a/lib/kvbm-logical/src/pools/inactive/mod.rs
+++ b/lib/kvbm-logical/src/pools/inactive/mod.rs
@@ -181,10 +181,18 @@ impl<T: BlockMetadata + Sync> InactivePool<T> {
            .collect()
    }
-    /// Allocate blocks from registered pool, converting them to MutableBlocks for ResetPool
+    /// Allocate blocks from registered pool, converting them to
-    pub(crate) fn allocate_blocks(&self, count: usize) -> Option<Vec<MutableBlock<T>>> {
+    /// [`MutableBlock`]s for the [`ResetPool`]. Also reports the
+    /// [`SequenceHash`] of each evicted block so upstream layers can
+    /// propagate cache-invalidation events without a secondary presence scan.
+    ///
+    /// Returns `None` if fewer than `count` evictable blocks are available.
+    pub(crate) fn allocate_blocks(
+        &self,
+        count: usize,
+    ) -> Option<(Vec<MutableBlock<T>>, Vec<SequenceHash>)> {
        if count == 0 {
-            return Some(Vec::new());
+            return Some((Vec::new(), Vec::new()));
        }
        let mut inner = self.inner.write();
@@ -202,15 +210,19 @@ impl<T: BlockMetadata + Sync> InactivePool<T> {
                }
            }
            let mut mutable_blocks = Vec::with_capacity(count);
-            mutable_blocks.extend(allocated_blocks.into_iter().map(|registered_block| {
+            let mut evicted = Vec::with_capacity(count);
+            for registered_block in allocated_blocks {
+                // Capture the identity BEFORE `reset()` drops the
+                // registration handle and marks the block absent.
+                evicted.push(registered_block.sequence_hash());
                let reset_block = registered_block.reset();
-                MutableBlock::new(
+                mutable_blocks.push(MutableBlock::new(
                    reset_block,
                    self.reset_return_fn.clone(),
                    self.metrics.clone(),
-                )
+                ));
-            }));
+            }
-            Some(mutable_blocks)
+            Some((mutable_blocks, evicted))
        } else {
            for block in allocated_blocks {
                inner.backend.insert(block);
@@ -395,17 +407,27 @@ mod tests {
    fn test_allocate_blocks() {
        let (pool, reset_pool) = create_test_pool();
-        let (block1, _) = create_registered_block::<TestMeta>(1, &tokens_for_id(1));
+        let (block1, seq_hash1) = create_registered_block::<TestMeta>(1, &tokens_for_id(1));
-        let (block2, _) = create_registered_block::<TestMeta>(2, &tokens_for_id(2));
+        let (block2, seq_hash2) = create_registered_block::<TestMeta>(2, &tokens_for_id(2));
-        let (block3, _) = create_registered_block::<TestMeta>(3, &tokens_for_id(3));
+        let (block3, seq_hash3) = create_registered_block::<TestMeta>(3, &tokens_for_id(3));
        pool.insert(block1);
        pool.insert(block2);
        pool.insert(block3);
        assert_eq!(pool.len(), 3);
-        let mutable_blocks = pool.allocate_blocks(1).expect("Should allocate 1 block");
+        let (mutable_blocks, evicted) = pool.allocate_blocks(1).expect("Should allocate 1 block");
        assert_eq!(mutable_blocks.len(), 1);
+        assert_eq!(
+            evicted.len(),
+            1,
+            "one sequence hash should be reported as evicted"
+        );
+        assert!(
+            [seq_hash1, seq_hash2, seq_hash3].contains(&evicted[0]),
+            "evicted hash must match one of the inserted blocks; got {:?}",
+            evicted[0]
+        );
        assert_eq!(pool.len(), 2);
        drop(mutable_blocks);
@@ -414,6 +436,35 @@ mod tests {
        assert_eq!(reset_pool.available_blocks(), 11);
    }
+    /// Sanity: asking for multiple evictions returns that many distinct hashes,
+    /// each matching an inserted block.
+    #[test]
+    fn test_allocate_blocks_reports_all_evicted_hashes() {
+        let (pool, _reset_pool) = create_test_pool();
+        let (block1, seq_hash1) = create_registered_block::<TestMeta>(1, &tokens_for_id(1));
+        let (block2, seq_hash2) = create_registered_block::<TestMeta>(2, &tokens_for_id(2));
+        let (block3, seq_hash3) = create_registered_block::<TestMeta>(3, &tokens_for_id(3));
+        pool.insert(block1);
+        pool.insert(block2);
+        pool.insert(block3);
+        let inserted = [seq_hash1, seq_hash2, seq_hash3];
+        let (mutable_blocks, evicted) = pool
+            .allocate_blocks(3)
+            .expect("Should allocate all three blocks");
+        assert_eq!(mutable_blocks.len(), 3);
+        assert_eq!(evicted.len(), 3);
+        for h in &evicted {
+            assert!(
+                inserted.contains(h),
+                "evicted hash {h:?} not in inserted set"
+            );
+        }
+        let unique: std::collections::HashSet<_> = evicted.iter().copied().collect();
+        assert_eq!(unique.len(), 3, "evicted hashes must all be distinct");
+    }
    #[test]
    fn test_allocate_more_than_available_fails() {
        let (pool, _reset_pool) = create_test_pool();

--- a/lib/mocker/Cargo.toml
+++ b/lib/mocker/Cargo.toml
@@ -16,6 +16,7 @@ readme = "README.md"
 # repo
 dynamo-kv-router = { workspace = true }
 dynamo-tokens = { workspace = true }
+kvbm-logical = { workspace = true }
 # workspace
 anyhow = { workspace = true }

--- a/lib/mocker/src/cache/hash_cache.rs
+++ b/lib/mocker/src/cache/hash_cache.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-use crate::common::evictor::LRUEvictor;
-use dynamo_tokens::blocks::UniqueBlock;
-use rustc_hash::FxHashMap;
-/// Hash-based KV cache with O(1) block lookups, maintaining active (ref-counted) and
-/// inactive (LRU-evictable) pools.
-pub struct HashCache {
-    active_blocks: FxHashMap<UniqueBlock, usize>,
-    inactive_blocks: LRUEvictor<UniqueBlock>,
-    max_capacity: usize,
-}
-impl HashCache {
-    /// Create a new HashCache with the given maximum block capacity.
-    pub fn new(max_capacity: usize) -> Self {
-        Self {
-            active_blocks: FxHashMap::default(),
-            inactive_blocks: LRUEvictor::default(),
-            max_capacity,
-        }
-    }
-    /// Get the reference count of an active block, if it exists.
-    pub fn get_active_ref_count(&self, block: &UniqueBlock) -> Option<usize> {
-        self.active_blocks.get(block).copied()
-    }
-    /// Increment the reference count of an active block. Returns the new count.
-    pub fn increment_ref(&mut self, block: &UniqueBlock) -> usize {
-        let ref_count = self
-            .active_blocks
-            .get_mut(block)
-            .expect("block must be active to increment ref");
-        *ref_count += 1;
-        *ref_count
-    }
-    /// Decrement the reference count of an active block. Returns the new count.
-    pub fn decrement_ref(&mut self, block: &UniqueBlock) -> usize {
-        let ref_count = self
-            .active_blocks
-            .get_mut(block)
-            .expect("block must be active to decrement ref");
-        *ref_count -= 1;
-        *ref_count
-    }
-    /// Insert a block into the active pool with the given reference count.
-    pub fn insert_active(&mut self, block: UniqueBlock, ref_count: usize) {
-        self.active_blocks.insert(block, ref_count);
-    }
-    /// Remove a block from the active pool. Returns the reference count, or None if not found.
-    pub fn remove_active(&mut self, block: &UniqueBlock) -> Option<usize> {
-        self.active_blocks.remove(block)
-    }
-    /// Check if a block is in the active pool.
-    pub fn contains_active(&self, block: &UniqueBlock) -> bool {
-        self.active_blocks.contains_key(block)
-    }
-    /// Insert a block into the inactive pool (LRU order).
-    pub fn insert_inactive(&mut self, block: UniqueBlock) {
-        self.inactive_blocks.insert(block);
-    }
-    /// Remove a block from the inactive pool. Returns true if it was found.
-    pub fn remove_inactive(&mut self, block: &UniqueBlock) -> bool {
-        self.inactive_blocks.remove(block)
-    }
-    /// Evict the least-recently-used block from the inactive pool.
-    pub fn evict_inactive(&mut self) -> Option<UniqueBlock> {
-        self.inactive_blocks.evict()
-    }
-    /// Check if a block is in the inactive pool.
-    pub fn contains_inactive(&self, block: &UniqueBlock) -> bool {
-        self.inactive_blocks.contains(block)
-    }
-    /// Check if a block exists in either active or inactive pool.
-    pub fn contains(&self, block: &UniqueBlock) -> bool {
-        self.active_blocks.contains_key(block) || self.inactive_blocks.contains(block)
-    }
-    /// Move block from active to inactive (ref_count reached 0).
-    pub fn deactivate(&mut self, block: &UniqueBlock) {
-        debug_assert!(
-            self.active_blocks.contains_key(block),
-            "deactivate called on non-active block"
-        );
-        debug_assert!(
-            !self.inactive_blocks.contains(block),
-            "deactivate called on already-inactive block"
-        );
-        self.active_blocks.remove(block);
-        self.inactive_blocks.insert(block.clone());
-    }
-    /// Move block from inactive to active with ref_count=1. Returns true if found.
-    pub fn reactivate(&mut self, block: &UniqueBlock) -> bool {
-        if self.inactive_blocks.remove(block) {
-            self.active_blocks.insert(block.clone(), 1);
-            true
-        } else {
-            false
-        }
-    }
-    /// Check if total blocks (active + inactive) has reached max_capacity.
-    pub fn is_at_capacity(&self) -> bool {
-        self.active_blocks.len() + self.inactive_blocks.len() >= self.max_capacity
-    }
-    /// Get the number of active blocks.
-    pub fn num_active(&self) -> usize {
-        self.active_blocks.len()
-    }
-    /// Get the number of inactive blocks.
-    pub fn num_inactive(&self) -> usize {
-        self.inactive_blocks.len()
-    }
-    /// Get the maximum block capacity.
-    pub fn max_capacity(&self) -> usize {
-        self.max_capacity
-    }
-    /// Get the current capacity (active + inactive blocks).
-    pub fn current_capacity(&self) -> usize {
-        self.active_blocks.len() + self.inactive_blocks.len()
-    }
-    /// Iterate over active block keys.
-    pub fn active_keys(&self) -> impl Iterator<Item = &UniqueBlock> {
-        self.active_blocks.keys()
-    }
-    /// Iterate over inactive block keys.
-    pub fn inactive_keys(&self) -> impl Iterator<Item = &UniqueBlock> {
-        self.inactive_blocks.keys()
-    }
-    /// Direct access to active blocks map (for tests that check ref counts).
-    pub fn active_blocks(&self) -> &FxHashMap<UniqueBlock, usize> {
-        &self.active_blocks
-    }
-}
--- a/lib/mocker/src/cache/mod.rs
+++ b/lib/mocker/src/cache/mod.rs
@@ -3,8 +3,6 @@
 //! Cache data structures for KV block management.
-pub mod hash_cache;
 pub mod radix_cache;
-pub use hash_cache::HashCache;
 pub use radix_cache::RadixCache;
--- a/lib/mocker/src/common/evictor.rs
+++ b/lib/mocker/src/common/evictor.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-use std::cmp::{Eq, Ordering};
-use std::collections::BTreeSet;
-use std::hash::Hash;
-use rustc_hash::FxHashMap;
-/// A wrapper for (T, counter) that implements Ord based only on counter
-#[derive(Debug, Clone, Eq, PartialEq)]
-struct PriorityItem<T> {
-    item: T,
-    counter: i64,
-}
-impl<T: Eq> Ord for PriorityItem<T> {
-    fn cmp(&self, other: &Self) -> Ordering {
-        self.counter.cmp(&other.counter)
-    }
-}
-impl<T: Eq> PartialOrd for PriorityItem<T> {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-/// An LRU evictor that maintains objects and evicts them based on their
-/// priority counter. Lower counter values are evicted first.
-#[derive(Debug)]
-pub struct LRUEvictor<T: Clone + Eq + Hash> {
-    free_table: FxHashMap<T, i64>,
-    priority_queue: BTreeSet<PriorityItem<T>>,
-    positive_counter: i64,
-    negative_counter: i64,
-}
-impl<T: Clone + Eq + Hash> Default for LRUEvictor<T> {
-    fn default() -> Self {
-        Self {
-            free_table: FxHashMap::default(),
-            priority_queue: BTreeSet::new(),
-            positive_counter: 0,
-            negative_counter: 0,
-        }
-    }
-}
-impl<T: Clone + Eq + Hash> LRUEvictor<T> {
-    pub fn new(_cleanup_threshold: usize) -> Self {
-        Self::default()
-    }
-    pub fn keys(&self) -> std::collections::hash_map::Keys<'_, T, i64> {
-        self.free_table.keys()
-    }
-    fn update(&mut self, object: T, counter: i64) {
-        self.free_table.insert(object.clone(), counter);
-        self.priority_queue.insert(PriorityItem {
-            item: object,
-            counter,
-        });
-    }
-    pub fn insert(&mut self, object: T) {
-        // Remove old entry if it exists
-        if let Some(&old_counter) = self.free_table.get(&object) {
-            self.priority_queue.remove(&PriorityItem {
-                item: object.clone(),
-                counter: old_counter,
-            });
-        }
-        // Increment positive counter and insert
-        self.positive_counter += 1;
-        let counter = self.positive_counter;
-        self.update(object, counter);
-    }
-    /// Push an object to the front with negative counter (highest priority for eviction)
-    pub fn push_front(&mut self, object: T) {
-        // Remove old entry if it exists
-        if let Some(&old_counter) = self.free_table.get(&object) {
-            self.priority_queue.remove(&PriorityItem {
-                item: object.clone(),
-                counter: old_counter,
-            });
-        }
-        // Decrement negative counter and insert
-        self.negative_counter -= 1;
-        let counter = self.negative_counter;
-        self.update(object, counter);
-    }
-    pub fn contains(&self, object: &T) -> bool {
-        self.free_table.contains_key(object)
-    }
-    /// Evict an object based on LRU policy (lowest counter value)
-    /// Returns the evicted object or None if no objects are available
-    pub fn evict(&mut self) -> Option<T> {
-        self.priority_queue.pop_first().map(|item| {
-            self.free_table.remove(&item.item);
-            item.item
-        })
-    }
-    pub fn remove(&mut self, object: &T) -> bool {
-        let Some(&counter) = self.free_table.get(object) else {
-            return false;
-        };
-        self.free_table.remove(object);
-        self.priority_queue.remove(&PriorityItem {
-            item: object.clone(),
-            counter,
-        });
-        true
-    }
-    pub fn len(&self) -> usize {
-        self.free_table.len()
-    }
-    pub fn is_empty(&self) -> bool {
-        self.free_table.is_empty()
-    }
-}
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn test_lru_evictor_eviction_order() {
-        // Create a new LRUEvictor
-        let mut evictor = LRUEvictor::<i32>::new(1); // threshold value doesn't matter anymore
-        // Add items in the specified order
-        evictor.insert(4);
-        evictor.insert(3);
-        evictor.insert(2);
-        evictor.insert(1);
-        evictor.insert(5);
-        evictor.insert(1); // Updates counter for 1
-        evictor.insert(4); // Updates counter for 4
-        evictor.insert(2); // Updates counter for 2
-        evictor.push_front(4);
-        // Verify the eviction order
-        let evicted = evictor.evict().unwrap();
-        assert_eq!(evicted, 4);
-        let evicted = evictor.evict().unwrap();
-        assert_eq!(evicted, 3);
-        let evicted = evictor.evict().unwrap();
-        assert_eq!(evicted, 5);
-        let evicted = evictor.evict().unwrap();
-        assert_eq!(evicted, 1);
-        let evicted = evictor.evict().unwrap();
-        assert_eq!(evicted, 2);
-        let evicted = evictor.evict();
-        assert_eq!(evicted, None);
-        assert_eq!(evictor.len(), 0);
-    }
-    // ... existing test_push_front test ...
-}
--- a/lib/mocker/src/common/mod.rs
+++ b/lib/mocker/src/common/mod.rs
@@ -4,7 +4,6 @@
 //! Shared components used across all engine implementations.
 pub mod bootstrap;
-pub mod evictor;
 pub mod kv_cache_trace;
 pub mod perf_model;
 pub mod protocols;

--- a/lib/mocker/src/common/protocols.rs
+++ b/lib/mocker/src/common/protocols.rs
@@ -13,7 +13,24 @@ use validator::Validate;
 use crate::common::perf_model::PerfModel;
 use dynamo_kv_router::protocols::KvCacheEvent;
 use dynamo_tokens::blocks::UniqueBlock;
-use dynamo_tokens::{BlockHash, SequenceHash, Token};
+use dynamo_tokens::{BlockHash, PositionalLineageHash, SequenceHash, Token};
+/// Metadata marker type for kvbm-logical blocks in the mocker's G1 pool.
+#[derive(Clone, Debug)]
+pub struct G1;
+/// Eviction strategy for the kvbm-logical inactive pool.
+///
+/// `Lineage` is the default and matches kvbm-logical's own default — it evicts
+/// leaf blocks first, which subsumes the preemption-priority behaviour that the
+/// mocker's old `LRUEvictor::push_front` provided.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
+pub enum MockerEvictionBackend {
+    Lru,
+    MultiLru,
+    #[default]
+    Lineage,
+}
 /// Trait for publishing KV cache events.
 /// This abstracts the runtime dependency so mocker components can remain generic.
@@ -142,12 +159,20 @@ pub enum MoveBlock {
    Use(
        Vec<UniqueBlock>,
        Vec<BlockHash>,
+        Vec<PositionalLineageHash>,
        Option<Vec<Vec<u32>>>,
        Option<UniqueBlock>,
    ),
    Destroy(Vec<UniqueBlock>),
    Deref(Vec<UniqueBlock>),
-    Promote(Uuid, SequenceHash, Option<u64>, BlockHash, Option<Vec<u32>>),
+    Promote(
+        Uuid,
+        SequenceHash,
+        Option<u64>,
+        BlockHash,
+        PositionalLineageHash,
+        Option<Vec<u32>>,
+    ),
 }
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]

--- a/lib/mocker/src/common/sequence.rs
+++ b/lib/mocker/src/common/sequence.rs
@@ -4,35 +4,41 @@
 use crate::common::protocols::MoveBlock;
 use derive_getters::Getters;
 use dynamo_tokens::blocks::UniqueBlock;
-use dynamo_tokens::{TokenBlockSequence, Tokens};
+use dynamo_tokens::{PositionalLineageHash, TokenBlockSequence, Tokens};
 use rand::random;
 use validator::Validate;
-/// Create unique blocks and block hashes from a TokenBlockSequence.
+/// Create unique blocks, block hashes, and positional-lineage hashes from a
+/// [`TokenBlockSequence`].
 fn create_sequence_cache(
    tokens: &TokenBlockSequence,
    block_size: usize,
    enable_prefix_caching: bool,
-) -> (Vec<UniqueBlock>, Vec<u64>) {
+) -> (Vec<UniqueBlock>, Vec<u64>, Vec<PositionalLineageHash>) {
    let mut unique_blocks = Vec::with_capacity(tokens.blocks().len() + 1);
    let mut block_hashes = Vec::with_capacity(tokens.blocks().len());
+    let mut plhs = Vec::with_capacity(tokens.blocks().len());
-    for block in tokens.blocks() {
+    for (pos, block) in tokens.blocks().iter().enumerate() {
        block_hashes.push(block.block_hash());
-        unique_blocks.push({
+        if enable_prefix_caching {
-            if enable_prefix_caching {
+            unique_blocks.push(UniqueBlock::FullBlock(block.sequence_hash()));
-                UniqueBlock::FullBlock(block.sequence_hash())
+            plhs.push(block.positional_lineage_hash());
-            } else {
+        } else {
-                UniqueBlock::FullBlock(random::<u64>())
+            unique_blocks.push(UniqueBlock::FullBlock(random::<u64>()));
-            }
+            plhs.push(PositionalLineageHash::new(
-        });
+                random::<u64>(),
+                None,
+                pos as u64,
+            ));
+        }
    }
    // Only push the partial block if tokens count isn't a multiple of block_size
    if !tokens.total_tokens().is_multiple_of(block_size) {
        unique_blocks.push(UniqueBlock::default());
    }
-    (unique_blocks, block_hashes)
+    (unique_blocks, block_hashes, plhs)
 }
 /// A sequence that is actively being built, with the ability to add tokens and commit to hashes
@@ -41,6 +47,7 @@ fn create_sequence_cache(
 pub struct ActiveSequence {
    unique_blocks: Vec<UniqueBlock>,
    block_hashes: Vec<u64>,
+    plhs: Vec<PositionalLineageHash>,
    tokens: TokenBlockSequence,
@@ -80,12 +87,13 @@ impl ActiveSequence {
        let num_input_tokens = tokens.len();
        let tokens = Tokens::from(tokens).into_sequence(block_size as u32, Some(1337));
-        let (unique_blocks, block_hashes) =
+        let (unique_blocks, block_hashes, plhs) =
            create_sequence_cache(&tokens, block_size, enable_prefix_caching);
        let seq = Self {
            unique_blocks,
            block_hashes,
+            plhs,
            tokens,
            block_size,
            max_output_tokens,
@@ -132,6 +140,8 @@ impl ActiveSequence {
        let hash_start = prev_blocks.min(self.block_hashes.len());
        let hash_end = target_blocks.min(self.block_hashes.len());
        let hashes = self.block_hashes[hash_start..hash_end].to_vec();
+        // Cached per-sequence PLHs (stable across calls).
+        let plhs = self.plhs[hash_start..hash_end].to_vec();
        let token_ids = if self.emit_token_ids && hash_start < hash_end {
            Some(
@@ -149,7 +159,17 @@ impl ActiveSequence {
        } else {
            None
        };
-        Some(MoveBlock::Use(blocks, hashes, token_ids, parent))
+        Some(MoveBlock::Use(blocks, hashes, plhs, token_ids, parent))
+    }
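+    /// Positional lineage hashes for all fully-tokenised blocks in the sequence.
+    /// Mirrors `block_hashes()` but returns the token-derived PLH of each block,
+    /// recomputed from `self.tokens` on every call.
+    ///
+    /// NOTE(review): when prefix caching is disabled the PLHs cached in
+    /// `self.plhs` (and sent in `MoveBlock::Use` / `MoveBlock::Promote`) are
+    /// randomized, so they will not equal the values returned here — confirm
+    /// callers do not rely on the two agreeing.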
+    pub fn positional_lineage_hashes(&self) -> Vec<PositionalLineageHash> {
+        self.tokens
+            .blocks()
+            .iter()
+            .map(|block| block.positional_lineage_hash())
+            .collect()
    }
    /// Commit a successful allocation by advancing `num_allocated_tokens`.
@@ -209,12 +229,22 @@ impl ActiveSequence {
                random::<u64>()
            };
            let last_block_hash = last_complete.block_hash();
+            // Same randomization story as `last_seq_hash`: with prefix caching off,
+            // two identical prompts must not share blocks, so the PLH we promote
+            // with must also be unique — otherwise `process_promote`'s
+            // `match_blocks(&[plh])` lookup would reuse another request's block.
+            let last_plh = if self.enable_prefix_caching {
+                last_complete.positional_lineage_hash()
+            } else {
+                PositionalLineageHash::new(random::<u64>(), None, self.block_hashes.len() as u64)
+            };
            let promote_token_ids = if self.emit_token_ids {
                Some(last_complete.tokens().to_vec())
            } else {
                None
            };
            self.block_hashes.push(last_block_hash);
+            self.plhs.push(last_plh);
            self.unique_blocks.pop();
            // After pop, the last element is the parent block
@@ -230,13 +260,20 @@ impl ActiveSequence {
                last_seq_hash,
                second_to_last_hash,
                last_block_hash,
+                last_plh,
                promote_token_ids,
            ));
        }
        let new_partial_block = UniqueBlock::default();
        self.unique_blocks.push(new_partial_block.clone());
-        signals.push(MoveBlock::Use(vec![new_partial_block], vec![], None, None));
+        signals.push(MoveBlock::Use(
+            vec![new_partial_block],
+            vec![],
+            vec![],
+            None,
+            None,
+        ));
        Some(signals)
    }

--- a/lib/mocker/src/kv_manager/kvbm_backend.rs
+++ b/lib/mocker/src/kv_manager/kvbm_backend.rs
--- a/lib/mocker/src/kv_manager/mod.rs
+++ b/lib/mocker/src/kv_manager/mod.rs
@@ -3,8 +3,8 @@
 //! Pluggable KV cache block managers.
+pub mod kvbm_backend;
 pub mod sglang_backend;
-pub mod vllm_backend;
+pub use kvbm_backend::KvManager;
 pub use sglang_backend::SglangKvManager;
-pub use vllm_backend::KvManager;
--- a/lib/mocker/src/kv_manager/vllm_backend.rs
+++ b/lib/mocker/src/kv_manager/vllm_backend.rs
--- a/lib/mocker/src/scheduler/vllm/tests.rs
+++ b/lib/mocker/src/scheduler/vllm/tests.rs
@@ -579,11 +579,21 @@ mod live_scheduler {
            .build()
            .unwrap();
+        // Side-channel router indexer: the mocker's emitted KV event stream is
+        // forwarded in real time into `LocalKvIndexer`, which applies Stored/
+        // Removed events against its own radix tree. If the mocker ever emits
+        // an invalid event (dangling parent, re-Stored of a present block, or
+        // Removed of an unknown block), the indexer's per-status counters tick
+        // — `assert_no_event_errors()` turns those into a test failure.
+        let harness = RouterIndexerHarness::new(64, ROUTER_TEST_WORKER_ID);
+        let (forwarder_sink, forwarder_task) = harness.spawn_forwarder();
+        let publishers = KvEventPublishers::new(Some(forwarder_sink as _), None);
        let scheduler = Scheduler::new(
            args,
            0,
            Some(output_tx),
-            KvEventPublishers::default(),
+            publishers,
            None,
            FpmPublisher::default(),
        );
@@ -597,6 +607,17 @@ mod live_scheduler {
            use_shared_tokens,
        )
        .await;
+        // Drop the scheduler: no new events fire and the forwarder's sender is
+        // released, so the forwarder task drains and exits. The join below is
+        // bounded to 2 s and its result is deliberately ignored.
+        drop(scheduler);
+        let _ = tokio::time::timeout(Duration::from_secs(2), forwarder_task).await;
+        harness.flush().await;
+        harness.assert_no_event_errors();
+        // NOTE: we do NOT assert `dump_events().is_empty()` here because
+        // mocker's protocol does not emit router `Removed` events on
+        // request completion.
+        harness.shutdown();
    }
    #[tokio::test]