Unverified Commit 3ea22fcf authored by Waël Boukhobza's avatar Waël Boukhobza Committed by GitHub
Browse files

feat(router): max tree size based pruning (#4057)


Signed-off-by: Wael Boukhobza <wawa_wael@live.fr>
parent a207b4be
...@@ -726,10 +726,15 @@ impl ApproxKvIndexer { ...@@ -726,10 +726,15 @@ impl ApproxKvIndexer {
#[new] #[new]
fn new(component: Component, kv_block_size: usize, ttl_secs: f64) -> PyResult<Self> { fn new(component: Component, kv_block_size: usize, ttl_secs: f64) -> PyResult<Self> {
let ttl = tokio::time::Duration::from_secs_f64(ttl_secs); let ttl = tokio::time::Duration::from_secs_f64(ttl_secs);
let prune_config = Some(llm_rs::kv_router::approx::PruneConfig {
max_tree_size: 2usize.pow(14), // 2** 14 = 16384
prune_target_ratio: 0.8,
});
let inner = Arc::new(llm_rs::kv_router::approx::ApproxKvIndexer::new( let inner = Arc::new(llm_rs::kv_router::approx::ApproxKvIndexer::new(
component.inner.drt().runtime().child_token(), component.inner.drt().runtime().child_token(),
kv_block_size as u32, kv_block_size as u32,
ttl, ttl,
prune_config,
)); ));
Ok(Self { inner }) Ok(Self { inner })
} }
......
...@@ -36,6 +36,7 @@ pub use prefill_router::PrefillRouter; ...@@ -36,6 +36,7 @@ pub use prefill_router::PrefillRouter;
use crate::{ use crate::{
kv_router::{ kv_router::{
approx::ApproxKvIndexer, approx::ApproxKvIndexer,
approx::PruneConfig,
indexer::{ indexer::{
KvIndexer, KvIndexerInterface, KvRouterError, OverlapScores, RouterEvent, KvIndexer, KvIndexerInterface, KvRouterError, OverlapScores, RouterEvent,
compute_block_hash_for_seq, compute_seq_hash_for_block, compute_block_hash_for_seq, compute_seq_hash_for_block,
...@@ -259,6 +260,10 @@ impl KvRouter { ...@@ -259,6 +260,10 @@ impl KvRouter {
cancellation_token.clone(), cancellation_token.clone(),
block_size, block_size,
Duration::from_secs(120), Duration::from_secs(120),
Some(PruneConfig {
max_tree_size: 2usize.pow(14), // 2** 14 = 16384
prune_target_ratio: 0.8,
}),
)) ))
}; };
......
This diff is collapsed.
...@@ -68,6 +68,9 @@ pub enum KvRouterError { ...@@ -68,6 +68,9 @@ pub enum KvRouterError {
#[error("Indexer is dropped request")] #[error("Indexer is dropped request")]
IndexerDroppedRequest, IndexerDroppedRequest,
#[error("Prune operation failed: {0}")]
PruneFailed(String),
} }
/// Errors that can occur during KV Cache Event processing. /// Errors that can occur during KV Cache Event processing.
...@@ -235,6 +238,8 @@ pub struct RadixTree { ...@@ -235,6 +238,8 @@ pub struct RadixTree {
lookup: HashMap<WorkerWithDpRank, HashMap<ExternalSequenceBlockHash, SharedRadixBlock>>, lookup: HashMap<WorkerWithDpRank, HashMap<ExternalSequenceBlockHash, SharedRadixBlock>>,
/// The time buffer the radix tree should check when considering frequency of block accesses /// The time buffer the radix tree should check when considering frequency of block accesses
expiration_duration: Option<Duration>, expiration_duration: Option<Duration>,
/// The current size of the tree.
current_size: usize,
} }
impl Default for RadixTree { impl Default for RadixTree {
...@@ -254,6 +259,7 @@ impl RadixTree { ...@@ -254,6 +259,7 @@ impl RadixTree {
root: Rc::new(RefCell::new(RadixBlock::new())), root: Rc::new(RefCell::new(RadixBlock::new())),
lookup: HashMap::new(), lookup: HashMap::new(),
expiration_duration, expiration_duration,
current_size: 0,
} }
} }
...@@ -380,6 +386,9 @@ impl RadixTree { ...@@ -380,6 +386,9 @@ impl RadixTree {
.children .children
.insert(block_id.tokens_hash, new_block.clone()); .insert(block_id.tokens_hash, new_block.clone());
// increment the current size when creating a new block
self.current_size = self.current_size.saturating_add(1);
new_block new_block
} }
}; };
...@@ -428,6 +437,9 @@ impl RadixTree { ...@@ -428,6 +437,9 @@ impl RadixTree {
if guard.workers.is_empty() { if guard.workers.is_empty() {
// if no workers are using this block, that is true for all children // if no workers are using this block, that is true for all children
guard.children.clear(); guard.children.clear();
// Decrement the current size when removing the last worker from a node
self.current_size = self.current_size.saturating_sub(1);
} }
// remove the block from the lookup table // remove the block from the lookup table
worker_lookup.remove(&block); worker_lookup.remove(&block);
...@@ -460,6 +472,9 @@ impl RadixTree { ...@@ -460,6 +472,9 @@ impl RadixTree {
// If no workers are using this block, that is true for all children // If no workers are using this block, that is true for all children
if block.borrow().workers.is_empty() { if block.borrow().workers.is_empty() {
block.borrow_mut().children.clear(); block.borrow_mut().children.clear();
// Decrement the current size when removing the last worker from a node
self.current_size = self.current_size.saturating_sub(1);
} }
}); });
...@@ -560,6 +575,10 @@ impl RadixTree { ...@@ -560,6 +575,10 @@ impl RadixTree {
events events
} }
/// Returns the tree's current size counter.
///
/// The counter is bumped by one whenever a new block is created in the tree and
/// decremented by one whenever a block loses its last worker; both updates are
/// saturating, so the value never wraps.
pub fn current_size(&self) -> usize {
self.current_size
}
} }
/// Metrics for the KV Indexer. /// Metrics for the KV Indexer.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment