Unverified Commit 3ea22fcf authored by Waël Boukhobza's avatar Waël Boukhobza Committed by GitHub
Browse files

feat(router): max tree size based pruning (#4057)


Signed-off-by: Wael Boukhobza <wawa_wael@live.fr>
parent a207b4be
......@@ -726,10 +726,15 @@ impl ApproxKvIndexer {
#[new]
fn new(component: Component, kv_block_size: usize, ttl_secs: f64) -> PyResult<Self> {
let ttl = tokio::time::Duration::from_secs_f64(ttl_secs);
let prune_config = Some(llm_rs::kv_router::approx::PruneConfig {
max_tree_size: 2usize.pow(14), // 2** 14 = 16384
prune_target_ratio: 0.8,
});
let inner = Arc::new(llm_rs::kv_router::approx::ApproxKvIndexer::new(
component.inner.drt().runtime().child_token(),
kv_block_size as u32,
ttl,
prune_config,
));
Ok(Self { inner })
}
......
......@@ -36,6 +36,7 @@ pub use prefill_router::PrefillRouter;
use crate::{
kv_router::{
approx::ApproxKvIndexer,
approx::PruneConfig,
indexer::{
KvIndexer, KvIndexerInterface, KvRouterError, OverlapScores, RouterEvent,
compute_block_hash_for_seq, compute_seq_hash_for_block,
......@@ -259,6 +260,10 @@ impl KvRouter {
cancellation_token.clone(),
block_size,
Duration::from_secs(120),
Some(PruneConfig {
max_tree_size: 2usize.pow(14), // 2** 14 = 16384
prune_target_ratio: 0.8,
}),
))
};
......
This diff is collapsed.
......@@ -68,6 +68,9 @@ pub enum KvRouterError {
#[error("Indexer is dropped request")]
IndexerDroppedRequest,
#[error("Prune operation failed: {0}")]
PruneFailed(String),
}
/// Errors that can occur during KV Cache Event processing.
......@@ -235,6 +238,8 @@ pub struct RadixTree {
lookup: HashMap<WorkerWithDpRank, HashMap<ExternalSequenceBlockHash, SharedRadixBlock>>,
/// The time window the radix tree checks when considering the frequency of block accesses
expiration_duration: Option<Duration>,
/// The current size of the tree.
current_size: usize,
}
impl Default for RadixTree {
......@@ -254,6 +259,7 @@ impl RadixTree {
root: Rc::new(RefCell::new(RadixBlock::new())),
lookup: HashMap::new(),
expiration_duration,
current_size: 0,
}
}
......@@ -380,6 +386,9 @@ impl RadixTree {
.children
.insert(block_id.tokens_hash, new_block.clone());
// increment the current size when creating a new block
self.current_size = self.current_size.saturating_add(1);
new_block
}
};
......@@ -428,6 +437,9 @@ impl RadixTree {
if guard.workers.is_empty() {
// if no workers are using this block, that is true for all children
guard.children.clear();
// Decrement the current size when removing the last worker from a node
self.current_size = self.current_size.saturating_sub(1);
}
// remove the block from the lookup table
worker_lookup.remove(&block);
......@@ -460,6 +472,9 @@ impl RadixTree {
// If no workers are using this block, that is true for all children
if block.borrow().workers.is_empty() {
block.borrow_mut().children.clear();
// Decrement the current size when removing the last worker from a node
self.current_size = self.current_size.saturating_sub(1);
}
});
......@@ -560,6 +575,10 @@ impl RadixTree {
events
}
pub fn current_size(&self) -> usize {
self.current_size
}
}
/// Metrics for the KV Indexer.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment