Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
3ea22fcf
Unverified
Commit
3ea22fcf
authored
Nov 12, 2025
by
Waël Boukhobza
Committed by
GitHub
Nov 12, 2025
Browse files
feat(router): max tree size based pruning (#4057)
Signed-off-by:
Wael Boukhobza
<
wawa_wael@live.fr
>
parent
a207b4be
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
431 additions
and
58 deletions
+431
-58
lib/bindings/python/rust/llm/kv.rs
lib/bindings/python/rust/llm/kv.rs
+5
-0
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+5
-0
lib/llm/src/kv_router/approx.rs
lib/llm/src/kv_router/approx.rs
+402
-58
lib/llm/src/kv_router/indexer.rs
lib/llm/src/kv_router/indexer.rs
+19
-0
No files found.
lib/bindings/python/rust/llm/kv.rs
View file @
3ea22fcf
...
@@ -726,10 +726,15 @@ impl ApproxKvIndexer {
...
@@ -726,10 +726,15 @@ impl ApproxKvIndexer {
#[new]
#[new]
fn
new
(
component
:
Component
,
kv_block_size
:
usize
,
ttl_secs
:
f64
)
->
PyResult
<
Self
>
{
fn
new
(
component
:
Component
,
kv_block_size
:
usize
,
ttl_secs
:
f64
)
->
PyResult
<
Self
>
{
let
ttl
=
tokio
::
time
::
Duration
::
from_secs_f64
(
ttl_secs
);
let
ttl
=
tokio
::
time
::
Duration
::
from_secs_f64
(
ttl_secs
);
let
prune_config
=
Some
(
llm_rs
::
kv_router
::
approx
::
PruneConfig
{
max_tree_size
:
2u
size
.pow
(
14
),
// 2** 14 = 16384
prune_target_ratio
:
0.8
,
});
let
inner
=
Arc
::
new
(
llm_rs
::
kv_router
::
approx
::
ApproxKvIndexer
::
new
(
let
inner
=
Arc
::
new
(
llm_rs
::
kv_router
::
approx
::
ApproxKvIndexer
::
new
(
component
.inner
.drt
()
.runtime
()
.child_token
(),
component
.inner
.drt
()
.runtime
()
.child_token
(),
kv_block_size
as
u32
,
kv_block_size
as
u32
,
ttl
,
ttl
,
prune_config
,
));
));
Ok
(
Self
{
inner
})
Ok
(
Self
{
inner
})
}
}
...
...
lib/llm/src/kv_router.rs
View file @
3ea22fcf
...
@@ -36,6 +36,7 @@ pub use prefill_router::PrefillRouter;
...
@@ -36,6 +36,7 @@ pub use prefill_router::PrefillRouter;
use
crate
::{
use
crate
::{
kv_router
::{
kv_router
::{
approx
::
ApproxKvIndexer
,
approx
::
ApproxKvIndexer
,
approx
::
PruneConfig
,
indexer
::{
indexer
::{
KvIndexer
,
KvIndexerInterface
,
KvRouterError
,
OverlapScores
,
RouterEvent
,
KvIndexer
,
KvIndexerInterface
,
KvRouterError
,
OverlapScores
,
RouterEvent
,
compute_block_hash_for_seq
,
compute_seq_hash_for_block
,
compute_block_hash_for_seq
,
compute_seq_hash_for_block
,
...
@@ -259,6 +260,10 @@ impl KvRouter {
...
@@ -259,6 +260,10 @@ impl KvRouter {
cancellation_token
.clone
(),
cancellation_token
.clone
(),
block_size
,
block_size
,
Duration
::
from_secs
(
120
),
Duration
::
from_secs
(
120
),
Some
(
PruneConfig
{
max_tree_size
:
2u
size
.pow
(
14
),
// 2** 14 = 16384
prune_target_ratio
:
0.8
,
}),
))
))
};
};
...
...
lib/llm/src/kv_router/approx.rs
View file @
3ea22fcf
This diff is collapsed.
Click to expand it.
lib/llm/src/kv_router/indexer.rs
View file @
3ea22fcf
...
@@ -68,6 +68,9 @@ pub enum KvRouterError {
...
@@ -68,6 +68,9 @@ pub enum KvRouterError {
#[error(
"Indexer is dropped request"
)]
#[error(
"Indexer is dropped request"
)]
IndexerDroppedRequest
,
IndexerDroppedRequest
,
#[error(
"Prune operation failed: {0}"
)]
PruneFailed
(
String
),
}
}
/// Errors that can occur during KV Cache Event processing.
/// Errors that can occur during KV Cache Event processing.
...
@@ -235,6 +238,8 @@ pub struct RadixTree {
...
@@ -235,6 +238,8 @@ pub struct RadixTree {
lookup
:
HashMap
<
WorkerWithDpRank
,
HashMap
<
ExternalSequenceBlockHash
,
SharedRadixBlock
>>
,
lookup
:
HashMap
<
WorkerWithDpRank
,
HashMap
<
ExternalSequenceBlockHash
,
SharedRadixBlock
>>
,
/// The time buffer the radix tree should check when considering the frequency of block accesses
/// The time buffer the radix tree should check when considering the frequency of block accesses
expiration_duration
:
Option
<
Duration
>
,
expiration_duration
:
Option
<
Duration
>
,
/// The current size of the tree.
current_size
:
usize
,
}
}
impl
Default
for
RadixTree
{
impl
Default
for
RadixTree
{
...
@@ -254,6 +259,7 @@ impl RadixTree {
...
@@ -254,6 +259,7 @@ impl RadixTree {
root
:
Rc
::
new
(
RefCell
::
new
(
RadixBlock
::
new
())),
root
:
Rc
::
new
(
RefCell
::
new
(
RadixBlock
::
new
())),
lookup
:
HashMap
::
new
(),
lookup
:
HashMap
::
new
(),
expiration_duration
,
expiration_duration
,
current_size
:
0
,
}
}
}
}
...
@@ -380,6 +386,9 @@ impl RadixTree {
...
@@ -380,6 +386,9 @@ impl RadixTree {
.children
.children
.insert
(
block_id
.tokens_hash
,
new_block
.clone
());
.insert
(
block_id
.tokens_hash
,
new_block
.clone
());
// increment the current size when creating a new block
self
.current_size
=
self
.current_size
.saturating_add
(
1
);
new_block
new_block
}
}
};
};
...
@@ -428,6 +437,9 @@ impl RadixTree {
...
@@ -428,6 +437,9 @@ impl RadixTree {
if
guard
.workers
.is_empty
()
{
if
guard
.workers
.is_empty
()
{
// if no workers are using this block, that is true for all children
// if no workers are using this block, that is true for all children
guard
.children
.clear
();
guard
.children
.clear
();
// Decrement the current size when removing the last worker from a node
self
.current_size
=
self
.current_size
.saturating_sub
(
1
);
}
}
// remove the block from the lookup table
// remove the block from the lookup table
worker_lookup
.remove
(
&
block
);
worker_lookup
.remove
(
&
block
);
...
@@ -460,6 +472,9 @@ impl RadixTree {
...
@@ -460,6 +472,9 @@ impl RadixTree {
// If no workers are using this block, that is true for all children
// If no workers are using this block, that is true for all children
if
block
.borrow
()
.workers
.is_empty
()
{
if
block
.borrow
()
.workers
.is_empty
()
{
block
.borrow_mut
()
.children
.clear
();
block
.borrow_mut
()
.children
.clear
();
// Decrement the current size when removing the last worker from a node
self
.current_size
=
self
.current_size
.saturating_sub
(
1
);
}
}
});
});
...
@@ -560,6 +575,10 @@ impl RadixTree {
...
@@ -560,6 +575,10 @@ impl RadixTree {
events
events
}
}
/// Returns the value of this tree's `current_size` field.
pub
fn
current_size
(
&
self
)
->
usize
{
self
.current_size
}
}
}
/// Metrics for the KV Indexer.
/// Metrics for the KV Indexer.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment