Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
cdb40ec6
Unverified
Commit
cdb40ec6
authored
Apr 20, 2026
by
Yan Ru Pei
Committed by
GitHub
Apr 20, 2026
Browse files
fix(kv-router): allow unit block size in slot tracking (#8395)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
e71f1d2b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
119 additions
and
3 deletions
+119
-3
lib/kv-router/src/indexer/kv_indexer.rs
lib/kv-router/src/indexer/kv_indexer.rs
+2
-0
lib/kv-router/src/indexer/mod.rs
lib/kv-router/src/indexer/mod.rs
+10
-0
lib/kv-router/src/indexer/thread_pool.rs
lib/kv-router/src/indexer/thread_pool.rs
+1
-0
lib/kv-router/src/sequences/multi_worker.rs
lib/kv-router/src/sequences/multi_worker.rs
+105
-2
lib/kv-router/src/sequences/single.rs
lib/kv-router/src/sequences/single.rs
+1
-1
No files found.
lib/kv-router/src/indexer/kv_indexer.rs
View file @
cdb40ec6
...
...
@@ -132,6 +132,8 @@ impl KvIndexer {
metrics
:
Arc
<
KvIndexerMetrics
>
,
prune_config
:
Option
<
PruneConfig
>
,
)
->
Self
{
super
::
warn_on_unit_block_size
(
"single"
,
kv_block_size
);
let
(
event_tx
,
event_rx
)
=
mpsc
::
channel
::
<
RouterEvent
>
(
16384
);
let
(
match_tx
,
match_rx
)
=
mpsc
::
channel
::
<
MatchRequest
>
(
128
);
let
(
remove_worker_tx
,
remove_worker_rx
)
=
mpsc
::
channel
::
<
WorkerId
>
(
16
);
...
...
lib/kv-router/src/indexer/mod.rs
View file @
cdb40ec6
...
...
@@ -31,6 +31,16 @@
//!
//! This module provides a scalable and efficient way to manage and retrieve data blocks for LLM inference, leveraging a global KV cache to optimize performance.
/// Emits a `tracing` warning when an indexer is configured with a block size of one.
///
/// `indexer_type` and `kv_block_size` are attached to the event as structured
/// fields. For any `kv_block_size` other than `1` this function does nothing.
fn warn_on_unit_block_size(indexer_type: &'static str, kv_block_size: u32) {
    // Only the unit block size is worth flagging; everything else is silent.
    if kv_block_size != 1 {
        return;
    }

    tracing::warn!(
        indexer_type,
        kv_block_size,
        "block_size=1 is supported for KV indexers, but consider avoiding it because KV events may saturate network bandwidth",
    );
}
mod
kv_indexer
;
mod
local
;
mod
metrics
;
...
...
lib/kv-router/src/indexer/thread_pool.rs
View file @
cdb40ec6
...
...
@@ -99,6 +99,7 @@ impl<T: SyncIndexer> ThreadPoolIndexer<T> {
metrics
:
Option
<
Arc
<
KvIndexerMetrics
>>
,
)
->
Self
{
assert
!
(
num_workers
>
0
,
"Number of workers must be greater than 0"
);
super
::
warn_on_unit_block_size
(
"thread_pool"
,
kv_block_size
);
let
backend
=
Arc
::
new
(
backend
);
let
mut
worker_event_senders
=
Vec
::
new
();
...
...
lib/kv-router/src/sequences/multi_worker.rs
View file @
cdb40ec6
...
...
@@ -136,7 +136,7 @@ impl<P: SequencePublisher + 'static> ActiveSequencesMultiWorker<P> {
router_id
:
u64
,
worker_type
:
&
'static
str
,
)
->
Self
{
assert
!
(
block_size
>
1
,
"block_size must be greater than
1
"
);
assert
!
(
block_size
>
0
,
"block_size must be greater than
0
"
);
let
(
remote_state_updates
,
_
)
=
watch
::
channel
(());
let
workers
=
WorkerTable
::
new
(
block_size
,
&
dp_range
);
let
prompt_registry
=
PromptRegistry
::
new
(
workers
.workers
());
...
...
@@ -975,6 +975,19 @@ mod tests {
)
}
/// Test helper: builds an `ActiveSequencesMultiWorker` over a
/// `NoopSequencePublisher` using the caller-supplied `block_size`.
///
/// Two workers (ids `1` and `2`) are registered, each mapped to `(0, 1)`.
fn make_multi_sequences_with_block_size(
    block_size: usize,
) -> ActiveSequencesMultiWorker<NoopSequencePublisher> {
    // Worker id -> (0, 1); presumably a (start, count) dp-rank range — confirm
    // against `ActiveSequencesMultiWorker::new`.
    let dp_range = HashMap::from([(1_u64, (0_u32, 1_u32)), (2_u64, (0_u32, 1_u32))]);

    ActiveSequencesMultiWorker::new(
        NoopSequencePublisher,
        block_size,
        dp_range,
        false,
        0,      // router_id
        "test", // worker_type
    )
}
fn
naive_potential_loads
(
sequences
:
&
ActiveSequencesMultiWorker
<
NoopSequencePublisher
>
,
token_sequence
:
Option
<&
[
SequenceHash
]
>
,
...
...
@@ -1013,9 +1026,17 @@ mod tests {
}
/// Test helper: computes sequence hashes for `tokens` using a fixed block size of 4.
///
/// Thin wrapper over `seq_hashes_for_tokens_with_block_size`; `lora_name` is
/// forwarded unchanged.
fn seq_hashes_for_tokens(tokens: &[u32], lora_name: Option<&str>) -> Vec<SequenceHash> {
    const BLOCK_SIZE: u32 = 4;
    seq_hashes_for_tokens_with_block_size(tokens, BLOCK_SIZE, lora_name)
}
fn
seq_hashes_for_tokens_with_block_size
(
tokens
:
&
[
u32
],
block_size
:
u32
,
lora_name
:
Option
<&
str
>
,
)
->
Vec
<
SequenceHash
>
{
let
block_hashes
=
compute_block_hash_for_seq
(
tokens
,
4
,
block_size
,
BlockHashOptions
{
lora_name
,
..
Default
::
default
()
...
...
@@ -1208,6 +1229,88 @@ mod tests {
);
}
// With `block_size = 1`, two prompts that repeat the same token ([7, 7, 7] and
// [7, 7, 8]) are placed on different workers. The potential-load computation
// must agree with the naive reference both before and after one request is
// freed, and freeing both requests must leave the tracker fully drained.
#[test]
fn unit_block_size_repeated_tokens_preserve_membership_and_trim() {
    let sequences = make_multi_sequences_with_block_size(1);
    let worker_a = WorkerWithDpRank::new(1, 0);
    let worker_b = WorkerWithDpRank::new(2, 0);
    let decay_now = Instant::now();

    let prompt_a = seq_hashes_for_tokens_with_block_size(&[7_u32, 7, 7], 1, None);
    let prompt_b = seq_hashes_for_tokens_with_block_size(&[7_u32, 7, 8], 1, None);

    // Register "req-a" on worker A, then "req-b" on worker B.
    for (request_id, prompt, worker) in [
        ("req-a", &prompt_a, worker_a),
        ("req-b", &prompt_b, worker_b),
    ] {
        sequences
            .add_request(
                SequenceRequest {
                    request_id: request_id.to_string(),
                    token_sequence: Some(prompt.clone()),
                    track_prefill_tokens: false,
                    expected_output_tokens: None,
                    prefill_load_hint: None,
                    worker,
                    lora_name: None,
                },
                decay_now,
            )
            .unwrap();
    }

    // Compares the tracker's answer for `prompt_b` with the naive reference and
    // pins the per-worker block counts.
    let assert_loads_for_prompt_b = || {
        let expected = naive_potential_loads(
            &sequences,
            Some(&prompt_b),
            3,
            &OverlapScores::default(),
            false,
            decay_now,
        );
        let actual = sequences.potential_blocks_and_tokens_with_prefill_tracking(
            Some(&prompt_b),
            3,
            OverlapScores::default(),
            false,
            decay_now,
        );

        assert_eq!(actual, expected);
        assert_eq!(actual.0.get(&worker_a).copied(), Some(4));
        assert_eq!(actual.0.get(&worker_b).copied(), Some(3));
    };

    // Both requests active.
    assert_loads_for_prompt_b();

    // The same counts are expected once "req-b" has been released.
    sequences.free(&"req-b".to_string(), decay_now).unwrap();
    assert_loads_for_prompt_b();

    // Releasing the last request must leave nothing behind.
    sequences.free(&"req-a".to_string(), decay_now).unwrap();
    sequences.assert_completely_drained(decay_now);
}
#[tokio::test(start_paused
=
true
)]
async
fn
force_expiry_clears_block_membership_index
()
{
let
sequences
=
make_multi_sequences
();
...
...
lib/kv-router/src/sequences/single.rs
View file @
cdb40ec6
...
...
@@ -117,7 +117,7 @@ pub struct ActiveSequences {
impl
ActiveSequences
{
/// Create a new ActiveSequences instance
pub
(
super
)
fn
new
(
block_size
:
usize
)
->
Self
{
assert
!
(
block_size
>
1
,
"block_size must be greater than
1
"
);
assert
!
(
block_size
>
0
,
"block_size must be greater than
0
"
);
Self
{
requests
:
HashMap
::
new
(),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment