Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bba70a41
Unverified
Commit
bba70a41
authored
Mar 14, 2026
by
Thomas Montfort
Committed by
GitHub
Mar 14, 2026
Browse files
feat: standalone KV indexer runtime integration (#7295)
parent
3718da8c
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
121 additions
and
17 deletions
+121
-17
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+3
-2
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+52
-15
lib/llm/src/kv_router/prefill_router.rs
lib/llm/src/kv_router/prefill_router.rs
+1
-0
lib/llm/src/kv_router/remote_indexer.rs
lib/llm/src/kv_router/remote_indexer.rs
+65
-0
No files found.
lib/llm/src/discovery/watcher.rs
View file @
bba70a41
...
@@ -462,8 +462,9 @@ impl ModelWatcher {
...
@@ -462,8 +462,9 @@ impl ModelWatcher {
.kv_chooser_for
(
.kv_chooser_for
(
&
endpoint
,
&
endpoint
,
card
.kv_cache_block_size
,
card
.kv_cache_block_size
,
Some
(
self
.router_config.kv_router_config
),
Some
(
self
.router_config.kv_router_config
.clone
()
),
WORKER_TYPE_DECODE
,
// This is the decode router
WORKER_TYPE_DECODE
,
// This is the decode router
Some
(
card
.display_name
.clone
()),
)
)
.await
?
,
.await
?
,
)
)
...
@@ -482,7 +483,7 @@ impl ModelWatcher {
...
@@ -482,7 +483,7 @@ impl ModelWatcher {
.register_prefill_router
(
&
model_name
,
&
namespace
)
.register_prefill_router
(
&
model_name
,
&
namespace
)
.map
(|
rx
|
{
.map
(|
rx
|
{
// Create prefill-specific config with track_active_blocks disabled
// Create prefill-specific config with track_active_blocks disabled
let
mut
prefill_config
=
self
.router_config.kv_router_config
;
let
mut
prefill_config
=
self
.router_config.kv_router_config
.clone
()
;
prefill_config
.router_track_active_blocks
=
false
;
prefill_config
.router_track_active_blocks
=
false
;
PrefillRouter
::
new
(
PrefillRouter
::
new
(
...
...
lib/llm/src/kv_router.rs
View file @
bba70a41
...
@@ -38,6 +38,7 @@ pub mod publisher;
...
@@ -38,6 +38,7 @@ pub mod publisher;
pub
mod
push_router
;
pub
mod
push_router
;
pub
mod
queue
;
pub
mod
queue
;
pub
mod
recorder
;
pub
mod
recorder
;
pub
mod
remote_indexer
;
pub
mod
scheduler
;
pub
mod
scheduler
;
pub
mod
sequence
;
pub
mod
sequence
;
pub
mod
subscriber
;
pub
mod
subscriber
;
...
@@ -58,6 +59,7 @@ use crate::{
...
@@ -58,6 +59,7 @@ use crate::{
RouterResponse
,
TokensWithHashes
,
WorkerId
,
WorkerWithDpRank
,
RouterResponse
,
TokensWithHashes
,
WorkerId
,
WorkerWithDpRank
,
compute_block_hash_for_seq
,
compute_block_hash_for_seq
,
},
},
remote_indexer
::
RemoteIndexer
,
scheduler
::{
KvScheduler
,
PotentialLoad
},
scheduler
::{
KvScheduler
,
PotentialLoad
},
sequence
::{
SequenceError
,
SequenceRequest
},
sequence
::{
SequenceError
,
SequenceRequest
},
},
},
...
@@ -73,7 +75,7 @@ use std::collections::HashSet;
...
@@ -73,7 +75,7 @@ use std::collections::HashSet;
pub
const
KV_METRICS_ENDPOINT
:
&
str
=
"load_metrics"
;
pub
const
KV_METRICS_ENDPOINT
:
&
str
=
"load_metrics"
;
// for metric publishing (push-based)
// for metric publishing (push-based)
pub
const
KV_EVENT_SUBJECT
:
&
str
=
"kv-events"
;
pub
use
dynamo_kv_router
::
protocols
::
KV_EVENT_SUBJECT
;
pub
const
KV_METRICS_SUBJECT
:
&
str
=
"kv_metrics"
;
pub
const
KV_METRICS_SUBJECT
:
&
str
=
"kv_metrics"
;
// for inter-router comms
// for inter-router comms
...
@@ -84,8 +86,8 @@ pub const ACTIVE_SEQUENCES_SUBJECT: &str = "active_sequences_events";
...
@@ -84,8 +86,8 @@ pub const ACTIVE_SEQUENCES_SUBJECT: &str = "active_sequences_events";
pub
const
RADIX_STATE_BUCKET
:
&
str
=
"radix-bucket"
;
pub
const
RADIX_STATE_BUCKET
:
&
str
=
"radix-bucket"
;
pub
const
RADIX_STATE_FILE
:
&
str
=
"radix-state"
;
pub
const
RADIX_STATE_FILE
:
&
str
=
"radix-state"
;
// for standalone indexer query
// for standalone indexer query
— re-export from shared crate
pub
const
KV_INDEXER_QUERY_ENDPOINT
:
&
str
=
"kv_indexer_query"
;
pub
use
dynamo_kv_router
::
indexer
::
KV_INDEXER_QUERY_ENDPOINT
;
// for worker-local kvindexer query
// for worker-local kvindexer query
pub
const
WORKER_KV_INDEXER_BUFFER_SIZE
:
usize
=
1024
;
// store 1024 most recent events in worker buffer
pub
const
WORKER_KV_INDEXER_BUFFER_SIZE
:
usize
=
1024
;
// store 1024 most recent events in worker buffer
...
@@ -133,19 +135,40 @@ pub enum Indexer {
...
@@ -133,19 +135,40 @@ pub enum Indexer {
/// Does not support TTL/pruning.
/// Does not support TTL/pruning.
Concurrent
(
Arc
<
ThreadPoolIndexer
<
ConcurrentRadixTree
>>
),
Concurrent
(
Arc
<
ThreadPoolIndexer
<
ConcurrentRadixTree
>>
),
/// Forwards queries to a standalone KV indexer service via the request plane.
/// The standalone indexer manages its own radix tree and event subscription.
Remote
(
Arc
<
RemoteIndexer
>
),
/// Used when we do not wish to use the indexer at all (e.g., when overlap_score_weight is 0).
/// Used when we do not wish to use the indexer at all (e.g., when overlap_score_weight is 0).
/// Note: This will cause KV events to accumulate in JetStream as we do not regularly purge them.
/// Note: This will cause KV events to accumulate in JetStream as we do not regularly purge them.
None
,
None
,
}
}
impl
Indexer
{
impl
Indexer
{
pub
fn
new
(
pub
async
fn
new
(
component
:
&
dynamo_runtime
::
component
::
Component
,
component
:
&
dynamo_runtime
::
component
::
Component
,
kv_router_config
:
&
KvRouterConfig
,
kv_router_config
:
&
KvRouterConfig
,
block_size
:
u32
,
block_size
:
u32
,
)
->
Self
{
model_name
:
Option
<
String
>
,
)
->
Result
<
Self
>
{
if
kv_router_config
.overlap_score_weight
==
0.0
{
if
kv_router_config
.overlap_score_weight
==
0.0
{
return
Indexer
::
None
;
return
Ok
(
Indexer
::
None
);
}
// Remote indexer: forward queries to a standalone KV indexer service.
if
let
Some
(
ref
indexer_component_name
)
=
kv_router_config
.remote_indexer_component
{
let
model_name
=
model_name
.ok_or_else
(||
{
anyhow
::
anyhow!
(
"model_name is required when remote_indexer_component is configured"
)
})
?
;
tracing
::
info!
(
remote_indexer_component
=
%
indexer_component_name
,
model_name
,
"Using remote KV indexer"
);
let
remote
=
RemoteIndexer
::
new
(
component
,
indexer_component_name
,
model_name
)
.await
?
;
return
Ok
(
Indexer
::
Remote
(
Arc
::
new
(
remote
)));
}
}
// Approximate mode (--no-kv-events): always use single-threaded KvIndexer
// Approximate mode (--no-kv-events): always use single-threaded KvIndexer
...
@@ -159,33 +182,33 @@ impl Indexer {
...
@@ -159,33 +182,33 @@ impl Indexer {
max_tree_size
:
kv_router_config
.router_max_tree_size
,
max_tree_size
:
kv_router_config
.router_max_tree_size
,
prune_target_ratio
:
kv_router_config
.router_prune_target_ratio
,
prune_target_ratio
:
kv_router_config
.router_prune_target_ratio
,
});
});
return
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
return
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
cancellation_token
,
cancellation_token
,
None
,
None
,
block_size
,
block_size
,
kv_indexer_metrics
,
kv_indexer_metrics
,
prune_config
,
prune_config
,
));
))
)
;
}
}
if
kv_router_config
.router_event_threads
>
1
{
if
kv_router_config
.router_event_threads
>
1
{
return
Indexer
::
Concurrent
(
Arc
::
new
(
ThreadPoolIndexer
::
new
(
return
Ok
(
Indexer
::
Concurrent
(
Arc
::
new
(
ThreadPoolIndexer
::
new
(
ConcurrentRadixTree
::
new
(),
ConcurrentRadixTree
::
new
(),
kv_router_config
.router_event_threads
as
usize
,
kv_router_config
.router_event_threads
as
usize
,
block_size
,
block_size
,
)));
)))
)
;
}
}
let
kv_indexer_metrics
=
indexer
::
KvIndexerMetrics
::
from_component
(
component
);
let
kv_indexer_metrics
=
indexer
::
KvIndexerMetrics
::
from_component
(
component
);
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
cancellation_token
,
cancellation_token
,
None
,
// expiration_duration for frequency tracking
None
,
// expiration_duration for frequency tracking
block_size
,
block_size
,
kv_indexer_metrics
,
kv_indexer_metrics
,
None
,
None
,
))
))
)
}
}
pub
(
crate
)
async
fn
find_matches
(
pub
(
crate
)
async
fn
find_matches
(
...
@@ -195,6 +218,10 @@ impl Indexer {
...
@@ -195,6 +218,10 @@ impl Indexer {
match
self
{
match
self
{
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.find_matches
(
sequence
)
.await
,
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.find_matches
(
sequence
)
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.find_matches
(
sequence
)
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.find_matches
(
sequence
)
.await
,
Indexer
::
Remote
(
remote
)
=>
remote
.find_matches
(
sequence
)
.await
.map_err
(|
e
|
{
tracing
::
warn!
(
error
=
%
e
,
"Remote indexer query failed"
);
KvRouterError
::
IndexerOffline
}),
Indexer
::
None
=>
Ok
(
OverlapScores
::
new
()),
Indexer
::
None
=>
Ok
(
OverlapScores
::
new
()),
}
}
}
}
...
@@ -203,6 +230,7 @@ impl Indexer {
...
@@ -203,6 +230,7 @@ impl Indexer {
match
self
{
match
self
{
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.dump_events
()
.await
,
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.dump_events
()
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.dump_events
()
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.dump_events
()
.await
,
Indexer
::
Remote
(
_
)
=>
Ok
(
Vec
::
new
()),
Indexer
::
None
=>
{
Indexer
::
None
=>
{
panic!
(
panic!
(
"Cannot dump events: indexer does not exist (is overlap_score_weight set to 0?)"
"Cannot dump events: indexer does not exist (is overlap_score_weight set to 0?)"
...
@@ -226,6 +254,7 @@ impl Indexer {
...
@@ -226,6 +254,7 @@ impl Indexer {
tpi
.process_routing_decision_for_request
(
tokens_with_hashes
,
worker
)
tpi
.process_routing_decision_for_request
(
tokens_with_hashes
,
worker
)
.await
.await
}
}
Indexer
::
Remote
(
_
)
=>
Ok
(()),
Indexer
::
None
=>
Ok
(()),
Indexer
::
None
=>
Ok
(()),
}
}
}
}
...
@@ -238,6 +267,7 @@ impl Indexer {
...
@@ -238,6 +267,7 @@ impl Indexer {
}
}
}
}
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.apply_event
(
event
)
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.apply_event
(
event
)
.await
,
Indexer
::
Remote
(
_
)
=>
{}
// standalone indexer gets events directly
Indexer
::
None
=>
{}
Indexer
::
None
=>
{}
}
}
}
}
...
@@ -252,6 +282,7 @@ impl Indexer {
...
@@ -252,6 +282,7 @@ impl Indexer {
Indexer
::
Concurrent
(
tpi
)
=>
{
Indexer
::
Concurrent
(
tpi
)
=>
{
KvIndexerInterface
::
remove_worker
(
tpi
.as_ref
(),
worker_id
)
.await
;
KvIndexerInterface
::
remove_worker
(
tpi
.as_ref
(),
worker_id
)
.await
;
}
}
Indexer
::
Remote
(
_
)
=>
{}
// standalone indexer manages its own workers
Indexer
::
None
=>
{}
Indexer
::
None
=>
{}
}
}
}
}
...
@@ -268,6 +299,7 @@ impl Indexer {
...
@@ -268,6 +299,7 @@ impl Indexer {
resp_rx
.await
.unwrap_or_default
()
resp_rx
.await
.unwrap_or_default
()
}
}
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.backend
()
.get_workers
(),
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.backend
()
.get_workers
(),
Indexer
::
Remote
(
_
)
=>
Vec
::
new
(),
Indexer
::
None
=>
Vec
::
new
(),
Indexer
::
None
=>
Vec
::
new
(),
}
}
}
}
...
@@ -285,6 +317,7 @@ pub struct KvRouter {
...
@@ -285,6 +317,7 @@ pub struct KvRouter {
}
}
impl
KvRouter
{
impl
KvRouter
{
#[allow(clippy::too_many_arguments)]
pub
async
fn
new
(
pub
async
fn
new
(
endpoint
:
Endpoint
,
endpoint
:
Endpoint
,
client
:
Client
,
client
:
Client
,
...
@@ -293,13 +326,14 @@ impl KvRouter {
...
@@ -293,13 +326,14 @@ impl KvRouter {
selector
:
Option
<
Box
<
WorkerSelector
>>
,
selector
:
Option
<
Box
<
WorkerSelector
>>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
worker_type
:
&
'static
str
,
worker_type
:
&
'static
str
,
model_name
:
Option
<
String
>
,
)
->
Result
<
Self
>
{
)
->
Result
<
Self
>
{
let
kv_router_config
=
kv_router_config
.unwrap_or_default
();
let
kv_router_config
=
kv_router_config
.unwrap_or_default
();
kv_router_config
.validate
()
?
;
kv_router_config
.validate
()
?
;
let
component
=
endpoint
.component
();
let
component
=
endpoint
.component
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
indexer
=
Indexer
::
new
(
component
,
&
kv_router_config
,
block_size
)
;
let
indexer
=
Indexer
::
new
(
component
,
&
kv_router_config
,
block_size
,
model_name
)
.await
?
;
// Wait for at least one worker with a known runtime config before starting scheduler
// Wait for at least one worker with a known runtime config before starting scheduler
let
_
=
workers_with_configs
let
_
=
workers_with_configs
...
@@ -319,8 +353,11 @@ impl KvRouter {
...
@@ -319,8 +353,11 @@ impl KvRouter {
)
)
.await
?
;
.await
?
;
// Start KV event subscription if needed (use_kv_events=true and overlap_score_weight>0)
// Start KV event subscription if needed — skip when using a remote indexer
if
kv_router_config
.should_subscribe_to_kv_events
()
{
// (the standalone indexer handles its own event subscription).
if
kv_router_config
.remote_indexer_component
.is_some
()
{
tracing
::
info!
(
"Skipping KV event subscription (using remote indexer)"
);
}
else
if
kv_router_config
.should_subscribe_to_kv_events
()
{
subscriber
::
start_subscriber
(
component
.clone
(),
&
kv_router_config
,
indexer
.clone
())
subscriber
::
start_subscriber
(
component
.clone
(),
&
kv_router_config
,
indexer
.clone
())
.await
?
;
.await
?
;
}
else
{
}
else
{
...
...
lib/llm/src/kv_router/prefill_router.rs
View file @
bba70a41
...
@@ -218,6 +218,7 @@ impl PrefillRouter {
...
@@ -218,6 +218,7 @@ impl PrefillRouter {
kv_cache_block_size
,
kv_cache_block_size
,
kv_router_config
,
kv_router_config
,
WORKER_TYPE_PREFILL
,
WORKER_TYPE_PREFILL
,
Some
(
self
.model_name
.clone
()),
)
)
.await
?
;
.await
?
;
...
...
lib/llm/src/kv_router/remote_indexer.rs
0 → 100644
View file @
bba70a41
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
anyhow
::
Result
;
use
futures
::
StreamExt
;
use
dynamo_runtime
::{
component
::
Component
,
pipeline
::{
ManyOut
,
RouterMode
,
SingleIn
,
network
::
egress
::
push_router
::
PushRouter
},
};
use
dynamo_kv_router
::{
indexer
::{
IndexerQueryRequest
,
IndexerQueryResponse
,
KV_INDEXER_QUERY_ENDPOINT
},
protocols
::{
LocalBlockHash
,
OverlapScores
},
};
/// Client-side handle to a standalone KV indexer reached over the request plane.
///
/// The frontend uses this when `remote_indexer_component` is configured: rather
/// than keeping a radix tree locally, every `find_matches` call is forwarded to
/// the standalone indexer service through the Dynamo request plane.
pub struct RemoteIndexer {
    /// Push router used to send query requests to the indexer's query endpoint
    /// and receive its responses.
    router: PushRouter<IndexerQueryRequest, IndexerQueryResponse>,
    /// Model name attached to every query sent to the indexer.
    model_name: String,
    /// Name of the namespace of the component this indexer was created from;
    /// attached to every query sent to the indexer.
    namespace: String,
}
impl
RemoteIndexer
{
pub
async
fn
new
(
component
:
&
Component
,
indexer_component_name
:
&
str
,
model_name
:
String
,
)
->
Result
<
Self
>
{
let
namespace
=
component
.namespace
()
.name
();
let
indexer_ns
=
component
.namespace
();
let
indexer_component
=
indexer_ns
.component
(
indexer_component_name
)
?
;
let
endpoint
=
indexer_component
.endpoint
(
KV_INDEXER_QUERY_ENDPOINT
);
let
client
=
endpoint
.client
()
.await
?
;
let
router
=
PushRouter
::
from_client_no_fault_detection
(
client
,
RouterMode
::
RoundRobin
)
.await
?
;
Ok
(
Self
{
router
,
model_name
,
namespace
,
})
}
pub
async
fn
find_matches
(
&
self
,
block_hashes
:
Vec
<
LocalBlockHash
>
)
->
Result
<
OverlapScores
>
{
let
request
=
IndexerQueryRequest
{
model_name
:
self
.model_name
.clone
(),
namespace
:
self
.namespace
.clone
(),
block_hashes
,
};
let
mut
stream
:
ManyOut
<
IndexerQueryResponse
>
=
self
.router
.round_robin
(
SingleIn
::
new
(
request
))
.await
?
;
match
stream
.next
()
.await
{
Some
(
IndexerQueryResponse
::
Scores
(
scores
))
=>
Ok
(
scores
.into
()),
Some
(
IndexerQueryResponse
::
Error
(
msg
))
=>
{
Err
(
anyhow
::
anyhow!
(
"Remote indexer error: {}"
,
msg
))
}
None
=>
Err
(
anyhow
::
anyhow!
(
"Remote indexer returned empty response"
)),
}
}
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment