Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bba70a41
Unverified
Commit
bba70a41
authored
Mar 14, 2026
by
Thomas Montfort
Committed by
GitHub
Mar 14, 2026
Browse files
feat: standalone KV indexer runtime integration (#7295)
parent
3718da8c
Changes
24
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
121 additions
and
17 deletions
+121
-17
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+3
-2
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+52
-15
lib/llm/src/kv_router/prefill_router.rs
lib/llm/src/kv_router/prefill_router.rs
+1
-0
lib/llm/src/kv_router/remote_indexer.rs
lib/llm/src/kv_router/remote_indexer.rs
+65
-0
No files found.
lib/llm/src/discovery/watcher.rs
View file @
bba70a41
...
...
@@ -462,8 +462,9 @@ impl ModelWatcher {
.kv_chooser_for
(
&
endpoint
,
card
.kv_cache_block_size
,
Some
(
self
.router_config.kv_router_config
),
Some
(
self
.router_config.kv_router_config
.clone
()
),
WORKER_TYPE_DECODE
,
// This is the decode router
Some
(
card
.display_name
.clone
()),
)
.await
?
,
)
...
...
@@ -482,7 +483,7 @@ impl ModelWatcher {
.register_prefill_router
(
&
model_name
,
&
namespace
)
.map
(|
rx
|
{
// Create prefill-specific config with track_active_blocks disabled
let
mut
prefill_config
=
self
.router_config.kv_router_config
;
let
mut
prefill_config
=
self
.router_config.kv_router_config
.clone
()
;
prefill_config
.router_track_active_blocks
=
false
;
PrefillRouter
::
new
(
...
...
lib/llm/src/kv_router.rs
View file @
bba70a41
...
...
@@ -38,6 +38,7 @@ pub mod publisher;
pub
mod
push_router
;
pub
mod
queue
;
pub
mod
recorder
;
pub
mod
remote_indexer
;
pub
mod
scheduler
;
pub
mod
sequence
;
pub
mod
subscriber
;
...
...
@@ -58,6 +59,7 @@ use crate::{
RouterResponse
,
TokensWithHashes
,
WorkerId
,
WorkerWithDpRank
,
compute_block_hash_for_seq
,
},
remote_indexer
::
RemoteIndexer
,
scheduler
::{
KvScheduler
,
PotentialLoad
},
sequence
::{
SequenceError
,
SequenceRequest
},
},
...
...
@@ -73,7 +75,7 @@ use std::collections::HashSet;
pub
const
KV_METRICS_ENDPOINT
:
&
str
=
"load_metrics"
;
// for metric publishing (push-based)
pub
const
KV_EVENT_SUBJECT
:
&
str
=
"kv-events"
;
pub
use
dynamo_kv_router
::
protocols
::
KV_EVENT_SUBJECT
;
pub
const
KV_METRICS_SUBJECT
:
&
str
=
"kv_metrics"
;
// for inter-router comms
...
...
@@ -84,8 +86,8 @@ pub const ACTIVE_SEQUENCES_SUBJECT: &str = "active_sequences_events";
pub
const
RADIX_STATE_BUCKET
:
&
str
=
"radix-bucket"
;
pub
const
RADIX_STATE_FILE
:
&
str
=
"radix-state"
;
// for standalone indexer query
pub
const
KV_INDEXER_QUERY_ENDPOINT
:
&
str
=
"kv_indexer_query"
;
// for standalone indexer query
— re-export from shared crate
pub
use
dynamo_kv_router
::
indexer
::
KV_INDEXER_QUERY_ENDPOINT
;
// for worker-local kvindexer query
pub
const
WORKER_KV_INDEXER_BUFFER_SIZE
:
usize
=
1024
;
// store 1024 most recent events in worker buffer
...
...
@@ -133,19 +135,40 @@ pub enum Indexer {
/// Does not support TTL/pruning.
Concurrent
(
Arc
<
ThreadPoolIndexer
<
ConcurrentRadixTree
>>
),
/// Forwards queries to a standalone KV indexer service via the request plane.
/// The standalone indexer manages its own radix tree and event subscription.
Remote
(
Arc
<
RemoteIndexer
>
),
/// Used when we do not wish to use the indexer at all (e.g., when overlap_score_weight is 0).
/// Note: This will cause KV events to accumulate in JetStream as we do not regularly purge them.
None
,
}
impl
Indexer
{
pub
fn
new
(
pub
async
fn
new
(
component
:
&
dynamo_runtime
::
component
::
Component
,
kv_router_config
:
&
KvRouterConfig
,
block_size
:
u32
,
)
->
Self
{
model_name
:
Option
<
String
>
,
)
->
Result
<
Self
>
{
if
kv_router_config
.overlap_score_weight
==
0.0
{
return
Indexer
::
None
;
return
Ok
(
Indexer
::
None
);
}
// Remote indexer: forward queries to a standalone KV indexer service.
if
let
Some
(
ref
indexer_component_name
)
=
kv_router_config
.remote_indexer_component
{
let
model_name
=
model_name
.ok_or_else
(||
{
anyhow
::
anyhow!
(
"model_name is required when remote_indexer_component is configured"
)
})
?
;
tracing
::
info!
(
remote_indexer_component
=
%
indexer_component_name
,
model_name
,
"Using remote KV indexer"
);
let
remote
=
RemoteIndexer
::
new
(
component
,
indexer_component_name
,
model_name
)
.await
?
;
return
Ok
(
Indexer
::
Remote
(
Arc
::
new
(
remote
)));
}
// Approximate mode (--no-kv-events): always use single-threaded KvIndexer
...
...
@@ -159,33 +182,33 @@ impl Indexer {
max_tree_size
:
kv_router_config
.router_max_tree_size
,
prune_target_ratio
:
kv_router_config
.router_prune_target_ratio
,
});
return
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
return
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
cancellation_token
,
None
,
block_size
,
kv_indexer_metrics
,
prune_config
,
));
))
)
;
}
if
kv_router_config
.router_event_threads
>
1
{
return
Indexer
::
Concurrent
(
Arc
::
new
(
ThreadPoolIndexer
::
new
(
return
Ok
(
Indexer
::
Concurrent
(
Arc
::
new
(
ThreadPoolIndexer
::
new
(
ConcurrentRadixTree
::
new
(),
kv_router_config
.router_event_threads
as
usize
,
block_size
,
)));
)))
)
;
}
let
kv_indexer_metrics
=
indexer
::
KvIndexerMetrics
::
from_component
(
component
);
let
cancellation_token
=
component
.drt
()
.primary_token
();
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
cancellation_token
,
None
,
// expiration_duration for frequency tracking
block_size
,
kv_indexer_metrics
,
None
,
))
))
)
}
pub
(
crate
)
async
fn
find_matches
(
...
...
@@ -195,6 +218,10 @@ impl Indexer {
match
self
{
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.find_matches
(
sequence
)
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.find_matches
(
sequence
)
.await
,
Indexer
::
Remote
(
remote
)
=>
remote
.find_matches
(
sequence
)
.await
.map_err
(|
e
|
{
tracing
::
warn!
(
error
=
%
e
,
"Remote indexer query failed"
);
KvRouterError
::
IndexerOffline
}),
Indexer
::
None
=>
Ok
(
OverlapScores
::
new
()),
}
}
...
...
@@ -203,6 +230,7 @@ impl Indexer {
match
self
{
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.dump_events
()
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.dump_events
()
.await
,
Indexer
::
Remote
(
_
)
=>
Ok
(
Vec
::
new
()),
Indexer
::
None
=>
{
panic!
(
"Cannot dump events: indexer does not exist (is overlap_score_weight set to 0?)"
...
...
@@ -226,6 +254,7 @@ impl Indexer {
tpi
.process_routing_decision_for_request
(
tokens_with_hashes
,
worker
)
.await
}
Indexer
::
Remote
(
_
)
=>
Ok
(()),
Indexer
::
None
=>
Ok
(()),
}
}
...
...
@@ -238,6 +267,7 @@ impl Indexer {
}
}
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.apply_event
(
event
)
.await
,
Indexer
::
Remote
(
_
)
=>
{}
// standalone indexer gets events directly
Indexer
::
None
=>
{}
}
}
...
...
@@ -252,6 +282,7 @@ impl Indexer {
Indexer
::
Concurrent
(
tpi
)
=>
{
KvIndexerInterface
::
remove_worker
(
tpi
.as_ref
(),
worker_id
)
.await
;
}
Indexer
::
Remote
(
_
)
=>
{}
// standalone indexer manages its own workers
Indexer
::
None
=>
{}
}
}
...
...
@@ -268,6 +299,7 @@ impl Indexer {
resp_rx
.await
.unwrap_or_default
()
}
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.backend
()
.get_workers
(),
Indexer
::
Remote
(
_
)
=>
Vec
::
new
(),
Indexer
::
None
=>
Vec
::
new
(),
}
}
...
...
@@ -285,6 +317,7 @@ pub struct KvRouter {
}
impl
KvRouter
{
#[allow(clippy::too_many_arguments)]
pub
async
fn
new
(
endpoint
:
Endpoint
,
client
:
Client
,
...
...
@@ -293,13 +326,14 @@ impl KvRouter {
selector
:
Option
<
Box
<
WorkerSelector
>>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
worker_type
:
&
'static
str
,
model_name
:
Option
<
String
>
,
)
->
Result
<
Self
>
{
let
kv_router_config
=
kv_router_config
.unwrap_or_default
();
kv_router_config
.validate
()
?
;
let
component
=
endpoint
.component
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
indexer
=
Indexer
::
new
(
component
,
&
kv_router_config
,
block_size
)
;
let
indexer
=
Indexer
::
new
(
component
,
&
kv_router_config
,
block_size
,
model_name
)
.await
?
;
// Wait for at least one worker with a known runtime config before starting scheduler
let
_
=
workers_with_configs
...
...
@@ -319,8 +353,11 @@ impl KvRouter {
)
.await
?
;
// Start KV event subscription if needed (use_kv_events=true and overlap_score_weight>0)
if
kv_router_config
.should_subscribe_to_kv_events
()
{
// Start KV event subscription if needed — skip when using a remote indexer
// (the standalone indexer handles its own event subscription).
if
kv_router_config
.remote_indexer_component
.is_some
()
{
tracing
::
info!
(
"Skipping KV event subscription (using remote indexer)"
);
}
else
if
kv_router_config
.should_subscribe_to_kv_events
()
{
subscriber
::
start_subscriber
(
component
.clone
(),
&
kv_router_config
,
indexer
.clone
())
.await
?
;
}
else
{
...
...
lib/llm/src/kv_router/prefill_router.rs
View file @
bba70a41
...
...
@@ -218,6 +218,7 @@ impl PrefillRouter {
kv_cache_block_size
,
kv_router_config
,
WORKER_TYPE_PREFILL
,
Some
(
self
.model_name
.clone
()),
)
.await
?
;
...
...
lib/llm/src/kv_router/remote_indexer.rs
0 → 100644
View file @
bba70a41
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
anyhow
::
Result
;
use
futures
::
StreamExt
;
use
dynamo_runtime
::{
component
::
Component
,
pipeline
::{
ManyOut
,
RouterMode
,
SingleIn
,
network
::
egress
::
push_router
::
PushRouter
},
};
use
dynamo_kv_router
::{
indexer
::{
IndexerQueryRequest
,
IndexerQueryResponse
,
KV_INDEXER_QUERY_ENDPOINT
},
protocols
::{
LocalBlockHash
,
OverlapScores
},
};
/// Client-side handle to a standalone KV indexer reached over the request plane.
///
/// Used by the frontend when `remote_indexer_component` is configured. Rather
/// than maintaining a local radix tree, every `find_matches` query is forwarded
/// to the standalone indexer service over the Dynamo request plane.
pub struct RemoteIndexer {
    /// Request-plane router used to send queries to the indexer's query endpoint.
    router: PushRouter<IndexerQueryRequest, IndexerQueryResponse>,
    /// Model name attached to every outgoing query.
    model_name: String,
    /// Namespace name attached to every outgoing query; captured from the
    /// component passed to `new`.
    namespace: String,
}
impl
RemoteIndexer
{
pub
async
fn
new
(
component
:
&
Component
,
indexer_component_name
:
&
str
,
model_name
:
String
,
)
->
Result
<
Self
>
{
let
namespace
=
component
.namespace
()
.name
();
let
indexer_ns
=
component
.namespace
();
let
indexer_component
=
indexer_ns
.component
(
indexer_component_name
)
?
;
let
endpoint
=
indexer_component
.endpoint
(
KV_INDEXER_QUERY_ENDPOINT
);
let
client
=
endpoint
.client
()
.await
?
;
let
router
=
PushRouter
::
from_client_no_fault_detection
(
client
,
RouterMode
::
RoundRobin
)
.await
?
;
Ok
(
Self
{
router
,
model_name
,
namespace
,
})
}
pub
async
fn
find_matches
(
&
self
,
block_hashes
:
Vec
<
LocalBlockHash
>
)
->
Result
<
OverlapScores
>
{
let
request
=
IndexerQueryRequest
{
model_name
:
self
.model_name
.clone
(),
namespace
:
self
.namespace
.clone
(),
block_hashes
,
};
let
mut
stream
:
ManyOut
<
IndexerQueryResponse
>
=
self
.router
.round_robin
(
SingleIn
::
new
(
request
))
.await
?
;
match
stream
.next
()
.await
{
Some
(
IndexerQueryResponse
::
Scores
(
scores
))
=>
Ok
(
scores
.into
()),
Some
(
IndexerQueryResponse
::
Error
(
msg
))
=>
{
Err
(
anyhow
::
anyhow!
(
"Remote indexer error: {}"
,
msg
))
}
None
=>
Err
(
anyhow
::
anyhow!
(
"Remote indexer returned empty response"
)),
}
}
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment