Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bba70a41
"examples/custom_backend/vscode:/vscode.git/clone" did not exist on "c6b59045792cbf834ff9e9ae7a5828cab48c453b"
Unverified
Commit
bba70a41
authored
Mar 14, 2026
by
Thomas Montfort
Committed by
GitHub
Mar 14, 2026
Browse files
feat: standalone KV indexer runtime integration (#7295)
parent
3718da8c
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
121 additions
and
17 deletions
+121
-17
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+3
-2
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+52
-15
lib/llm/src/kv_router/prefill_router.rs
lib/llm/src/kv_router/prefill_router.rs
+1
-0
lib/llm/src/kv_router/remote_indexer.rs
lib/llm/src/kv_router/remote_indexer.rs
+65
-0
No files found.
lib/llm/src/discovery/watcher.rs
View file @
bba70a41
...
...
@@ -462,8 +462,9 @@ impl ModelWatcher {
.kv_chooser_for
(
&
endpoint
,
card
.kv_cache_block_size
,
Some
(
self
.router_config.kv_router_config
),
Some
(
self
.router_config.kv_router_config
.clone
()
),
WORKER_TYPE_DECODE
,
// This is the decode router
Some
(
card
.display_name
.clone
()),
)
.await
?
,
)
...
...
@@ -482,7 +483,7 @@ impl ModelWatcher {
.register_prefill_router
(
&
model_name
,
&
namespace
)
.map
(|
rx
|
{
// Create prefill-specific config with track_active_blocks disabled
let
mut
prefill_config
=
self
.router_config.kv_router_config
;
let
mut
prefill_config
=
self
.router_config.kv_router_config
.clone
()
;
prefill_config
.router_track_active_blocks
=
false
;
PrefillRouter
::
new
(
...
...
lib/llm/src/kv_router.rs
View file @
bba70a41
...
...
@@ -38,6 +38,7 @@ pub mod publisher;
pub
mod
push_router
;
pub
mod
queue
;
pub
mod
recorder
;
pub
mod
remote_indexer
;
pub
mod
scheduler
;
pub
mod
sequence
;
pub
mod
subscriber
;
...
...
@@ -58,6 +59,7 @@ use crate::{
RouterResponse
,
TokensWithHashes
,
WorkerId
,
WorkerWithDpRank
,
compute_block_hash_for_seq
,
},
remote_indexer
::
RemoteIndexer
,
scheduler
::{
KvScheduler
,
PotentialLoad
},
sequence
::{
SequenceError
,
SequenceRequest
},
},
...
...
@@ -73,7 +75,7 @@ use std::collections::HashSet;
pub
const
KV_METRICS_ENDPOINT
:
&
str
=
"load_metrics"
;
// for metric publishing (push-based)
pub
const
KV_EVENT_SUBJECT
:
&
str
=
"kv-events"
;
pub
use
dynamo_kv_router
::
protocols
::
KV_EVENT_SUBJECT
;
pub
const
KV_METRICS_SUBJECT
:
&
str
=
"kv_metrics"
;
// for inter-router comms
...
...
@@ -84,8 +86,8 @@ pub const ACTIVE_SEQUENCES_SUBJECT: &str = "active_sequences_events";
pub
const
RADIX_STATE_BUCKET
:
&
str
=
"radix-bucket"
;
pub
const
RADIX_STATE_FILE
:
&
str
=
"radix-state"
;
// for standalone indexer query
pub
const
KV_INDEXER_QUERY_ENDPOINT
:
&
str
=
"kv_indexer_query"
;
// for standalone indexer query
— re-export from shared crate
pub
use
dynamo_kv_router
::
indexer
::
KV_INDEXER_QUERY_ENDPOINT
;
// for worker-local kvindexer query
pub
const
WORKER_KV_INDEXER_BUFFER_SIZE
:
usize
=
1024
;
// store 1024 most recent events in worker buffer
...
...
@@ -133,19 +135,40 @@ pub enum Indexer {
/// Does not support TTL/pruning.
Concurrent
(
Arc
<
ThreadPoolIndexer
<
ConcurrentRadixTree
>>
),
/// Forwards queries to a standalone KV indexer service via the request plane.
/// The standalone indexer manages its own radix tree and event subscription.
Remote
(
Arc
<
RemoteIndexer
>
),
/// Used when we do not wish to use the indexer at all (e.g., when overlap_score_weight is 0).
/// Note: This will cause KV events to accumulate in JetStream as we do not regularly purge them.
None
,
}
impl
Indexer
{
pub
fn
new
(
pub
async
fn
new
(
component
:
&
dynamo_runtime
::
component
::
Component
,
kv_router_config
:
&
KvRouterConfig
,
block_size
:
u32
,
)
->
Self
{
model_name
:
Option
<
String
>
,
)
->
Result
<
Self
>
{
if
kv_router_config
.overlap_score_weight
==
0.0
{
return
Indexer
::
None
;
return
Ok
(
Indexer
::
None
);
}
// Remote indexer: forward queries to a standalone KV indexer service.
if
let
Some
(
ref
indexer_component_name
)
=
kv_router_config
.remote_indexer_component
{
let
model_name
=
model_name
.ok_or_else
(||
{
anyhow
::
anyhow!
(
"model_name is required when remote_indexer_component is configured"
)
})
?
;
tracing
::
info!
(
remote_indexer_component
=
%
indexer_component_name
,
model_name
,
"Using remote KV indexer"
);
let
remote
=
RemoteIndexer
::
new
(
component
,
indexer_component_name
,
model_name
)
.await
?
;
return
Ok
(
Indexer
::
Remote
(
Arc
::
new
(
remote
)));
}
// Approximate mode (--no-kv-events): always use single-threaded KvIndexer
...
...
@@ -159,33 +182,33 @@ impl Indexer {
max_tree_size
:
kv_router_config
.router_max_tree_size
,
prune_target_ratio
:
kv_router_config
.router_prune_target_ratio
,
});
return
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
return
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
cancellation_token
,
None
,
block_size
,
kv_indexer_metrics
,
prune_config
,
));
))
)
;
}
if
kv_router_config
.router_event_threads
>
1
{
return
Indexer
::
Concurrent
(
Arc
::
new
(
ThreadPoolIndexer
::
new
(
return
Ok
(
Indexer
::
Concurrent
(
Arc
::
new
(
ThreadPoolIndexer
::
new
(
ConcurrentRadixTree
::
new
(),
kv_router_config
.router_event_threads
as
usize
,
block_size
,
)));
)))
)
;
}
let
kv_indexer_metrics
=
indexer
::
KvIndexerMetrics
::
from_component
(
component
);
let
cancellation_token
=
component
.drt
()
.primary_token
();
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
cancellation_token
,
None
,
// expiration_duration for frequency tracking
block_size
,
kv_indexer_metrics
,
None
,
))
))
)
}
pub
(
crate
)
async
fn
find_matches
(
...
...
@@ -195,6 +218,10 @@ impl Indexer {
match
self
{
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.find_matches
(
sequence
)
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.find_matches
(
sequence
)
.await
,
Indexer
::
Remote
(
remote
)
=>
remote
.find_matches
(
sequence
)
.await
.map_err
(|
e
|
{
tracing
::
warn!
(
error
=
%
e
,
"Remote indexer query failed"
);
KvRouterError
::
IndexerOffline
}),
Indexer
::
None
=>
Ok
(
OverlapScores
::
new
()),
}
}
...
...
@@ -203,6 +230,7 @@ impl Indexer {
match
self
{
Indexer
::
KvIndexer
(
indexer
)
=>
indexer
.dump_events
()
.await
,
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.dump_events
()
.await
,
Indexer
::
Remote
(
_
)
=>
Ok
(
Vec
::
new
()),
Indexer
::
None
=>
{
panic!
(
"Cannot dump events: indexer does not exist (is overlap_score_weight set to 0?)"
...
...
@@ -226,6 +254,7 @@ impl Indexer {
tpi
.process_routing_decision_for_request
(
tokens_with_hashes
,
worker
)
.await
}
Indexer
::
Remote
(
_
)
=>
Ok
(()),
Indexer
::
None
=>
Ok
(()),
}
}
...
...
@@ -238,6 +267,7 @@ impl Indexer {
}
}
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.apply_event
(
event
)
.await
,
Indexer
::
Remote
(
_
)
=>
{}
// standalone indexer gets events directly
Indexer
::
None
=>
{}
}
}
...
...
@@ -252,6 +282,7 @@ impl Indexer {
Indexer
::
Concurrent
(
tpi
)
=>
{
KvIndexerInterface
::
remove_worker
(
tpi
.as_ref
(),
worker_id
)
.await
;
}
Indexer
::
Remote
(
_
)
=>
{}
// standalone indexer manages its own workers
Indexer
::
None
=>
{}
}
}
...
...
@@ -268,6 +299,7 @@ impl Indexer {
resp_rx
.await
.unwrap_or_default
()
}
Indexer
::
Concurrent
(
tpi
)
=>
tpi
.backend
()
.get_workers
(),
Indexer
::
Remote
(
_
)
=>
Vec
::
new
(),
Indexer
::
None
=>
Vec
::
new
(),
}
}
...
...
@@ -285,6 +317,7 @@ pub struct KvRouter {
}
impl
KvRouter
{
#[allow(clippy::too_many_arguments)]
pub
async
fn
new
(
endpoint
:
Endpoint
,
client
:
Client
,
...
...
@@ -293,13 +326,14 @@ impl KvRouter {
selector
:
Option
<
Box
<
WorkerSelector
>>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
worker_type
:
&
'static
str
,
model_name
:
Option
<
String
>
,
)
->
Result
<
Self
>
{
let
kv_router_config
=
kv_router_config
.unwrap_or_default
();
kv_router_config
.validate
()
?
;
let
component
=
endpoint
.component
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
indexer
=
Indexer
::
new
(
component
,
&
kv_router_config
,
block_size
)
;
let
indexer
=
Indexer
::
new
(
component
,
&
kv_router_config
,
block_size
,
model_name
)
.await
?
;
// Wait for at least one worker with a known runtime config before starting scheduler
let
_
=
workers_with_configs
...
...
@@ -319,8 +353,11 @@ impl KvRouter {
)
.await
?
;
// Start KV event subscription if needed (use_kv_events=true and overlap_score_weight>0)
if
kv_router_config
.should_subscribe_to_kv_events
()
{
// Start KV event subscription if needed — skip when using a remote indexer
// (the standalone indexer handles its own event subscription).
if
kv_router_config
.remote_indexer_component
.is_some
()
{
tracing
::
info!
(
"Skipping KV event subscription (using remote indexer)"
);
}
else
if
kv_router_config
.should_subscribe_to_kv_events
()
{
subscriber
::
start_subscriber
(
component
.clone
(),
&
kv_router_config
,
indexer
.clone
())
.await
?
;
}
else
{
...
...
lib/llm/src/kv_router/prefill_router.rs
View file @
bba70a41
...
...
@@ -218,6 +218,7 @@ impl PrefillRouter {
kv_cache_block_size
,
kv_router_config
,
WORKER_TYPE_PREFILL
,
Some
(
self
.model_name
.clone
()),
)
.await
?
;
...
...
lib/llm/src/kv_router/remote_indexer.rs
0 → 100644
View file @
bba70a41
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
anyhow
::
Result
;
use
futures
::
StreamExt
;
use
dynamo_runtime
::{
component
::
Component
,
pipeline
::{
ManyOut
,
RouterMode
,
SingleIn
,
network
::
egress
::
push_router
::
PushRouter
},
};
use
dynamo_kv_router
::{
indexer
::{
IndexerQueryRequest
,
IndexerQueryResponse
,
KV_INDEXER_QUERY_ENDPOINT
},
protocols
::{
LocalBlockHash
,
OverlapScores
},
};
/// Client-side handle to a standalone KV indexer service.
///
/// When `remote_indexer_component` is configured, the frontend uses this type
/// instead of keeping a radix tree of its own: every `find_matches` call is
/// forwarded to the standalone indexer over the Dynamo request plane.
pub struct RemoteIndexer {
    /// Request-plane router used to send [`IndexerQueryRequest`]s and receive
    /// [`IndexerQueryResponse`]s.
    router: PushRouter<IndexerQueryRequest, IndexerQueryResponse>,
    /// Model name attached to every query.
    model_name: String,
    /// Name of the namespace of the component this indexer was created from;
    /// attached to every query.
    namespace: String,
}
impl
RemoteIndexer
{
pub
async
fn
new
(
component
:
&
Component
,
indexer_component_name
:
&
str
,
model_name
:
String
,
)
->
Result
<
Self
>
{
let
namespace
=
component
.namespace
()
.name
();
let
indexer_ns
=
component
.namespace
();
let
indexer_component
=
indexer_ns
.component
(
indexer_component_name
)
?
;
let
endpoint
=
indexer_component
.endpoint
(
KV_INDEXER_QUERY_ENDPOINT
);
let
client
=
endpoint
.client
()
.await
?
;
let
router
=
PushRouter
::
from_client_no_fault_detection
(
client
,
RouterMode
::
RoundRobin
)
.await
?
;
Ok
(
Self
{
router
,
model_name
,
namespace
,
})
}
pub
async
fn
find_matches
(
&
self
,
block_hashes
:
Vec
<
LocalBlockHash
>
)
->
Result
<
OverlapScores
>
{
let
request
=
IndexerQueryRequest
{
model_name
:
self
.model_name
.clone
(),
namespace
:
self
.namespace
.clone
(),
block_hashes
,
};
let
mut
stream
:
ManyOut
<
IndexerQueryResponse
>
=
self
.router
.round_robin
(
SingleIn
::
new
(
request
))
.await
?
;
match
stream
.next
()
.await
{
Some
(
IndexerQueryResponse
::
Scores
(
scores
))
=>
Ok
(
scores
.into
()),
Some
(
IndexerQueryResponse
::
Error
(
msg
))
=>
{
Err
(
anyhow
::
anyhow!
(
"Remote indexer error: {}"
,
msg
))
}
None
=>
Err
(
anyhow
::
anyhow!
(
"Remote indexer returned empty response"
)),
}
}
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment