Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
45be2fdc
Unverified
Commit
45be2fdc
authored
Mar 17, 2026
by
Yan Ru Pei
Committed by
GitHub
Mar 17, 2026
Browse files
chore(kv-router): drop easy llm facade reexports (#7474)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
0ac9ef9c
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
79 additions
and
95 deletions
+79
-95
Cargo.lock
Cargo.lock
+1
-0
lib/bindings/c/Cargo.toml
lib/bindings/c/Cargo.toml
+1
-0
lib/bindings/c/src/lib.rs
lib/bindings/c/src/lib.rs
+7
-5
lib/bindings/python/Cargo.toml
lib/bindings/python/Cargo.toml
+3
-3
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+2
-1
lib/bindings/python/rust/llm/entrypoint.rs
lib/bindings/python/rust/llm/entrypoint.rs
+1
-1
lib/bindings/python/rust/llm/kv.rs
lib/bindings/python/rust/llm/kv.rs
+16
-25
lib/llm/benches/kv_router_bench.rs
lib/llm/benches/kv_router_bench.rs
+1
-1
lib/llm/src/discovery/model_manager.rs
lib/llm/src/discovery/model_manager.rs
+2
-4
lib/llm/src/discovery/runtime_configs.rs
lib/llm/src/discovery/runtime_configs.rs
+1
-1
lib/llm/src/discovery/worker_monitor.rs
lib/llm/src/discovery/worker_monitor.rs
+1
-1
lib/llm/src/entrypoint.rs
lib/llm/src/entrypoint.rs
+2
-1
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+13
-23
lib/llm/src/kv_router/config.rs
lib/llm/src/kv_router/config.rs
+0
-4
lib/llm/src/kv_router/jetstream.rs
lib/llm/src/kv_router/jetstream.rs
+2
-2
lib/llm/src/kv_router/prefill_router.rs
lib/llm/src/kv_router/prefill_router.rs
+5
-2
lib/llm/src/kv_router/publisher.rs
lib/llm/src/kv_router/publisher.rs
+13
-13
lib/llm/src/kv_router/push_router.rs
lib/llm/src/kv_router/push_router.rs
+1
-1
lib/llm/src/kv_router/recorder.rs
lib/llm/src/kv_router/recorder.rs
+3
-4
lib/llm/src/kv_router/scheduler.rs
lib/llm/src/kv_router/scheduler.rs
+4
-3
No files found.
Cargo.lock
View file @
45be2fdc
...
...
@@ -4067,6 +4067,7 @@ dependencies = [
"anyhow",
"async-once-cell",
"cbindgen",
"dynamo-kv-router",
"dynamo-llm",
"dynamo-runtime",
"libc",
...
...
lib/bindings/c/Cargo.toml
View file @
45be2fdc
...
...
@@ -31,6 +31,7 @@ cbindgen = "0.27"
[dependencies]
dynamo-llm
=
{
path
=
"../../llm"
}
dynamo-kv-router
=
{
path
=
"../../kv-router"
}
dynamo-runtime
=
{
path
=
"../../runtime"
}
anyhow
=
{
workspace
=
true
}
...
...
lib/bindings/c/src/lib.rs
View file @
45be2fdc
...
...
@@ -11,7 +11,11 @@ use std::sync::Arc;
use
std
::
sync
::
atomic
::{
AtomicU32
,
Ordering
};
use
std
::
time
::
Duration
;
use
dynamo_llm
::
kv_router
::{
protocols
::
*
,
publisher
::
KvEventPublisher
};
use
dynamo_kv_router
::{
config
::{
KvRouterConfig
,
RouterConfigOverride
},
protocols
::
*
,
};
use
dynamo_llm
::
kv_router
::
publisher
::
KvEventPublisher
;
use
dynamo_llm
::
preprocessor
::
OpenAIPreprocessor
;
use
dynamo_runtime
::
discovery
::{
DiscoveryQuery
,
hash_pod_name
};
use
dynamo_runtime
::{
DistributedRuntime
,
Worker
};
...
...
@@ -19,9 +23,7 @@ use dynamo_runtime::{DistributedRuntime, Worker};
use
dynamo_runtime
::
Runtime
;
use
dynamo_llm
::
discovery
::{
ModelManager
,
WORKER_TYPE_DECODE
};
use
dynamo_llm
::
kv_router
::
KvRouterConfig
;
use
dynamo_llm
::
kv_router
::
protocols
::
WorkerWithDpRank
;
use
dynamo_llm
::
kv_router
::{
KvRouter
,
PrefillRouter
,
RouterConfigOverride
};
use
dynamo_llm
::
kv_router
::{
KvRouter
,
PrefillRouter
};
use
dynamo_runtime
::
pipeline
::
RouterMode
;
use
std
::
collections
::
HashSet
;
...
...
@@ -433,7 +435,7 @@ impl RouterHandles {
async
fn
query_prefill_worker
(
&
self
,
tokens
:
&
[
u32
],
block_mm_infos
:
Option
<&
[
Option
<
dynamo_
llm
::
kv_router
::
protocols
::
BlockExtraInfo
>
]
>
,
block_mm_infos
:
Option
<&
[
Option
<
dynamo_kv_router
::
protocols
::
BlockExtraInfo
>
]
>
,
update_states
:
bool
,
lora_name
:
Option
<
String
>
,
priority_jump
:
f64
,
...
...
lib/bindings/python/Cargo.toml
View file @
45be2fdc
...
...
@@ -24,7 +24,7 @@ crate-type = ["cdylib", "rlib"]
[features]
default
=
[]
media-ffmpeg
=
["dynamo-llm/media-ffmpeg"]
kv-indexer
=
[
"dep:dynamo-kv-router"
,
"dep:clap"
,
"dep:tracing-subscriber"
]
kv-indexer
=
[
"dep:clap"
,
"dep:tracing-subscriber"
]
kv-indexer-runtime
=
[
"kv-indexer"
,
"dynamo-kv-router/indexer-runtime"
]
kv-indexer-metrics
=
[
"kv-indexer"
,
"dynamo-kv-router/metrics"
]
...
...
@@ -46,8 +46,8 @@ tokio-stream = { version = "0" }
tokio-util
=
{
version
=
"0.7"
,
features
=
["rt"]
}
tracing
=
{
version
=
"0"
}
# kv-indexer
(optional)
dynamo-kv-router
=
{
path
=
"../../kv-router"
,
features
=
["standalone-indexer"]
,
optional
=
true
}
# kv-indexer
/ shared kv-router types
dynamo-kv-router
=
{
path
=
"../../kv-router"
,
features
=
["standalone-indexer"]
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
,
optional
=
true
}
tracing-subscriber
=
{
version
=
"0.3"
,
features
=
["env-filter"]
,
optional
=
true
}
...
...
lib/bindings/python/rust/lib.rs
View file @
45be2fdc
...
...
@@ -34,8 +34,9 @@ use dynamo_runtime::{
traits
::
DistributedRuntimeProvider
,
};
use
dynamo_kv_router
::
config
::
KvRouterConfig
;
use
dynamo_llm
::
entrypoint
::
RouterConfig
;
use
dynamo_llm
::{
self
as
llm_rs
};
use
dynamo_llm
::{
entrypoint
::
RouterConfig
,
kv_router
::
KvRouterConfig
};
use
crate
::
llm
::
local_model
::
ModelRuntimeConfig
;
use
crate
::
llm
::
preprocessor
::{
MediaDecoder
,
MediaFetcher
};
...
...
lib/bindings/python/rust/llm/entrypoint.rs
View file @
45be2fdc
...
...
@@ -10,12 +10,12 @@ use std::sync::Arc;
use
pyo3
::{
exceptions
::
PyException
,
prelude
::
*
};
use
pyo3_async_runtimes
::
TaskLocals
;
use
dynamo_kv_router
::
config
::
KvRouterConfig
as
RsKvRouterConfig
;
use
dynamo_llm
::
discovery
::
LoadThresholdConfig
as
RsLoadThresholdConfig
;
use
dynamo_llm
::
entrypoint
::
ChatEngineFactoryCallback
;
use
dynamo_llm
::
entrypoint
::
EngineConfig
as
RsEngineConfig
;
use
dynamo_llm
::
entrypoint
::
RouterConfig
as
RsRouterConfig
;
use
dynamo_llm
::
entrypoint
::
input
::
Input
;
use
dynamo_llm
::
kv_router
::
KvRouterConfig
as
RsKvRouterConfig
;
use
dynamo_llm
::
local_model
::
DEFAULT_HTTP_PORT
;
use
dynamo_llm
::
local_model
::{
LocalModel
,
LocalModelBuilder
};
use
dynamo_llm
::
mocker
::
make_mocker_engine
;
...
...
lib/bindings/python/rust/llm/kv.rs
View file @
45be2fdc
...
...
@@ -13,17 +13,18 @@ use super::*;
use
crate
::
Endpoint
;
#[cfg(feature
=
"kv-indexer"
)]
use
clap
::
Parser
;
use
dynamo_kv_router
::
config
::{
KvRouterConfig
,
RouterConfigOverride
};
use
dynamo_kv_router
::
protocols
::
compute_block_hash_for_seq
;
use
dynamo_kv_router
::
protocols
::
*
;
#[cfg(feature
=
"kv-indexer-runtime"
)]
use
dynamo_kv_router
::
standalone_indexer
::
RuntimeConfig
;
#[cfg(feature
=
"kv-indexer"
)]
use
dynamo_kv_router
::
standalone_indexer
::{
self
,
IndexerConfig
};
use
llm_rs
::
kv_router
::
protocols
::
compute_block_hash_for_seq
;
use
rs
::
pipeline
::{
AsyncEngine
,
SingleIn
};
use
rs
::
protocols
::
annotated
::
Annotated
as
RsAnnotated
;
use
tracing
;
use
llm_rs
::
kv_router
::
KvPushRouter
as
RsKvPushRouter
;
use
llm_rs
::
kv_router
::
protocols
::
*
;
use
llm_rs
::
kv_router
::
publisher
::{
KvEventSourceConfig
,
create_stored_blocks
};
use
llm_rs
::
protocols
::
common
::
timing
::
RequestTracker
;
use
llm_rs
::
protocols
::
common
::{
OutputOptions
,
SamplingOptions
,
StopConditions
};
...
...
@@ -389,7 +390,7 @@ impl KvEventPublisher {
#[pyclass]
#[derive(Clone)]
pub
(
crate
)
struct
OverlapScores
{
inner
:
llm_rs
::
kv_router
::
protocols
::
OverlapScores
,
inner
:
dynamo_
kv_router
::
protocols
::
OverlapScores
,
}
#[pymethods]
...
...
@@ -413,9 +414,9 @@ impl OverlapScores {
#[derive(Debug)]
enum
RadixTreeRequest
{
FindMatches
{
local_block_hashes
:
Vec
<
llm_rs
::
kv_router
::
protocols
::
LocalBlockHash
>
,
local_block_hashes
:
Vec
<
LocalBlockHash
>
,
early_exit
:
bool
,
response_tx
:
mpsc
::
SyncSender
<
llm_rs
::
kv_router
::
protocols
::
OverlapScores
>
,
response_tx
:
mpsc
::
SyncSender
<
dynamo_
kv_router
::
protocols
::
OverlapScores
>
,
},
ApplyEvent
{
worker_id
:
WorkerId
,
...
...
@@ -431,7 +432,7 @@ enum RadixTreeRequest {
response_tx
:
mpsc
::
SyncSender
<
()
>
,
},
DumpTreeAsEvents
{
response_tx
:
mpsc
::
SyncSender
<
Vec
<
llm_rs
::
kv_router
::
protocols
::
RouterEvent
>>
,
response_tx
:
mpsc
::
SyncSender
<
Vec
<
RouterEvent
>>
,
},
Shutdown
,
}
...
...
@@ -454,7 +455,7 @@ impl RadixTree {
// Spawn dedicated thread with simplified sync processing
std
::
thread
::
spawn
(
move
||
{
let
mut
radix_tree
=
llm_rs
::
kv_router
::
indexer
::
RadixTree
::
new_with_frequency
(
expiration_duration
);
dynamo_
kv_router
::
indexer
::
RadixTree
::
new_with_frequency
(
expiration_duration
);
loop
{
match
request_rx
.recv
()
{
...
...
@@ -485,12 +486,8 @@ impl RadixTree {
)
->
PyResult
<
OverlapScores
>
{
let
(
response_tx
,
response_rx
)
=
mpsc
::
sync_channel
(
1
);
let
local_block_hashes
=
py
.allow_threads
(||
{
sequence
.into_iter
()
.map
(
llm_rs
::
kv_router
::
protocols
::
LocalBlockHash
)
.collect
()
});
let
local_block_hashes
=
py
.allow_threads
(||
sequence
.into_iter
()
.map
(
LocalBlockHash
)
.collect
());
let
request
=
RadixTreeRequest
::
FindMatches
{
local_block_hashes
,
...
...
@@ -623,7 +620,7 @@ impl RadixTree {
impl
RadixTree
{
fn
handle_request
(
radix_tree
:
&
mut
llm_rs
::
kv_router
::
indexer
::
RadixTree
,
radix_tree
:
&
mut
dynamo_
kv_router
::
indexer
::
RadixTree
,
request
:
RadixTreeRequest
,
)
{
match
request
{
...
...
@@ -640,15 +637,9 @@ impl RadixTree {
kv_cache_event_bytes
,
response_tx
,
}
=>
{
let
result
=
match
serde_json
::
from_slice
::
<
llm_rs
::
kv_router
::
protocols
::
KvCacheEvent
,
>
(
&
kv_cache_event_bytes
)
{
let
result
=
match
serde_json
::
from_slice
::
<
KvCacheEvent
>
(
&
kv_cache_event_bytes
)
{
Ok
(
kv_cache_event
)
=>
{
let
router_event
=
llm_rs
::
kv_router
::
protocols
::
RouterEvent
::
new
(
worker_id
,
kv_cache_event
,
);
let
router_event
=
RouterEvent
::
new
(
worker_id
,
kv_cache_event
);
match
radix_tree
.apply_event
(
router_event
)
{
Ok
(
_
)
=>
Ok
(()),
Err
(
e
)
=>
Err
(
PyErr
::
new
::
<
pyo3
::
exceptions
::
PyRuntimeError
,
_
>
(
...
...
@@ -705,7 +696,7 @@ impl Drop for RadixTree {
async
fn
create_kv_router_from_endpoint
(
endpoint
:
&
Endpoint
,
block_size
:
usize
,
kv_router_config
:
Option
<
llm_rs
::
kv_router
::
KvRouterConfig
>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
)
->
Result
<
Arc
<
llm_rs
::
kv_router
::
KvRouter
>
,
PyErr
>
{
// Create ModelManager and use it to create KvRouter (ensures registration)
let
model_manager
=
Arc
::
new
(
llm_rs
::
discovery
::
ModelManager
::
new
());
...
...
@@ -964,7 +955,7 @@ impl KvRouter {
OutputOptions
::
default
()
};
let
router_config_override
:
Option
<
llm_rs
::
kv_router
::
RouterConfigOverride
>
=
let
router_config_override
:
Option
<
RouterConfigOverride
>
=
if
let
Some
(
obj
)
=
router_config_override
{
Some
(
depythonize
(
obj
.bind
(
py
))
.map_err
(
to_pyerr
)
?
)
}
else
{
...
...
@@ -1068,7 +1059,7 @@ impl KvRouter {
lora_name
:
Option
<
String
>
,
)
->
PyResult
<
Bound
<
'p
,
PyAny
>>
{
let
router_config_override
=
if
let
Some
(
obj
)
=
router_config_override
{
let
override_config
:
llm_rs
::
kv_router
::
RouterConfigOverride
=
let
override_config
:
RouterConfigOverride
=
depythonize
(
obj
.bind
(
py
))
.map_err
(
to_pyerr
)
?
;
Some
(
override_config
)
}
else
{
...
...
lib/llm/benches/kv_router_bench.rs
View file @
45be2fdc
...
...
@@ -31,7 +31,7 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use
tokenizers
::
Tokenizer
;
use
tokio
::
sync
::{
Mutex
,
Semaphore
};
use
dynamo_
llm
::
kv_router
::
protocols
::{
use
dynamo_kv_router
::
protocols
::{
ExternalSequenceBlockHash
,
KvCacheEvent
,
KvCacheEventData
,
KvCacheStoreData
,
KvCacheStoredBlockData
,
LocalBlockHash
,
RouterEvent
,
WorkerId
,
compute_hash
,
compute_seq_hash_for_block
,
...
...
lib/llm/src/discovery/model_manager.rs
View file @
45be2fdc
...
...
@@ -4,6 +4,7 @@
use
std
::{
collections
::
HashSet
,
sync
::
Arc
};
use
dashmap
::{
DashMap
,
mapref
::
entry
::
Entry
};
use
dynamo_kv_router
::{
config
::
KvRouterConfig
,
protocols
::
WorkerId
};
use
tokio
::
sync
::
oneshot
;
use
super
::
worker_monitor
::
LoadThresholdConfig
;
...
...
@@ -17,10 +18,7 @@ use dynamo_runtime::{
};
use
crate
::{
kv_router
::{
KvRouter
,
KvRouterConfig
,
protocols
::
WorkerId
,
router_endpoint_id
,
scheduler
::
DefaultWorkerSelector
,
},
kv_router
::{
KvRouter
,
router_endpoint_id
,
scheduler
::
DefaultWorkerSelector
},
local_model
::
runtime_config
::
DisaggregatedEndpoint
,
model_card
::
ModelDeploymentCard
,
types
::{
...
...
lib/llm/src/discovery/runtime_configs.rs
View file @
45be2fdc
...
...
@@ -9,9 +9,9 @@ use dynamo_runtime::component::Endpoint;
use
dynamo_runtime
::
discovery
::{
DiscoveryQuery
,
watch_and_extract_field
};
use
dynamo_runtime
::
prelude
::
DistributedRuntimeProvider
;
use
crate
::
kv_router
::
protocols
::
WorkerId
;
use
crate
::
local_model
::
runtime_config
::
ModelRuntimeConfig
;
use
crate
::
model_card
::
ModelDeploymentCard
;
use
dynamo_kv_router
::
protocols
::
WorkerId
;
/// Type alias for the runtime config watch receiver.
pub
type
RuntimeConfigWatch
=
watch
::
Receiver
<
HashMap
<
WorkerId
,
ModelRuntimeConfig
>>
;
...
...
lib/llm/src/discovery/worker_monitor.rs
View file @
45be2fdc
...
...
@@ -9,6 +9,7 @@ use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
use
tokio
::
sync
::
Notify
;
use
dashmap
::
DashMap
;
use
dynamo_kv_router
::
protocols
::
ActiveLoad
;
use
serde
::{
Deserialize
,
Serialize
};
use
crate
::
http
::
service
::
metrics
::{
...
...
@@ -17,7 +18,6 @@ use crate::http::service::metrics::{
};
use
crate
::
kv_router
::
KV_METRICS_SUBJECT
;
use
crate
::
kv_router
::
metrics
::
WORKER_LOAD_METRICS
;
use
crate
::
kv_router
::
protocols
::
ActiveLoad
;
use
crate
::
model_card
::
ModelDeploymentCard
;
use
dynamo_runtime
::
component
::
Client
;
use
dynamo_runtime
::
discovery
::{
DiscoveryQuery
,
watch_and_extract_field
};
...
...
lib/llm/src/entrypoint.rs
View file @
45be2fdc
...
...
@@ -12,11 +12,12 @@ use std::future::Future;
use
std
::
pin
::
Pin
;
use
std
::
sync
::
Arc
;
use
dynamo_kv_router
::
config
::
KvRouterConfig
;
use
dynamo_runtime
::{
discovery
::
ModelCardInstanceId
,
pipeline
::
RouterMode
};
use
crate
::{
backend
::
ExecutionContext
,
discovery
::
LoadThresholdConfig
,
engines
::
StreamingEngine
,
kv_router
::
KvRouterConfig
,
local_model
::
LocalModel
,
model_card
::
ModelDeploymentCard
,
local_model
::
LocalModel
,
model_card
::
ModelDeploymentCard
,
types
::
openai
::
chat_completions
::
OpenAIChatCompletionsStreamingEngine
,
};
...
...
lib/llm/src/kv_router.rs
View file @
45be2fdc
...
...
@@ -5,7 +5,17 @@ use std::sync::Arc;
use
std
::
time
::{
Duration
,
Instant
};
use
anyhow
::
Result
;
use
dynamo_kv_router
::{
ConcurrentRadixTree
,
ThreadPoolIndexer
};
use
dynamo_kv_router
::{
ConcurrentRadixTree
,
ThreadPoolIndexer
,
approx
::
PruneConfig
,
config
::{
KvRouterConfig
,
RouterConfigOverride
},
indexer
::{
GetWorkersRequest
,
KvIndexer
,
KvIndexerInterface
,
KvIndexerMetrics
,
KvRouterError
},
protocols
::
KV_EVENT_SUBJECT
,
protocols
::{
BlockExtraInfo
,
DpRank
,
LocalBlockHash
,
OverlapScores
,
RouterEvent
,
RouterRequest
,
RouterResponse
,
TokensWithHashes
,
WorkerId
,
WorkerWithDpRank
,
compute_block_hash_for_seq
,
},
};
use
dynamo_runtime
::{
component
::{
Client
,
Endpoint
},
discovery
::
DiscoveryQuery
,
...
...
@@ -22,15 +32,7 @@ use tokio::sync::oneshot;
use
tracing
::
Instrument
;
use
validator
::
Validate
;
// Re-export from dynamo-kv-router crate
pub
use
dynamo_kv_router
::
approx
;
pub
use
dynamo_kv_router
::
indexer
;
pub
use
dynamo_kv_router
::
protocols
;
pub
use
dynamo_kv_router
::
scheduling
;
pub
use
dynamo_kv_router
::
selector
;
pub
mod
cache_control
;
pub
mod
config
;
mod
jetstream
;
pub
mod
metrics
;
pub
mod
prefill_router
;
...
...
@@ -45,20 +47,12 @@ pub mod subscriber;
pub
mod
worker_query
;
pub
use
cache_control
::{
CacheControlClient
,
spawn_pin_prefix
};
pub
use
config
::{
KvRouterConfig
,
RouterConfigOverride
};
pub
use
prefill_router
::
PrefillRouter
;
pub
use
push_router
::{
DirectRoutingRouter
,
KvPushRouter
};
use
crate
::{
discovery
::
RuntimeConfigWatch
,
kv_router
::{
approx
::
PruneConfig
,
indexer
::{
GetWorkersRequest
,
KvIndexer
,
KvIndexerInterface
,
KvRouterError
},
protocols
::{
BlockExtraInfo
,
DpRank
,
LocalBlockHash
,
OverlapScores
,
RouterEvent
,
RouterRequest
,
RouterResponse
,
TokensWithHashes
,
WorkerId
,
WorkerWithDpRank
,
compute_block_hash_for_seq
,
},
remote_indexer
::
RemoteIndexer
,
scheduler
::{
KvScheduler
,
PotentialLoad
},
sequence
::{
SequenceError
,
SequenceRequest
},
...
...
@@ -75,7 +69,6 @@ use std::collections::HashSet;
pub
const
KV_METRICS_ENDPOINT
:
&
str
=
"load_metrics"
;
// for metric publishing (push-based)
pub
use
dynamo_kv_router
::
protocols
::
KV_EVENT_SUBJECT
;
pub
const
KV_METRICS_SUBJECT
:
&
str
=
"kv_metrics"
;
// for inter-router comms
...
...
@@ -86,9 +79,6 @@ pub const ACTIVE_SEQUENCES_SUBJECT: &str = "active_sequences_events";
pub
const
RADIX_STATE_BUCKET
:
&
str
=
"radix-bucket"
;
pub
const
RADIX_STATE_FILE
:
&
str
=
"radix-state"
;
// for standalone indexer query — re-export from shared crate
pub
use
dynamo_kv_router
::
indexer
::
KV_INDEXER_QUERY_ENDPOINT
;
// for worker-local kvindexer query
pub
const
WORKER_KV_INDEXER_BUFFER_SIZE
:
usize
=
1024
;
// store 1024 most recent events in worker buffer
...
...
@@ -175,7 +165,7 @@ impl Indexer {
// with TTL/pruning regardless of event_threads, since updates come from
// routing decisions only, not live KV events from workers.
if
!
kv_router_config
.use_kv_events
{
let
kv_indexer_metrics
=
indexer
::
KvIndexerMetrics
::
from_component
(
component
);
let
kv_indexer_metrics
=
KvIndexerMetrics
::
from_component
(
component
);
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
prune_config
=
Some
(
PruneConfig
{
ttl
:
Duration
::
from_secs_f64
(
kv_router_config
.router_ttl_secs
),
...
...
@@ -199,7 +189,7 @@ impl Indexer {
))));
}
let
kv_indexer_metrics
=
indexer
::
KvIndexerMetrics
::
from_component
(
component
);
let
kv_indexer_metrics
=
KvIndexerMetrics
::
from_component
(
component
);
let
cancellation_token
=
component
.drt
()
.primary_token
();
Ok
(
Indexer
::
KvIndexer
(
KvIndexer
::
new_with_frequency
(
...
...
lib/llm/src/kv_router/config.rs
deleted
100644 → 0
View file @
0ac9ef9c
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
use
dynamo_kv_router
::
config
::{
KvRouterConfig
,
RouterConfigOverride
};
lib/llm/src/kv_router/jetstream.rs
View file @
45be2fdc
...
...
@@ -5,6 +5,7 @@ use std::collections::HashSet;
use
std
::
time
::
Duration
;
use
anyhow
::
Result
;
use
dynamo_kv_router
::{
config
::
KvRouterConfig
,
protocols
::
RouterEvent
};
use
dynamo_runtime
::{
component
::
Component
,
config
::
environment_names
::
nats
as
env_nats
,
...
...
@@ -17,8 +18,7 @@ use rand::Rng;
use
tokio_util
::
sync
::
CancellationToken
;
use
crate
::
kv_router
::{
Indexer
,
KV_EVENT_SUBJECT
,
KvRouterConfig
,
RADIX_STATE_BUCKET
,
RADIX_STATE_FILE
,
protocols
::
RouterEvent
,
router_discovery_query
,
Indexer
,
KV_EVENT_SUBJECT
,
RADIX_STATE_BUCKET
,
RADIX_STATE_FILE
,
router_discovery_query
,
};
/// Helper function to create a KV stream name from a component and subject.
...
...
lib/llm/src/kv_router/prefill_router.rs
View file @
45be2fdc
...
...
@@ -10,6 +10,10 @@ use tokio::sync::{OwnedSemaphorePermit, oneshot};
use
tokio_util
::
sync
::
CancellationToken
;
use
tracing
::
Instrument
;
use
dynamo_kv_router
::{
config
::{
KvRouterConfig
,
RouterConfigOverride
},
protocols
::{
BlockExtraInfo
,
WorkerId
},
};
use
dynamo_runtime
::{
component
::
Endpoint
,
pipeline
::{
...
...
@@ -21,8 +25,7 @@ use dynamo_runtime::{
use
crate
::{
discovery
::
ModelManager
,
kv_router
::
protocols
::
WorkerId
,
kv_router
::{
KvPushRouter
,
KvRouterConfig
,
RouterConfigOverride
,
protocols
::
BlockExtraInfo
},
kv_router
::
KvPushRouter
,
protocols
::
common
::
llm_backend
::{
LLMEngineOutput
,
PreprocessedRequest
},
protocols
::
common
::
preprocessor
::{
BootstrapInfo
,
PrefillResult
},
protocols
::
common
::
timing
::{
RequestPhase
,
RequestTracker
,
WORKER_TYPE_PREFILL
},
...
...
lib/llm/src/kv_router/publisher.rs
View file @
45be2fdc
...
...
@@ -35,13 +35,13 @@ fn create_kv_stream_name(component: &Component, subject: &str) -> String {
.replace
(
"_"
,
"-"
)
}
use
dynamo_kv_router
::
indexer
::{
KvIndexerMetrics
,
LocalKvIndexer
};
use
dynamo_kv_router
::
protocols
::
*
;
pub
use
dynamo_kv_router
::
zmq_wire
::
create_stored_blocks
;
use
dynamo_kv_router
::
zmq_wire
::
*
;
use
crate
::
kv_router
::{
KV_EVENT_SUBJECT
,
KV_METRICS_SUBJECT
,
WORKER_KV_INDEXER_BUFFER_SIZE
,
indexer
::{
KvIndexerMetrics
,
LocalKvIndexer
},
protocols
::
*
,
worker_query
::
start_worker_kv_query_endpoint
,
};
use
dynamo_runtime
::
config
::
environment_names
::
nats
as
env_nats
;
...
...
@@ -1048,7 +1048,7 @@ impl WorkerMetricsPublisher {
#[cfg(test)]
mod
test_event_processing
{
use
super
::
*
;
use
crate
::
kv_router
::
protocols
::
compute_block_hash_for_seq
;
use
dynamo_
kv_router
::
protocols
::
compute_block_hash_for_seq
;
// ---------------------------------------------------------------------
// create_stored_block_from_parts --------------------------------------
...
...
@@ -1452,9 +1452,9 @@ mod test_event_processing {
mod
tests_startup_helpers
{
use
super
::
*
;
use
crate
::
kv_router
::
KvIndexer
;
use
crate
::
kv_router
::
indexer
::
KvIndexerInterface
;
use
crate
::
kv_router
::
protocols
::{
ExternalSequenceBlockHash
,
LocalBlockHash
};
use
bytes
::
Bytes
;
use
dynamo_kv_router
::
indexer
::{
GetWorkersRequest
,
KvIndexerInterface
};
use
dynamo_kv_router
::
protocols
::{
ExternalSequenceBlockHash
,
LocalBlockHash
};
use
std
::
sync
::{
Arc
,
Mutex
};
use
zeromq
::{
PubSocket
,
Socket
,
SocketSend
,
ZmqMessage
};
...
...
@@ -1608,7 +1608,7 @@ mod tests_startup_helpers {
// Try up to 20 times (200ms total)
let
(
resp_tx
,
resp_rx
)
=
tokio
::
sync
::
oneshot
::
channel
();
get_workers_tx
.send
(
crate
::
kv_router
::
indexer
::
GetWorkersRequest
{
resp
:
resp_tx
})
.send
(
GetWorkersRequest
{
resp
:
resp_tx
})
.await
.unwrap
();
let
workers
:
Vec
<
u64
>
=
resp_rx
.await
.unwrap
();
...
...
@@ -2014,7 +2014,7 @@ mod tests_startup_helpers {
for
_
in
0
..
20
{
let
(
resp_tx
,
resp_rx
)
=
tokio
::
sync
::
oneshot
::
channel
();
get_workers_tx
.send
(
crate
::
kv_router
::
indexer
::
GetWorkersRequest
{
resp
:
resp_tx
})
.send
(
GetWorkersRequest
{
resp
:
resp_tx
})
.await
.unwrap
();
let
workers
:
Vec
<
u64
>
=
resp_rx
.await
.unwrap
();
...
...
@@ -2085,7 +2085,7 @@ mod tests_startup_helpers {
.unwrap
();
let
router_overlap
=
overlap
.scores
.get
(
&
crate
::
kv_router
::
protocols
::
WorkerWithDpRank
::
from_worker_id
(
worker_1_id
))
.get
(
&
dynamo_
kv_router
::
protocols
::
WorkerWithDpRank
::
from_worker_id
(
worker_1_id
))
.copied
()
.unwrap_or
(
0
);
assert_eq!
(
...
...
@@ -2101,9 +2101,9 @@ mod tests_startup_helpers {
.get_events_in_id_range
(
Some
(
last_known_id
+
1
),
None
)
.await
;
let
missed_events
=
match
response
{
crate
::
kv_router
::
indexer
::
WorkerKvQueryResponse
::
Events
(
e
)
=>
e
,
crate
::
kv_router
::
indexer
::
WorkerKvQueryResponse
::
TreeDump
{
events
:
e
,
..
}
=>
e
,
crate
::
kv_router
::
indexer
::
WorkerKvQueryResponse
::
Error
(
message
)
=>
{
dynamo_
kv_router
::
indexer
::
WorkerKvQueryResponse
::
Events
(
e
)
=>
e
,
dynamo_
kv_router
::
indexer
::
WorkerKvQueryResponse
::
TreeDump
{
events
:
e
,
..
}
=>
e
,
dynamo_
kv_router
::
indexer
::
WorkerKvQueryResponse
::
Error
(
message
)
=>
{
panic!
(
"Unexpected error response: {message}"
)
}
other
=>
panic!
(
"Unexpected response: {:?}"
,
other
),
...
...
@@ -2129,7 +2129,7 @@ mod tests_startup_helpers {
let
overlap
=
router_indexer
.find_matches
(
block_hashes_2
)
.await
.unwrap
();
let
router_overlap_after
=
overlap
.scores
.get
(
&
crate
::
kv_router
::
protocols
::
WorkerWithDpRank
::
from_worker_id
(
worker_1_id
))
.get
(
&
dynamo_
kv_router
::
protocols
::
WorkerWithDpRank
::
from_worker_id
(
worker_1_id
))
.copied
()
.unwrap_or
(
0
);
assert_eq!
(
...
...
@@ -2193,7 +2193,7 @@ mod test_exponential_backoff {
#[cfg(all(test,
feature
=
"integration"
))]
mod
test_integration_publisher
{
use
super
::
*
;
use
crate
::
kv_router
::
protocols
::
ActiveLoad
;
use
dynamo_
kv_router
::
protocols
::
ActiveLoad
;
use
dynamo_runtime
::
distributed_test_utils
::
create_test_drt_async
;
use
dynamo_runtime
::
transports
::
event_plane
::
EventSubscriber
;
...
...
lib/llm/src/kv_router/push_router.rs
View file @
45be2fdc
...
...
@@ -4,6 +4,7 @@
use
std
::
sync
::
Arc
;
use
anyhow
::
Result
;
use
dynamo_kv_router
::
protocols
::{
TokensWithHashes
,
WorkerWithDpRank
};
use
dynamo_runtime
::{
pipeline
::{
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
PushRouter
,
ResponseStream
,
...
...
@@ -21,7 +22,6 @@ use crate::{
CacheControlClient
,
KvRouter
,
cache_control
::{
PinState
,
create_cache_control_client
,
spawn_pin_prefix
},
metrics
::
RouterRequestMetrics
,
protocols
::{
TokensWithHashes
,
WorkerWithDpRank
},
},
preprocessor
::
PreprocessedRequest
,
protocols
::
common
::{
...
...
lib/llm/src/kv_router/recorder.rs
View file @
45be2fdc
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
crate
::
kv_router
::
protocols
::
RouterEvent
;
use
crate
::
recorder
::
Recorder
;
use
dynamo_kv_router
::
protocols
::
RouterEvent
;
// Type alias for backward compatibility
pub
type
KvRecorder
=
Recorder
<
RouterEvent
>
;
...
...
@@ -10,9 +10,8 @@ pub type KvRecorder = Recorder<RouterEvent>;
#[cfg(test)]
mod
tests
{
use
super
::
*
;
use
crate
::
kv_router
::
indexer
::
KvIndexer
;
use
crate
::
kv_router
::
indexer
::
KvIndexerMetrics
;
use
crate
::
kv_router
::
protocols
::
*
;
use
dynamo_kv_router
::
indexer
::{
KvIndexer
,
KvIndexerMetrics
};
use
dynamo_kv_router
::
protocols
::
*
;
use
std
::
time
::
Duration
;
use
tempfile
::
tempdir
;
use
tokio
::
fs
;
...
...
lib/llm/src/kv_router/scheduler.rs
View file @
45be2fdc
...
...
@@ -7,11 +7,8 @@ pub use dynamo_kv_router::scheduling::{
};
pub
use
dynamo_kv_router
::
selector
::
DefaultWorkerSelector
;
use
super
::
KvRouterConfig
;
use
super
::
RouterConfigOverride
;
use
super
::
WorkerSelector
;
use
super
::
metrics
::
ROUTER_QUEUE_METRICS
;
use
super
::
protocols
::{
OverlapScores
,
WorkerId
};
use
super
::
queue
::
SchedulerQueue
;
use
super
::
sequence
::{
ActiveSequencesMulti
,
SequenceError
,
SequenceRequest
,
create_multi_worker_sequences
,
...
...
@@ -19,6 +16,10 @@ use super::sequence::{
use
crate
::
discovery
::
RuntimeConfigWatch
;
use
crate
::
local_model
::
runtime_config
::
ModelRuntimeConfig
;
use
anyhow
::
Result
;
use
dynamo_kv_router
::{
config
::{
KvRouterConfig
,
RouterConfigOverride
},
protocols
::{
OverlapScores
,
WorkerId
},
};
use
dynamo_runtime
::
component
::
Component
;
use
dynamo_runtime
::
traits
::
DistributedRuntimeProvider
;
use
std
::
collections
::{
HashMap
,
HashSet
};
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment