Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
cdeda221
Unverified
Commit
cdeda221
authored
Jan 05, 2026
by
Yan Ru Pei
Committed by
GitHub
Jan 05, 2026
Browse files
fix: block on notification of at least one runtime config (#5191)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
e7918716
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
122 additions
and
98 deletions
+122
-98
lib/llm/src/discovery.rs
lib/llm/src/discovery.rs
+1
-1
lib/llm/src/discovery/model_manager.rs
lib/llm/src/discovery/model_manager.rs
+56
-22
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+18
-35
lib/llm/src/kv_router/scheduler.rs
lib/llm/src/kv_router/scheduler.rs
+11
-19
lib/llm/src/kv_router/subscriber.rs
lib/llm/src/kv_router/subscriber.rs
+36
-21
No files found.
lib/llm/src/discovery.rs
View file @
cdeda221
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
mod
model_manager
;
mod
model_manager
;
pub
use
model_manager
::{
ModelManager
,
ModelManagerError
};
pub
use
model_manager
::{
ModelManager
,
ModelManagerError
,
RuntimeConfigsWithNotify
};
mod
watcher
;
mod
watcher
;
pub
use
watcher
::{
ModelUpdate
,
ModelWatcher
};
pub
use
watcher
::{
ModelUpdate
,
ModelWatcher
};
...
...
lib/llm/src/discovery/model_manager.rs
View file @
cdeda221
...
@@ -8,7 +8,7 @@ use std::{
...
@@ -8,7 +8,7 @@ use std::{
use
dashmap
::{
DashMap
,
mapref
::
entry
::
Entry
};
use
dashmap
::{
DashMap
,
mapref
::
entry
::
Entry
};
use
parking_lot
::{
Mutex
,
RwLock
};
use
parking_lot
::{
Mutex
,
RwLock
};
use
tokio
::
sync
::
oneshot
;
use
tokio
::
sync
::
{
Notify
,
oneshot
}
;
use
crate
::
discovery
::
KvWorkerMonitor
;
use
crate
::
discovery
::
KvWorkerMonitor
;
...
@@ -81,8 +81,14 @@ pub struct ModelManager {
...
@@ -81,8 +81,14 @@ pub struct ModelManager {
/// Runtime configs per endpoint using DashMap for lock-free access.
/// Runtime configs per endpoint using DashMap for lock-free access.
/// Outer DashMap: keyed by EndpointId
/// Outer DashMap: keyed by EndpointId
/// Inner Arc<DashMap>: keyed by WorkerId, shared with KvScheduler
/// Inner RuntimeConfigsWithNotify: shared with KvScheduler
runtime_configs
:
DashMap
<
EndpointId
,
Arc
<
DashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>>>
,
runtime_configs
:
DashMap
<
EndpointId
,
Arc
<
RuntimeConfigsWithNotify
>>
,
}
/// Runtime configs for an endpoint with a notify for change notifications.
pub
struct
RuntimeConfigsWithNotify
{
pub
configs
:
DashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
,
pub
notify
:
Notify
,
}
}
impl
Default
for
ModelManager
{
impl
Default
for
ModelManager
{
...
@@ -619,11 +625,11 @@ impl ModelManager {
...
@@ -619,11 +625,11 @@ impl ModelManager {
/// Get or create a runtime config watcher for an endpoint.
/// Get or create a runtime config watcher for an endpoint.
/// Spawns a background task to watch DiscoveryQuery::EndpointModels.
/// Spawns a background task to watch DiscoveryQuery::EndpointModels.
/// Returns a shared
Arc<DashMap>
that KvScheduler can use directly.
/// Returns a shared
RuntimeConfigsWithNotify
that KvScheduler can use directly.
pub
async
fn
get_or_create_runtime_config_watcher
(
pub
async
fn
get_or_create_runtime_config_watcher
(
&
self
,
&
self
,
endpoint
:
&
Endpoint
,
endpoint
:
&
Endpoint
,
)
->
anyhow
::
Result
<
Arc
<
DashMap
<
WorkerId
,
Option
<
Model
RuntimeConfig
>>
>>
{
)
->
anyhow
::
Result
<
Arc
<
RuntimeConfig
sWithNotify
>>
{
let
endpoint_id
=
endpoint
.id
();
let
endpoint_id
=
endpoint
.id
();
// Fast path: return existing if present
// Fast path: return existing if present
...
@@ -632,22 +638,25 @@ impl ModelManager {
...
@@ -632,22 +638,25 @@ impl ModelManager {
}
}
// Atomic get-or-insert to avoid TOCTOU race
// Atomic get-or-insert to avoid TOCTOU race
let
inner_map
=
Arc
::
new
(
DashMap
::
new
());
let
inner
=
Arc
::
new
(
RuntimeConfigsWithNotify
{
let
(
map
,
is_new
)
=
match
self
.runtime_configs
.entry
(
endpoint_id
)
{
configs
:
DashMap
::
new
(),
notify
:
Notify
::
new
(),
});
let
(
result
,
is_new
)
=
match
self
.runtime_configs
.entry
(
endpoint_id
)
{
Entry
::
Occupied
(
e
)
=>
(
e
.get
()
.clone
(),
false
),
Entry
::
Occupied
(
e
)
=>
(
e
.get
()
.clone
(),
false
),
Entry
::
Vacant
(
e
)
=>
{
Entry
::
Vacant
(
e
)
=>
{
e
.insert
(
inner
_map
.clone
());
e
.insert
(
inner
.clone
());
(
inner
_map
,
true
)
(
inner
,
true
)
}
}
};
};
// Only spawn watcher if we were the one who inserted
// Only spawn watcher if we were the one who inserted
if
is_new
{
if
is_new
{
self
.spawn_runtime_config_watcher
(
endpoint
,
map
.clone
())
self
.spawn_runtime_config_watcher
(
endpoint
,
result
.clone
())
.await
?
;
.await
?
;
}
}
Ok
(
map
)
Ok
(
result
)
}
}
/// Get disaggregated endpoint for a specific worker.
/// Get disaggregated endpoint for a specific worker.
...
@@ -657,16 +666,17 @@ impl ModelManager {
...
@@ -657,16 +666,17 @@ impl ModelManager {
endpoint_id
:
&
EndpointId
,
endpoint_id
:
&
EndpointId
,
worker_id
:
WorkerId
,
worker_id
:
WorkerId
,
)
->
Option
<
DisaggregatedEndpoint
>
{
)
->
Option
<
DisaggregatedEndpoint
>
{
let
inner
_map
=
self
.runtime_configs
.get
(
endpoint_id
)
?
;
let
inner
=
self
.runtime_configs
.get
(
endpoint_id
)
?
;
let
config_ref
=
inner
_map
.get
(
&
worker_id
)
?
;
let
config_ref
=
inner
.configs
.get
(
&
worker_id
)
?
;
config_ref
.as_ref
()
?
.disaggregated_endpoint
.clone
()
config_ref
.as_ref
()
?
.disaggregated_endpoint
.clone
()
}
}
/// Spawn background task to watch runtime configs via discovery.
/// Spawn background task to watch runtime configs via discovery.
/// Blocks until at least one worker with a runtime config is available.
async
fn
spawn_runtime_config_watcher
(
async
fn
spawn_runtime_config_watcher
(
&
self
,
&
self
,
endpoint
:
&
Endpoint
,
endpoint
:
&
Endpoint
,
inner
_map
:
Arc
<
DashMap
<
WorkerId
,
Option
<
Model
RuntimeConfig
>>
>
,
inner
:
Arc
<
RuntimeConfig
sWithNotify
>
,
)
->
anyhow
::
Result
<
()
>
{
)
->
anyhow
::
Result
<
()
>
{
let
component
=
endpoint
.component
();
let
component
=
endpoint
.component
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
...
@@ -693,7 +703,29 @@ impl ModelManager {
...
@@ -693,7 +703,29 @@ impl ModelManager {
let
client
=
endpoint
.client
()
.await
?
;
let
client
=
endpoint
.client
()
.await
?
;
let
mut
instance_ids_rx
=
client
.instance_avail_watcher
();
let
mut
instance_ids_rx
=
client
.instance_avail_watcher
();
// Spawn background task to update inner_map
// Wait for at least one worker with runtime config before proceeding.
// This ensures the DashMap is populated before KvScheduler starts.
tracing
::
info!
(
"ModelManager: Waiting for at least one worker with runtime config..."
);
runtime_configs_rx
.changed
()
.await
.map_err
(|
_
|
anyhow
::
anyhow!
(
"runtime configs watch sender shutdown while waiting"
))
?
;
// Populate initial state
{
let
instance_ids
=
instance_ids_rx
.borrow
();
let
configs
=
runtime_configs_rx
.borrow
();
for
worker_id
in
instance_ids
.iter
()
{
let
config
=
configs
.get
(
worker_id
)
.cloned
();
inner
.configs
.insert
(
*
worker_id
,
config
);
}
tracing
::
info!
(
"ModelManager: Found {} workers, proceeding"
,
inner
.configs
.len
()
);
}
// Spawn background task to update configs for future changes
let
cancel_token
=
cancellation_token
.clone
();
let
cancel_token
=
cancellation_token
.clone
();
tokio
::
spawn
(
async
move
{
tokio
::
spawn
(
async
move
{
tracing
::
trace!
(
"ModelManager runtime config watcher started"
);
tracing
::
trace!
(
"ModelManager runtime config watcher started"
);
...
@@ -725,30 +757,32 @@ impl ModelManager {
...
@@ -725,30 +757,32 @@ impl ModelManager {
// Update the DashMap
// Update the DashMap
// First, remove workers that no longer exist
// First, remove workers that no longer exist
let
current_workers
:
HashSet
<
WorkerId
>
=
let
current_workers
:
HashSet
<
WorkerId
>
=
inner
_map
.iter
()
.map
(|
r
|
*
r
.key
())
.collect
();
inner
.configs
.iter
()
.map
(|
r
|
*
r
.key
())
.collect
();
let
new_workers
:
HashSet
<
WorkerId
>
=
new_instance_ids
.iter
()
.copied
()
.collect
();
let
new_workers
:
HashSet
<
WorkerId
>
=
new_instance_ids
.iter
()
.copied
()
.collect
();
for
removed_worker
in
current_workers
.difference
(
&
new_workers
)
{
for
removed_worker
in
current_workers
.difference
(
&
new_workers
)
{
inner
_map
.remove
(
removed_worker
);
inner
.configs
.remove
(
removed_worker
);
}
}
// Then, add/update workers
// Then, add/update workers
for
worker_id
in
&
new_instance_ids
{
for
worker_id
in
&
new_instance_ids
{
let
config
=
new_configs
.get
(
worker_id
)
.cloned
();
let
config
=
new_configs
.get
(
worker_id
)
.cloned
();
if
config
.is_some
()
{
if
config
.is_some
()
{
let
prev_config
=
inner
_map
.get
(
worker_id
);
let
prev_config
=
inner
.configs
.get
(
worker_id
);
if
prev_config
.as_ref
()
.map
(|
r
|
r
.value
())
!=
Some
(
&
config
)
{
if
prev_config
.as_ref
()
.map
(|
r
|
r
.value
())
!=
Some
(
&
config
)
{
tracing
::
info!
(
tracing
::
info!
(
"ModelManager: Runtime config found for worker_id: {}"
,
"ModelManager: Runtime config found for worker_id: {worker_id}"
worker_id
);
);
}
}
}
}
inner
_map
.insert
(
*
worker_id
,
config
);
inner
.configs
.insert
(
*
worker_id
,
config
);
}
}
// Notify waiters that configs have changed
inner
.notify
.notify_waiters
();
tracing
::
trace!
(
tracing
::
trace!
(
"ModelManager: Updated runtime_configs with {} workers"
,
"ModelManager: Updated runtime_configs with {} workers"
,
inner
_map
.len
()
inner
.configs
.len
()
);
);
}
}
tracing
::
trace!
(
"ModelManager runtime config watcher shutting down"
);
tracing
::
trace!
(
"ModelManager runtime config watcher shutting down"
);
...
...
lib/llm/src/kv_router.rs
View file @
cdeda221
...
@@ -6,7 +6,6 @@ use std::sync::Arc;
...
@@ -6,7 +6,6 @@ use std::sync::Arc;
use
std
::
time
::
Duration
;
use
std
::
time
::
Duration
;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
dashmap
::
DashMap
;
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
use
dynamo_runtime
::{
use
dynamo_runtime
::{
component
::{
Client
,
Endpoint
},
component
::{
Client
,
Endpoint
},
...
@@ -39,6 +38,7 @@ pub use prefill_router::PrefillRouter;
...
@@ -39,6 +38,7 @@ pub use prefill_router::PrefillRouter;
use
worker_query
::
WorkerQueryClient
;
use
worker_query
::
WorkerQueryClient
;
use
crate
::{
use
crate
::{
discovery
::
RuntimeConfigsWithNotify
,
kv_router
::{
kv_router
::{
approx
::
PruneConfig
,
approx
::
PruneConfig
,
indexer
::{
KvIndexer
,
KvIndexerInterface
,
KvRouterError
,
OverlapScores
,
RouterEvent
},
indexer
::{
KvIndexer
,
KvIndexerInterface
,
KvRouterError
,
OverlapScores
,
RouterEvent
},
...
@@ -281,7 +281,7 @@ impl KvRouter {
...
@@ -281,7 +281,7 @@ impl KvRouter {
pub
async
fn
new
(
pub
async
fn
new
(
endpoint
:
Endpoint
,
endpoint
:
Endpoint
,
client
:
Client
,
client
:
Client
,
workers_with_configs
:
Arc
<
DashMap
<
protocols
::
WorkerId
,
Option
<
Model
RuntimeConfig
>>
>
,
workers_with_configs
:
Arc
<
RuntimeConfig
sWithNotify
>
,
block_size
:
u32
,
block_size
:
u32
,
selector
:
Option
<
Box
<
dyn
WorkerSelector
+
Send
+
Sync
>>
,
selector
:
Option
<
Box
<
dyn
WorkerSelector
+
Send
+
Sync
>>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
kv_router_config
:
Option
<
KvRouterConfig
>
,
...
@@ -291,8 +291,6 @@ impl KvRouter {
...
@@ -291,8 +291,6 @@ impl KvRouter {
let
component
=
endpoint
.component
();
let
component
=
endpoint
.component
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
cancellation_token
=
component
.drt
()
.primary_token
();
let
instance_ids_rx
=
client
.instance_avail_watcher
();
// Watch for runtime config updates via discovery interface
// Watch for runtime config updates via discovery interface
// (still needed for WorkerQueryClient and background tasks)
// (still needed for WorkerQueryClient and background tasks)
let
discovery
=
component
.drt
()
.discovery
();
let
discovery
=
component
.drt
()
.discovery
();
...
@@ -339,8 +337,7 @@ impl KvRouter {
...
@@ -339,8 +337,7 @@ impl KvRouter {
let
scheduler
=
KvScheduler
::
start
(
let
scheduler
=
KvScheduler
::
start
(
component
.clone
(),
component
.clone
(),
block_size
,
block_size
,
instance_ids_rx
,
workers_with_configs
.clone
(),
workers_with_configs
,
selector
,
selector
,
kv_router_config
.router_replica_sync
,
kv_router_config
.router_replica_sync
,
consumer_id
.clone
(),
consumer_id
.clone
(),
...
@@ -354,39 +351,25 @@ impl KvRouter {
...
@@ -354,39 +351,25 @@ impl KvRouter {
tracing
::
info!
(
"Worker query client initialized"
);
tracing
::
info!
(
"Worker query client initialized"
);
// Start KV event subscriber background process (only when use_kv_events is enabled)
// Start KV event subscriber background process (only when use_kv_events is enabled)
// We block here until at least one worker runtime config is registered,
// model_manager.get_or_create_runtime_config_watcher() guarantees at least one worker exists.
// then spawn the subscriber. This ensures the router is ready before accepting requests.
if
kv_router_config
.use_kv_events
if
kv_router_config
.use_kv_events
&&
let
Indexer
::
KvIndexer
(
ref
kv_indexer
)
=
indexer
&&
let
Indexer
::
KvIndexer
(
ref
kv_indexer
)
=
indexer
{
{
let
mut
runtime_configs_rx_clone
=
runtime_configs_rx
.clone
();
// model_manager guarantees workers_with_configs is populated
// Wait for at least one worker before starting the subscriber
// Wait for at least one worker runtime config to be registered
while
workers_with_configs
.configs
.is_empty
()
{
tracing
::
info!
(
"Waiting for at least one worker runtime config to be registered..."
);
tracing
::
info!
(
"KV router waiting for at least one worker..."
);
let
(
all_local_indexer
,
count
)
=
loop
{
workers_with_configs
.notify
.notified
()
.await
;
{
let
configs
=
runtime_configs_rx_clone
.borrow
();
if
!
configs
.is_empty
()
{
let
all_local_indexer
=
configs
.values
()
.all
(|
c
|
c
.enable_local_indexer
);
break
(
all_local_indexer
,
configs
.len
());
}
}
}
// Wait for changes to runtime_configs
let
count
=
workers_with_configs
.configs
.len
();
tokio
::
select!
{
let
all_local_indexer
=
workers_with_configs
_
=
cancellation_token
.cancelled
()
=>
{
.configs
tracing
::
debug!
(
"KvRouter startup cancelled while waiting for workers"
);
.iter
()
anyhow
::
bail!
(
"KvRouter startup cancelled"
);
.filter_map
(|
r
|
r
.value
()
.as_ref
()
.map
(|
c
|
c
.enable_local_indexer
))
}
.all
(|
b
|
b
);
result
=
runtime_configs_rx_clone
.changed
()
=>
{
if
result
.is_err
()
{
tracing
::
info!
(
"Found {count} worker(s), starting KV event subscriber"
);
tracing
::
debug!
(
"Runtime configs channel closed"
);
anyhow
::
bail!
(
"Runtime configs channel closed before any workers registered"
);
}
}
}
};
tracing
::
info!
(
"Found {count} worker runtime config(s), starting KV event subscriber"
);
// Start subscriber - setup runs synchronously, then spawns background loop internally
// Start subscriber - setup runs synchronously, then spawns background loop internally
if
all_local_indexer
{
if
all_local_indexer
{
...
...
lib/llm/src/kv_router/scheduler.rs
View file @
cdeda221
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
crate
::
discovery
::
RuntimeConfigsWithNotify
;
use
crate
::
local_model
::
runtime_config
::
ModelRuntimeConfig
;
use
crate
::
local_model
::
runtime_config
::
ModelRuntimeConfig
;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
dashmap
::
DashMap
;
use
dynamo_runtime
::
component
::
Component
;
use
dynamo_runtime
::
component
::
Component
;
use
dynamo_runtime
::
traits
::
DistributedRuntimeProvider
;
use
dynamo_runtime
::
traits
::
DistributedRuntimeProvider
;
use
dynamo_runtime
::
traits
::
events
::
EventPublisher
;
use
dynamo_runtime
::
traits
::
events
::
EventPublisher
;
...
@@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize};
...
@@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize};
use
std
::
collections
::{
HashMap
,
HashSet
};
use
std
::
collections
::{
HashMap
,
HashSet
};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
std
::
time
::
Duration
;
use
std
::
time
::
Duration
;
use
tokio
::
sync
::
watch
;
use
super
::
KV_HIT_RATE_SUBJECT
;
use
super
::
KV_HIT_RATE_SUBJECT
;
use
super
::
KvRouterConfig
;
use
super
::
KvRouterConfig
;
...
@@ -97,16 +96,17 @@ impl KvScheduler {
...
@@ -97,16 +96,17 @@ impl KvScheduler {
pub
async
fn
start
(
pub
async
fn
start
(
component
:
Component
,
component
:
Component
,
block_size
:
u32
,
block_size
:
u32
,
instance_ids_rx
:
watch
::
Receiver
<
Vec
<
u64
>>
,
workers_with_configs
:
Arc
<
RuntimeConfigsWithNotify
>
,
workers_with_configs
:
Arc
<
DashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>>
,
selector
:
Option
<
Box
<
dyn
WorkerSelector
+
Send
+
Sync
>>
,
selector
:
Option
<
Box
<
dyn
WorkerSelector
+
Send
+
Sync
>>
,
replica_sync
:
bool
,
replica_sync
:
bool
,
router_uuid
:
String
,
router_uuid
:
String
,
)
->
Result
<
Self
,
KvSchedulerError
>
{
)
->
Result
<
Self
,
KvSchedulerError
>
{
let
selector
=
selector
.unwrap_or
(
Box
::
new
(
DefaultWorkerSelector
::
default
()));
let
selector
=
selector
.unwrap_or
(
Box
::
new
(
DefaultWorkerSelector
::
default
()));
// Get initial workers from DashMap for slot initialization
// Get initial workers from DashMap for slot initialization.
// ModelManager guarantees at least one worker is present before KvRouter::new() is called.
let
initial_workers
:
HashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
=
workers_with_configs
let
initial_workers
:
HashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
=
workers_with_configs
.configs
.iter
()
.iter
()
.map
(|
r
|
(
*
r
.key
(),
r
.value
()
.clone
()))
.map
(|
r
|
(
*
r
.key
(),
r
.value
()
.clone
()))
.collect
();
.collect
();
...
@@ -119,33 +119,29 @@ impl KvScheduler {
...
@@ -119,33 +119,29 @@ impl KvScheduler {
router_uuid
,
router_uuid
,
));
));
// Spawn background task to monitor workers_with_configs changes and update slots
// Spawn background task to sync slots with DashMap when notified of changes.
// ModelManager's watcher updates the DashMap and notifies; we wait on notify here.
let
slots_monitor
=
slots
.clone
();
let
slots_monitor
=
slots
.clone
();
let
workers_monitor
=
workers_with_configs
.clone
();
let
workers_monitor
=
workers_with_configs
.clone
();
let
mut
instance_ids_monitor_rx
=
instance_ids_rx
.clone
();
let
monitor_cancel_token
=
component
.drt
()
.child_token
();
let
monitor_cancel_token
=
component
.drt
()
.child_token
();
tokio
::
spawn
(
async
move
{
tokio
::
spawn
(
async
move
{
tracing
::
trace!
(
"KvScheduler workers monitoring task started"
);
tracing
::
trace!
(
"KvScheduler workers monitoring task started"
);
let
mut
last_workers
:
HashSet
<
WorkerId
>
=
HashSet
::
new
();
let
mut
last_workers
:
HashSet
<
WorkerId
>
=
HashSet
::
new
();
loop
{
loop
{
// Wait for
instance changes (ModelManage
r
h
an
dles config updates to the DashMap)
// Wait for
notification o
r
c
an
cellation
tokio
::
select!
{
tokio
::
select!
{
_
=
monitor_cancel_token
.cancelled
()
=>
{
_
=
monitor_cancel_token
.cancelled
()
=>
{
tracing
::
trace!
(
"KvScheduler workers monitoring task shutting down"
);
tracing
::
trace!
(
"KvScheduler workers monitoring task shutting down"
);
break
;
break
;
}
}
result
=
instance_ids_monitor_rx
.changed
()
=>
{
_
=
workers_monitor
.notify
.notified
()
=>
{}
if
result
.is_err
()
{
tracing
::
warn!
(
"instance IDs watch sender shutdown in KvScheduler monitor"
);
break
;
}
}
}
}
// Get current workers from DashMap
// Get current workers from DashMap
let
current_workers
:
HashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
=
let
current_workers
:
HashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
=
workers_monitor
workers_monitor
.configs
.iter
()
.iter
()
.map
(|
r
|
(
*
r
.key
(),
r
.value
()
.clone
()))
.map
(|
r
|
(
*
r
.key
(),
r
.value
()
.clone
()))
.collect
();
.collect
();
...
@@ -156,13 +152,8 @@ impl KvScheduler {
...
@@ -156,13 +152,8 @@ impl KvScheduler {
if
current_worker_ids
!=
last_workers
{
if
current_worker_ids
!=
last_workers
{
slots_monitor
.update_workers
(
current_workers
);
slots_monitor
.update_workers
(
current_workers
);
last_workers
=
current_worker_ids
;
last_workers
=
current_worker_ids
;
tracing
::
trace!
(
"KvScheduler: Updated slots with {} workers"
,
last_workers
.len
()
);
}
}
}
}
tracing
::
trace!
(
"KvScheduler workers monitoring task shutting down"
);
});
});
let
slots_clone
=
slots
.clone
();
let
slots_clone
=
slots
.clone
();
...
@@ -202,6 +193,7 @@ impl KvScheduler {
...
@@ -202,6 +193,7 @@ impl KvScheduler {
// Read the current workers configuration from DashMap
// Read the current workers configuration from DashMap
let
workers
:
HashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
=
workers_scheduler
let
workers
:
HashMap
<
WorkerId
,
Option
<
ModelRuntimeConfig
>>
=
workers_scheduler
.configs
.iter
()
.iter
()
.map
(|
r
|
(
*
r
.key
(),
r
.value
()
.clone
()))
.map
(|
r
|
(
*
r
.key
(),
r
.value
()
.clone
()))
.collect
();
.collect
();
...
...
lib/llm/src/kv_router/subscriber.rs
View file @
cdeda221
...
@@ -36,6 +36,34 @@ const CHECK_INTERVAL_JITTER_MS: i64 = 100;
...
@@ -36,6 +36,34 @@ const CHECK_INTERVAL_JITTER_MS: i64 = 100;
const
WORKER_QUERY_MAX_RETRIES
:
u32
=
8
;
const
WORKER_QUERY_MAX_RETRIES
:
u32
=
8
;
const
WORKER_QUERY_INITIAL_BACKOFF_MS
:
u64
=
200
;
const
WORKER_QUERY_INITIAL_BACKOFF_MS
:
u64
=
200
;
// ============================================================================
// Discovery Helpers
// ============================================================================
/// Wait for at least one worker instance to be discovered.
/// Returns a peekable stream of discovery events for the generate endpoint.
async
fn
wait_for_worker_instance
(
component
:
&
Component
,
cancellation_token
:
&
CancellationToken
,
)
->
Result
<
std
::
pin
::
Pin
<
Box
<
dyn
futures
::
Stream
<
Item
=
Result
<
DiscoveryEvent
>>
+
Send
>>>
{
let
discovery_client
=
component
.drt
()
.discovery
();
let
generate_discovery_key
=
DiscoveryQuery
::
Endpoint
{
namespace
:
component
.namespace
()
.name
()
.to_string
(),
component
:
component
.name
()
.to_string
(),
endpoint
:
"generate"
.to_string
(),
};
let
mut
stream
=
discovery_client
.list_and_watch
(
generate_discovery_key
,
Some
(
cancellation_token
.clone
()))
.await
?
.peekable
();
tracing
::
info!
(
"KV subscriber waiting for at least one worker instance..."
);
std
::
pin
::
Pin
::
new
(
&
mut
stream
)
.peek
()
.await
;
Ok
(
Box
::
pin
(
stream
))
}
// ============================================================================
// ============================================================================
// Local KvIndexer-based Recovery
// Local KvIndexer-based Recovery
// ============================================================================
// ============================================================================
...
@@ -473,19 +501,13 @@ pub async fn start_kv_router_background(
...
@@ -473,19 +501,13 @@ pub async fn start_kv_router_background(
// Cleanup orphaned consumers on startup
// Cleanup orphaned consumers on startup
cleanup_orphaned_consumers
(
&
mut
nats_queue
,
&
component
,
&
consumer_id
)
.await
;
cleanup_orphaned_consumers
(
&
mut
nats_queue
,
&
component
,
&
consumer_id
)
.await
;
// Get the generate endpoint and watch for instance deletions
// Wait for at least one worker instance before proceeding
let
generate_endpoint
=
component
.endpoint
(
"generate"
);
let
mut
instance_event_stream
=
let
discovery_client
=
component
.drt
()
.discovery
();
wait_for_worker_instance
(
&
component
,
&
cancellation_token
)
.await
?
;
let
generate_discovery_key
=
DiscoveryQuery
::
Endpoint
{
namespace
:
component
.namespace
()
.name
()
.to_string
(),
component
:
component
.name
()
.to_string
(),
endpoint
:
"generate"
.to_string
(),
};
let
mut
instance_event_stream
=
discovery_client
.list_and_watch
(
generate_discovery_key
,
Some
(
cancellation_token
.clone
()))
.await
?
;
// Watch for router deletions to clean up orphaned consumers via discovery
// Watch for router deletions to clean up orphaned consumers via discovery
let
generate_endpoint
=
component
.endpoint
(
"generate"
);
let
discovery_client
=
component
.drt
()
.discovery
();
let
router_discovery_key
=
router_discovery_query
(
component
.namespace
()
.name
());
let
router_discovery_key
=
router_discovery_query
(
component
.namespace
()
.name
());
let
mut
router_event_stream
=
discovery_client
let
mut
router_event_stream
=
discovery_client
.list_and_watch
(
router_discovery_key
,
Some
(
cancellation_token
.clone
()))
.list_and_watch
(
router_discovery_key
,
Some
(
cancellation_token
.clone
()))
...
@@ -725,16 +747,9 @@ pub async fn start_kv_router_background_nats_core(
...
@@ -725,16 +747,9 @@ pub async fn start_kv_router_background_nats_core(
"KV Router using NATS Core subscription (local_indexer mode)"
"KV Router using NATS Core subscription (local_indexer mode)"
);
);
// Get the generate endpoint and watch for instance events (add/remove)
// Wait for at least one worker instance before proceeding
let
discovery_client
=
component
.drt
()
.discovery
();
let
mut
instance_event_stream
=
let
generate_discovery_key
=
DiscoveryQuery
::
Endpoint
{
wait_for_worker_instance
(
&
component
,
&
cancellation_token
)
.await
?
;
namespace
:
component
.namespace
()
.name
()
.to_string
(),
component
:
component
.name
()
.to_string
(),
endpoint
:
"generate"
.to_string
(),
};
let
mut
instance_event_stream
=
discovery_client
.list_and_watch
(
generate_discovery_key
,
Some
(
cancellation_token
.clone
()))
.await
?
;
// Drain and process all existing workers before spawning the background loop.
// Drain and process all existing workers before spawning the background loop.
// list_and_watch returns existing instances first, so we poll with a short timeout
// list_and_watch returns existing instances first, so we poll with a short timeout
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment