Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
7731b024
Unverified
Commit
7731b024
authored
Oct 23, 2025
by
Graham King
Committed by
GitHub
Oct 23, 2025
Browse files
chore: Use KeyValueStoreManager instead of etcd::Client (#3822)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
6f9be594
Changes
36
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
82 additions
and
168 deletions
+82
-168
lib/bindings/c/src/lib.rs
lib/bindings/c/src/lib.rs
+1
-1
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+4
-4
lib/bindings/python/rust/llm/kv.rs
lib/bindings/python/rust/llm/kv.rs
+1
-1
lib/llm/src/block_manager/controller/client.rs
lib/llm/src/block_manager/controller/client.rs
+4
-1
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+16
-19
lib/llm/src/discovery/worker_monitor.rs
lib/llm/src/discovery/worker_monitor.rs
+3
-3
lib/llm/src/entrypoint/input/http.rs
lib/llm/src/entrypoint/input/http.rs
+3
-13
lib/llm/src/http/service/service_v2.rs
lib/llm/src/http/service/service_v2.rs
+9
-40
lib/llm/src/kv_router/protocols.rs
lib/llm/src/kv_router/protocols.rs
+1
-1
lib/llm/src/kv_router/publisher.rs
lib/llm/src/kv_router/publisher.rs
+3
-3
lib/llm/src/kv_router/scoring.rs
lib/llm/src/kv_router/scoring.rs
+6
-6
lib/llm/src/kv_router/sequence.rs
lib/llm/src/kv_router/sequence.rs
+2
-2
lib/llm/src/kv_router/subscriber.rs
lib/llm/src/kv_router/subscriber.rs
+2
-2
lib/llm/src/local_model.rs
lib/llm/src/local_model.rs
+3
-13
lib/llm/src/protocols/common/preprocessor.rs
lib/llm/src/protocols/common/preprocessor.rs
+1
-1
lib/llm/src/protocols/openai/nvext.rs
lib/llm/src/protocols/openai/nvext.rs
+1
-1
lib/llm/tests/block_manager.rs
lib/llm/tests/block_manager.rs
+4
-4
lib/runtime/src/component.rs
lib/runtime/src/component.rs
+6
-6
lib/runtime/src/component/client.rs
lib/runtime/src/component/client.rs
+10
-10
lib/runtime/src/discovery.rs
lib/runtime/src/discovery.rs
+2
-37
No files found.
lib/bindings/c/src/lib.rs
View file @
7731b024
...
...
@@ -154,7 +154,7 @@ fn dynamo_create_kv_publisher(
{
Ok
(
drt
)
=>
{
let
backend
=
drt
.namespace
(
namespace
)
?
.component
(
component
)
?
;
KvEventPublisher
::
new
(
backend
,
worker_id
,
kv_block_size
,
None
)
KvEventPublisher
::
new
(
backend
,
worker_id
as
u64
,
kv_block_size
,
None
)
}
Err
(
e
)
=>
Err
(
e
),
}
...
...
lib/bindings/python/rust/lib.rs
View file @
7731b024
...
...
@@ -771,7 +771,7 @@ impl Endpoint {
})
}
fn
lease_id
(
&
self
)
->
i
64
{
fn
lease_id
(
&
self
)
->
u
64
{
self
.inner
.drt
()
.primary_lease
()
...
...
@@ -807,7 +807,7 @@ impl Namespace {
impl
Client
{
/// Get list of current instances.
/// Replaces endpoint_ids.
fn
instance_ids
(
&
self
)
->
Vec
<
i
64
>
{
fn
instance_ids
(
&
self
)
->
Vec
<
u
64
>
{
self
.router.client
.instance_ids
()
}
...
...
@@ -819,7 +819,7 @@ impl Client {
inner
.wait_for_instances
()
.await
.map
(|
v
|
v
.into_iter
()
.map
(|
cei
|
cei
.id
())
.collect
::
<
Vec
<
i
64
>>
())
.map
(|
v
|
v
.into_iter
()
.map
(|
cei
|
cei
.id
())
.collect
::
<
Vec
<
u
64
>>
())
.map_err
(
to_pyerr
)
})
}
...
...
@@ -920,7 +920,7 @@ impl Client {
&
self
,
py
:
Python
<
'p
>
,
request
:
PyObject
,
instance_id
:
i
64
,
instance_id
:
u
64
,
annotated
:
Option
<
bool
>
,
context
:
Option
<
context
::
Context
>
,
)
->
PyResult
<
Bound
<
'p
,
PyAny
>>
{
...
...
lib/bindings/python/rust/llm/kv.rs
View file @
7731b024
...
...
@@ -322,7 +322,7 @@ pub(crate) struct OverlapScores {
#[pymethods]
impl
OverlapScores
{
#[getter]
fn
scores
(
&
self
)
->
HashMap
<
(
i
64
,
u32
),
u32
>
{
fn
scores
(
&
self
)
->
HashMap
<
(
u
64
,
u32
),
u32
>
{
// Return scores with full WorkerWithDpRank granularity as (worker_id, dp_rank) tuples
self
.inner
.scores
...
...
lib/llm/src/block_manager/controller/client.rs
View file @
7731b024
...
...
@@ -59,7 +59,10 @@ impl ControlClient {
}
async
fn
execute
<
T
:
DeserializeOwned
>
(
&
self
,
message
:
ControlMessage
)
->
Result
<
T
>
{
let
mut
stream
=
self
.client
.direct
(
message
.into
(),
self
.instance_id
)
.await
?
;
let
mut
stream
=
self
.client
.direct
(
message
.into
(),
self
.instance_id
as
u64
)
.await
?
;
let
resp
=
stream
.next
()
.await
...
...
lib/llm/src/discovery/watcher.rs
View file @
7731b024
...
...
@@ -567,19 +567,22 @@ impl ModelWatcher {
}
/// All the registered ModelDeploymentCard with the EndpointId they are attached to, one per instance
pub
async
fn
all_cards
(
&
self
)
->
anyhow
::
Result
<
Vec
<
(
EndpointId
,
ModelDeploymentCard
)
>>
{
let
Some
(
etcd_client
)
=
self
.drt
.etcd_client
()
else
{
anyhow
::
bail!
(
"all_cards: Missing etcd client"
);
async
fn
all_cards
(
&
self
)
->
anyhow
::
Result
<
Vec
<
(
EndpointId
,
ModelDeploymentCard
)
>>
{
let
store
=
self
.drt
.store
();
//let kvs = etcd_client.kv_get_prefix(model_card::ROOT_PATH).await?;
let
Some
(
card_bucket
)
=
store
.get_bucket
(
model_card
::
ROOT_PATH
)
.await
?
else
{
// no cards
return
Ok
(
vec!
[]);
};
let
kvs
=
etcd_client
.kv_get_prefix
(
model_card
::
ROOT_PATH
)
.await
?
;
let
mut
results
=
Vec
::
with_capacity
(
kvs
.len
());
for
kv
in
kvs
{
let
maybe_convert
=
serde_json
::
from_slice
::
<
ModelDeploymentCard
>
(
kv
.value
());
let
r
=
match
maybe_convert
{
let
entries
=
card_bucket
.entries
(
)
.await
?
;
let
mut
results
=
Vec
::
with_capacity
(
entries
.len
());
for
(
key
,
card_bytes
)
in
entries
{
let
r
=
match
serde_json
::
from_slice
::
<
ModelDeploymentCard
>
(
&
card_bytes
)
{
Ok
(
card
)
=>
{
let
maybe_endpoint_id
=
kv
.key_str
()
.map_err
(|
err
|
err
.into
())
.and_then
(|
k
|
{
etcd_key_extract
(
k
)
.map
(|(
endpoint_id
,
_
instance_id
)|
endpoint_id
)
});
let
maybe_endpoint_id
=
etcd_key_extract
(
&
key
)
.map
(|(
endpoint_id
,
_
instance_id
)|
endpoint_id
);
let
endpoint_id
=
match
maybe_endpoint_id
{
Ok
(
eid
)
=>
eid
,
Err
(
err
)
=>
{
...
...
@@ -590,14 +593,8 @@ impl ModelWatcher {
(
endpoint_id
,
card
)
}
Err
(
err
)
=>
{
match
kv
.value_str
()
{
Ok
(
value
)
=>
{
tracing
::
error!
(
%
err
,
value
,
"Invalid JSON in model card"
);
}
Err
(
value_str_err
)
=>
{
tracing
::
error!
(
original_error
=%
err
,
%
value_str_err
,
"Invalid UTF-8 string in model card, expected JSON"
);
}
}
let
value
=
String
::
from_utf8_lossy
(
&
card_bytes
);
tracing
::
error!
(
%
err
,
%
value
,
"Invalid JSON in model card"
);
continue
;
}
};
...
...
lib/llm/src/discovery/worker_monitor.rs
View file @
7731b024
...
...
@@ -52,7 +52,7 @@ impl WorkerLoadState {
/// Worker monitor for tracking KV cache usage and busy states
pub
struct
KvWorkerMonitor
{
client
:
Arc
<
Client
>
,
worker_load_states
:
Arc
<
RwLock
<
HashMap
<
i
64
,
WorkerLoadState
>>>
,
worker_load_states
:
Arc
<
RwLock
<
HashMap
<
u
64
,
WorkerLoadState
>>>
,
busy_threshold
:
f64
,
}
...
...
@@ -67,7 +67,7 @@ impl KvWorkerMonitor {
}
/// Get the worker load states for external access
pub
fn
load_states
(
&
self
)
->
Arc
<
RwLock
<
HashMap
<
i
64
,
WorkerLoadState
>>>
{
pub
fn
load_states
(
&
self
)
->
Arc
<
RwLock
<
HashMap
<
u
64
,
WorkerLoadState
>>>
{
self
.worker_load_states
.clone
()
}
}
...
...
@@ -154,7 +154,7 @@ impl WorkerLoadMonitor for KvWorkerMonitor {
// Recalculate all busy instances and update
let
states
=
worker_load_states
.read
()
.unwrap
();
let
busy_instances
:
Vec
<
i
64
>
=
states
let
busy_instances
:
Vec
<
u
64
>
=
states
.iter
()
.filter_map
(|(
&
id
,
state
)|
{
state
.is_busy
(
busy_threshold
)
.then_some
(
id
)
...
...
lib/llm/src/entrypoint/input/http.rs
View file @
7731b024
...
...
@@ -64,10 +64,10 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
let
http_service
=
match
engine_config
{
EngineConfig
::
Dynamic
(
_
)
=>
{
let
distributed_runtime
=
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
;
let
etcd_client
=
distributed_runtime
.etcd_client
();
// This allows the /health endpoint to query etcd for active instances
http_service_builder
=
http_service_builder
.
with_etcd_client
(
etcd_client
.clone
());
http_service_builder
=
http_service_builder
.
store
(
distributed_runtime
.store
()
.clone
());
let
http_service
=
http_service_builder
.build
()
?
;
let
etcd_client
=
distributed_runtime
.etcd_client
();
match
etcd_client
{
Some
(
ref
etcd_client
)
=>
{
let
router_config
=
engine_config
.local_model
()
.router_config
();
...
...
@@ -241,17 +241,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
http_service
.custom_backend_registry
.as_ref
(),
)
{
// Create DistributedRuntime for polling, matching the engine's mode
// Check if we have etcd_client to determine if we're in dynamic or static mode
let
drt
=
if
http_service
.state
()
.etcd_client
()
.is_some
()
{
// Dynamic mode: use from_settings() which respects environment (includes etcd)
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
}
else
{
// Static mode: no etcd
let
dst_config
=
dynamo_runtime
::
distributed
::
DistributedConfig
::
from_settings
(
true
);
DistributedRuntime
::
new
(
runtime
.clone
(),
dst_config
)
.await
?
};
let
drt
=
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
;
tracing
::
info!
(
namespace_component_endpoint
=%
namespace_component_endpoint
,
polling_interval_secs
=
polling_interval
,
...
...
lib/llm/src/http/service/service_v2.rs
View file @
7731b024
...
...
@@ -20,10 +20,7 @@ use axum_server::tls_rustls::RustlsConfig;
use
derive_builder
::
Builder
;
use
dynamo_runtime
::
logging
::
make_request_span
;
use
dynamo_runtime
::
metrics
::
prometheus_names
::
name_prefix
;
use
dynamo_runtime
::
storage
::
key_value_store
::
EtcdStore
;
use
dynamo_runtime
::
storage
::
key_value_store
::
KeyValueStore
;
use
dynamo_runtime
::
storage
::
key_value_store
::
MemoryStore
;
use
dynamo_runtime
::
transports
::
etcd
;
use
dynamo_runtime
::
storage
::
key_value_store
::
KeyValueStoreManager
;
use
std
::
net
::
SocketAddr
;
use
tokio
::
task
::
JoinHandle
;
use
tokio_util
::
sync
::
CancellationToken
;
...
...
@@ -33,8 +30,7 @@ use tower_http::trace::TraceLayer;
pub
struct
State
{
metrics
:
Arc
<
Metrics
>
,
manager
:
Arc
<
ModelManager
>
,
etcd_client
:
Option
<
etcd
::
Client
>
,
store
:
Arc
<
dyn
KeyValueStore
>
,
store
:
KeyValueStoreManager
,
flags
:
StateFlags
,
}
...
...
@@ -75,12 +71,11 @@ impl StateFlags {
}
impl
State
{
pub
fn
new
(
manager
:
Arc
<
ModelManager
>
)
->
Self
{
pub
fn
new
(
manager
:
Arc
<
ModelManager
>
,
store
:
KeyValueStoreManager
)
->
Self
{
Self
{
manager
,
metrics
:
Arc
::
new
(
Metrics
::
default
()),
etcd_client
:
None
,
store
:
Arc
::
new
(
MemoryStore
::
new
()),
store
,
flags
:
StateFlags
{
chat_endpoints_enabled
:
AtomicBool
::
new
(
false
),
cmpl_endpoints_enabled
:
AtomicBool
::
new
(
false
),
...
...
@@ -90,20 +85,6 @@ impl State {
}
}
pub
fn
new_with_etcd
(
manager
:
Arc
<
ModelManager
>
,
etcd_client
:
etcd
::
Client
)
->
Self
{
Self
{
manager
,
metrics
:
Arc
::
new
(
Metrics
::
default
()),
store
:
Arc
::
new
(
EtcdStore
::
new
(
etcd_client
.clone
())),
etcd_client
:
Some
(
etcd_client
),
flags
:
StateFlags
{
chat_endpoints_enabled
:
AtomicBool
::
new
(
false
),
cmpl_endpoints_enabled
:
AtomicBool
::
new
(
false
),
embeddings_endpoints_enabled
:
AtomicBool
::
new
(
false
),
responses_endpoints_enabled
:
AtomicBool
::
new
(
false
),
},
}
}
/// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests
pub
fn
metrics_clone
(
&
self
)
->
Arc
<
Metrics
>
{
self
.metrics
.clone
()
...
...
@@ -117,12 +98,8 @@ impl State {
self
.manager
.clone
()
}
pub
fn
etcd_client
(
&
self
)
->
Option
<&
etcd
::
Client
>
{
self
.etcd_client
.as_ref
()
}
pub
fn
store
(
&
self
)
->
Arc
<
dyn
KeyValueStore
>
{
self
.store
.clone
()
pub
fn
store
(
&
self
)
->
&
KeyValueStoreManager
{
&
self
.store
}
// TODO
...
...
@@ -186,8 +163,8 @@ pub struct HttpServiceConfig {
#[builder(default
=
"None"
)]
request_template
:
Option
<
RequestTemplate
>
,
#[builder(default
=
"None"
)]
etcd_client
:
Option
<
etcd
::
Client
>
,
#[builder(default)]
store
:
KeyValueStoreManager
,
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
#[builder(default
=
"None"
)]
...
...
@@ -335,10 +312,7 @@ impl HttpServiceConfigBuilder {
let
config
:
HttpServiceConfig
=
self
.build_internal
()
?
;
let
model_manager
=
Arc
::
new
(
ModelManager
::
new
());
let
state
=
match
config
.etcd_client
{
Some
(
etcd_client
)
=>
Arc
::
new
(
State
::
new_with_etcd
(
model_manager
,
etcd_client
)),
None
=>
Arc
::
new
(
State
::
new
(
model_manager
)),
};
let
state
=
Arc
::
new
(
State
::
new
(
model_manager
,
config
.store
));
state
.flags
.set
(
&
EndpointType
::
Chat
,
config
.enable_chat_endpoints
);
...
...
@@ -422,11 +396,6 @@ impl HttpServiceConfigBuilder {
self
}
pub
fn
with_etcd_client
(
mut
self
,
etcd_client
:
Option
<
etcd
::
Client
>
)
->
Self
{
self
.etcd_client
=
Some
(
etcd_client
);
self
}
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
pub
fn
with_custom_backend_config
(
mut
self
,
...
...
lib/llm/src/kv_router/protocols.rs
View file @
7731b024
...
...
@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use
uuid
::
Uuid
;
/// A worker identifier.
pub
type
WorkerId
=
i
64
;
pub
type
WorkerId
=
u
64
;
/// A data parallel rank identifier.
pub
type
DpRank
=
u32
;
...
...
lib/llm/src/kv_router/publisher.rs
View file @
7731b024
...
...
@@ -97,7 +97,7 @@ pub struct KvEventPublisher {
impl
KvEventPublisher
{
pub
fn
new
(
component
:
Component
,
worker_id
:
i
64
,
worker_id
:
u
64
,
kv_block_size
:
u32
,
source_config
:
Option
<
KvEventSourceConfig
>
,
)
->
Result
<
Self
>
{
...
...
@@ -174,7 +174,7 @@ impl Drop for KvEventPublisher {
async
fn
start_event_processor
<
P
:
EventPublisher
+
Send
+
Sync
+
'static
>
(
publisher
:
P
,
worker_id
:
i
64
,
worker_id
:
u
64
,
cancellation_token
:
CancellationToken
,
mut
rx
:
mpsc
::
UnboundedReceiver
<
KvCacheEvent
>
,
)
{
...
...
@@ -801,7 +801,7 @@ impl WorkerMetricsPublisher {
///
/// This task monitors metric changes (specifically kv_active_blocks and num_requests_waiting)
/// and publishes stable metrics to NATS after they've been unchanged for 1ms.
fn
start_nats_metrics_publishing
(
&
self
,
namespace
:
Namespace
,
worker_id
:
i
64
)
{
fn
start_nats_metrics_publishing
(
&
self
,
namespace
:
Namespace
,
worker_id
:
u
64
)
{
let
nats_rx
=
self
.rx
.clone
();
tokio
::
spawn
(
async
move
{
...
...
lib/llm/src/kv_router/scoring.rs
View file @
7731b024
...
...
@@ -9,7 +9,7 @@ use std::collections::HashMap;
#[derive(Debug,
Clone,
Serialize,
Deserialize,
PartialEq)]
pub
struct
LoadEvent
{
pub
worker_id
:
i
64
,
pub
worker_id
:
u
64
,
pub
data
:
ForwardPassMetrics
,
}
...
...
@@ -23,8 +23,8 @@ pub struct Endpoint {
}
impl
Endpoint
{
pub
fn
worker_id
(
&
self
)
->
i
64
{
i
64
::
from_str_radix
(
pub
fn
worker_id
(
&
self
)
->
u
64
{
u
64
::
from_str_radix
(
self
.subject
.split
(
"-"
)
.last
()
...
...
@@ -39,7 +39,7 @@ impl Endpoint {
#[derive(Debug,
Default,
Serialize,
Deserialize,
Clone,
PartialEq)]
pub
struct
ProcessedEndpoints
{
pub
endpoints
:
HashMap
<
i
64
,
Endpoint
>
,
pub
endpoints
:
HashMap
<
u
64
,
Endpoint
>
,
pub
load_avg
:
f64
,
pub
load_std
:
f64
,
}
...
...
@@ -68,11 +68,11 @@ impl ProcessedEndpoints {
}
}
pub
fn
worker_ids
(
&
self
)
->
Vec
<
i
64
>
{
pub
fn
worker_ids
(
&
self
)
->
Vec
<
u
64
>
{
self
.endpoints
.keys
()
.copied
()
.collect
()
}
pub
fn
active_blocks
(
&
self
)
->
HashMap
<
i
64
,
usize
>
{
pub
fn
active_blocks
(
&
self
)
->
HashMap
<
u
64
,
usize
>
{
self
.endpoints
.iter
()
.map
(|(
&
worker_id
,
endpoint
)|
(
worker_id
,
endpoint
.data
.kv_active_blocks
()
as
usize
))
...
...
lib/llm/src/kv_router/sequence.rs
View file @
7731b024
...
...
@@ -293,7 +293,7 @@ impl ActiveSequencesMultiWorker {
pub
fn
new
(
component
:
Component
,
block_size
:
usize
,
workers_with_configs
:
HashMap
<
i
64
,
Option
<
ModelRuntimeConfig
>>
,
workers_with_configs
:
HashMap
<
u
64
,
Option
<
ModelRuntimeConfig
>>
,
replica_sync
:
bool
,
router_uuid
:
String
,
)
->
Self
{
...
...
@@ -557,7 +557,7 @@ impl ActiveSequencesMultiWorker {
/// Update the set of workers, adding and removing as needed
pub
fn
update_workers
(
&
self
,
new_workers_with_configs
:
HashMap
<
i
64
,
Option
<
ModelRuntimeConfig
>>
,
new_workers_with_configs
:
HashMap
<
u
64
,
Option
<
ModelRuntimeConfig
>>
,
)
{
let
current_workers
:
HashSet
<
WorkerWithDpRank
>
=
self
.senders
.iter
()
.map
(|
entry
|
*
entry
.key
())
.collect
();
...
...
lib/llm/src/kv_router/subscriber.rs
View file @
7731b024
...
...
@@ -63,7 +63,7 @@ impl SnapshotResources {
// Clean up stale workers before snapshot
// Get current worker IDs from instances_rx
let
current_instances
=
self
.instances_rx
.borrow
()
.clone
();
let
current_worker_ids
:
std
::
collections
::
HashSet
<
i
64
>
=
current_instances
let
current_worker_ids
:
std
::
collections
::
HashSet
<
u
64
>
=
current_instances
.iter
()
.map
(|
instance
|
instance
.instance_id
)
.collect
();
...
...
@@ -312,7 +312,7 @@ pub async fn start_kv_router_background(
};
// Parse as hexadecimal (base 16)
let
Ok
(
worker_id
)
=
i
64
::
from_str_radix
(
worker_id_str
,
16
)
else
{
let
Ok
(
worker_id
)
=
u
64
::
from_str_radix
(
worker_id_str
,
16
)
else
{
tracing
::
warn!
(
"Could not parse worker ID from instance key: {key}"
);
continue
;
};
...
...
lib/llm/src/local_model.rs
View file @
7731b024
...
...
@@ -3,16 +3,12 @@
use
std
::
fs
;
use
std
::
path
::{
Path
,
PathBuf
};
use
std
::
sync
::
Arc
;
use
dynamo_runtime
::
component
::
Endpoint
;
use
dynamo_runtime
::
protocols
::
EndpointId
;
use
dynamo_runtime
::
slug
::
Slug
;
use
dynamo_runtime
::
storage
::
key_value_store
::
Key
;
use
dynamo_runtime
::
traits
::
DistributedRuntimeProvider
;
use
dynamo_runtime
::{
component
::
Endpoint
,
storage
::
key_value_store
::{
EtcdStore
,
KeyValueStore
,
KeyValueStoreManager
},
};
use
crate
::
entrypoint
::
RouterConfig
;
use
crate
::
mocker
::
protocols
::
MockEngineArgs
;
...
...
@@ -414,18 +410,12 @@ impl LocalModel {
model_type
:
ModelType
,
model_input
:
ModelInput
,
)
->
anyhow
::
Result
<
()
>
{
// A static component doesn't have an etcd_client because it doesn't need to register
let
Some
(
etcd_client
)
=
endpoint
.drt
()
.etcd_client
()
else
{
anyhow
::
bail!
(
"Cannot attach to static endpoint"
);
};
self
.card.model_type
=
model_type
;
self
.card.model_input
=
model_input
;
// Publish the Model Deployment Card to KV store
let
kvstore
:
Box
<
dyn
KeyValueStore
>
=
Box
::
new
(
EtcdStore
::
new
(
etcd_client
.clone
()));
let
card_store
=
Arc
::
new
(
KeyValueStoreManager
::
new
(
kvstore
));
let
lease_id
=
endpoint
.drt
()
.primary_lease
()
.map
(|
l
|
l
.id
())
.unwrap_or
(
0
);
let
key
=
Key
::
from_raw
(
endpoint
.unique_path
(
lease_id
));
let
card_store
=
endpoint
.drt
()
.store
();
let
key
=
Key
::
from_raw
(
endpoint
.unique_path
(
card_store
.connection_id
()));
let
_
outcome
=
card_store
.publish
(
model_card
::
ROOT_PATH
,
None
,
&
key
,
&
mut
self
.card
)
...
...
lib/llm/src/protocols/common/preprocessor.rs
View file @
7731b024
...
...
@@ -50,7 +50,7 @@ pub struct PreprocessedRequest {
/// Targeted backend instance ID for the request
#[builder(default)]
pub
backend_instance_id
:
Option
<
i
64
>
,
pub
backend_instance_id
:
Option
<
u
64
>
,
/// Router configuration overrides for this specific request
#[builder(default)]
...
...
lib/llm/src/protocols/openai/nvext.rs
View file @
7731b024
...
...
@@ -39,7 +39,7 @@ pub struct NvExt {
/// If not set, the request will be routed to the best matching instance.
#[builder(default,
setter(strip_option))]
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
backend_instance_id
:
Option
<
i
64
>
,
pub
backend_instance_id
:
Option
<
u
64
>
,
/// Pre-tokenized data to use instead of tokenizing the prompt
/// If provided along with backend_instance_id, these tokens will be used directly
...
...
lib/llm/tests/block_manager.rs
View file @
7731b024
...
...
@@ -216,7 +216,7 @@ pub mod llm_kvbm {
impl
DynamoKvbmRuntimeConfigBuilder
{
pub
fn
build
(
self
)
->
Result
<
kvbm
::
config
::
KvManagerRuntimeConfig
>
{
let
(
runtime
,
nixl
)
=
self
.build_internal
()
?
.dissolve
();
let
worker_id
=
runtime
.primary_lease
()
.unwrap
()
.id
()
as
u64
;
let
worker_id
=
runtime
.primary_lease
()
.unwrap
()
.id
();
Ok
(
kvbm
::
config
::
KvManagerRuntimeConfig
::
builder
()
.worker_id
(
worker_id
)
.cancellation_token
(
runtime
.primary_token
()
.child_token
())
...
...
@@ -247,7 +247,7 @@ pub mod llm_kvbm {
impl
DynamoEventManager
{
pub
fn
new
(
component
:
Arc
<
KVBMDynamoRuntimeComponent
>
)
->
Self
{
let
(
tx
,
rx
)
=
mpsc
::
unbounded_channel
();
let
worker_id
=
component
.drt
()
.primary_lease
()
.unwrap
()
.id
()
as
u64
;
let
worker_id
=
component
.drt
()
.primary_lease
()
.unwrap
()
.id
();
component
.drt
()
.runtime
()
.secondary
()
.spawn
(
async
move
{
worker_task
(
component
,
rx
)
.await
;
});
...
...
@@ -296,7 +296,7 @@ pub mod llm_kvbm {
event_id
:
event_id_counter
,
dp_rank
:
0
,
};
let
router_event
=
RouterEvent
::
new
(
worker_identifier
as
i64
,
event
);
let
router_event
=
RouterEvent
::
new
(
worker_identifier
,
event
);
event_id_counter
+=
1
;
if
let
Err
(
e
)
=
component_clone
.batch_tx
...
...
@@ -316,7 +316,7 @@ pub mod llm_kvbm {
event_id
:
event_id_counter
,
dp_rank
:
0
,
};
let
router_event
=
RouterEvent
::
new
(
worker_identifier
as
i64
,
event
);
let
router_event
=
RouterEvent
::
new
(
worker_identifier
,
event
);
event_id_counter
+=
1
;
if
let
Err
(
e
)
=
component_clone
.batch_tx
...
...
lib/runtime/src/component.rs
View file @
7731b024
...
...
@@ -98,12 +98,12 @@ pub struct Instance {
pub
component
:
String
,
pub
endpoint
:
String
,
pub
namespace
:
String
,
pub
instance_id
:
i
64
,
pub
instance_id
:
u
64
,
pub
transport
:
TransportType
,
}
impl
Instance
{
pub
fn
id
(
&
self
)
->
i
64
{
pub
fn
id
(
&
self
)
->
u
64
{
self
.instance_id
}
pub
fn
endpoint_id
(
&
self
)
->
EndpointId
{
...
...
@@ -525,12 +525,12 @@ impl Endpoint {
}
/// The full path of an instance in etcd
pub
fn
etcd_path_with_lease_id
(
&
self
,
lease_id
:
i
64
)
->
String
{
pub
fn
etcd_path_with_lease_id
(
&
self
,
lease_id
:
u
64
)
->
String
{
format!
(
"{INSTANCE_ROOT_PATH}/{}"
,
self
.unique_path
(
lease_id
))
}
/// Full path of this endpoint with forward slash separators, including lease id
pub
fn
unique_path
(
&
self
,
lease_id
:
i
64
)
->
String
{
pub
fn
unique_path
(
&
self
,
lease_id
:
u
64
)
->
String
{
let
ns
=
self
.component
.namespace
()
.name
();
let
cp
=
self
.component
.name
();
let
ep
=
self
.name
();
...
...
@@ -552,7 +552,7 @@ impl Endpoint {
}
}
pub
fn
name_with_id
(
&
self
,
lease_id
:
i
64
)
->
String
{
pub
fn
name_with_id
(
&
self
,
lease_id
:
u
64
)
->
String
{
if
self
.is_static
{
self
.name
.clone
()
}
else
{
...
...
@@ -565,7 +565,7 @@ impl Endpoint {
}
/// Subject to an instance of the [Endpoint] with a specific lease id
pub
fn
subject_to
(
&
self
,
lease_id
:
i
64
)
->
String
{
pub
fn
subject_to
(
&
self
,
lease_id
:
u
64
)
->
String
{
format!
(
"{}.{}"
,
self
.component
.service_name
(),
...
...
lib/runtime/src/component/client.rs
View file @
7731b024
...
...
@@ -32,7 +32,7 @@ enum MapState {
}
enum
EndpointEvent
{
Put
(
String
,
i
64
),
Put
(
String
,
u
64
),
Delete
(
String
),
}
...
...
@@ -43,9 +43,9 @@ pub struct Client {
// These are the remotes I know about from watching etcd
pub
instance_source
:
Arc
<
InstanceSource
>
,
// These are the instance source ids less those reported as down from sending rpc
instance_avail
:
Arc
<
ArcSwap
<
Vec
<
i
64
>>>
,
instance_avail
:
Arc
<
ArcSwap
<
Vec
<
u
64
>>>
,
// These are the instance source ids less those reported as busy (above threshold)
instance_free
:
Arc
<
ArcSwap
<
Vec
<
i
64
>>>
,
instance_free
:
Arc
<
ArcSwap
<
Vec
<
u
64
>>>
,
}
#[derive(Clone,
Debug)]
...
...
@@ -104,15 +104,15 @@ impl Client {
}
}
pub
fn
instance_ids
(
&
self
)
->
Vec
<
i
64
>
{
pub
fn
instance_ids
(
&
self
)
->
Vec
<
u
64
>
{
self
.instances
()
.into_iter
()
.map
(|
ep
|
ep
.id
())
.collect
()
}
pub
fn
instance_ids_avail
(
&
self
)
->
arc_swap
::
Guard
<
Arc
<
Vec
<
i
64
>>>
{
pub
fn
instance_ids_avail
(
&
self
)
->
arc_swap
::
Guard
<
Arc
<
Vec
<
u
64
>>>
{
self
.instance_avail
.load
()
}
pub
fn
instance_ids_free
(
&
self
)
->
arc_swap
::
Guard
<
Arc
<
Vec
<
i
64
>>>
{
pub
fn
instance_ids_free
(
&
self
)
->
arc_swap
::
Guard
<
Arc
<
Vec
<
u
64
>>>
{
self
.instance_free
.load
()
}
...
...
@@ -139,7 +139,7 @@ impl Client {
}
/// Mark an instance as down/unavailable
pub
fn
report_instance_down
(
&
self
,
instance_id
:
i
64
)
{
pub
fn
report_instance_down
(
&
self
,
instance_id
:
u
64
)
{
let
filtered
=
self
.instance_ids_avail
()
.iter
()
...
...
@@ -151,9 +151,9 @@ impl Client {
}
/// Update the set of free instances based on busy instance IDs
pub
fn
update_free_instances
(
&
self
,
busy_instance_ids
:
&
[
i
64
])
{
pub
fn
update_free_instances
(
&
self
,
busy_instance_ids
:
&
[
u
64
])
{
let
all_instance_ids
=
self
.instance_ids
();
let
free_ids
:
Vec
<
i
64
>
=
all_instance_ids
let
free_ids
:
Vec
<
u
64
>
=
all_instance_ids
.into_iter
()
.filter
(|
id
|
!
busy_instance_ids
.contains
(
id
))
.collect
();
...
...
@@ -173,7 +173,7 @@ impl Client {
InstanceSource
::
Dynamic
(
rx
)
=>
rx
.clone
(),
};
while
!
cancel_token
.is_cancelled
()
{
let
instance_ids
:
Vec
<
i
64
>
=
rx
let
instance_ids
:
Vec
<
u
64
>
=
rx
.borrow_and_update
()
.iter
()
.map
(|
instance
|
instance
.id
())
...
...
lib/runtime/src/discovery.rs
View file @
7731b024
...
...
@@ -27,48 +27,13 @@ impl DiscoveryClient {
}
/// Get the primary lease ID
pub
fn
primary_lease_id
(
&
self
)
->
i
64
{
pub
fn
primary_lease_id
(
&
self
)
->
u
64
{
self
.etcd_client
.lease_id
()
}
/// Create a [`Lease`] with a given time-to-live (TTL).
/// This [`Lease`] will be tied to the [`crate::Runtime`], but has its own independent [`crate::CancellationToken`].
pub
async
fn
create_lease
(
&
self
,
ttl
:
i
64
)
->
Result
<
Lease
>
{
pub
async
fn
create_lease
(
&
self
,
ttl
:
u
64
)
->
Result
<
Lease
>
{
self
.etcd_client
.create_lease
(
ttl
)
.await
}
// The following two commented-out functions are not implemented; they are placeholders for proposed etcd usage patterns.
// /// Create an ephemeral key/value pair tied to a lease_id.
// /// This is an atomic create. If the key already exists, this will fail.
// /// The [`etcd_client::KeyValue`] will be removed when the lease expires or is revoked.
// pub async fn create_ephemerial_key(&self, key: &str, value: &str, lease_id: i64) -> Result<()> {
// // self.etcd_client.create_ephemeral_key(key, value, lease_id).await
// unimplemented!()
// }
// /// Create a shared [`etcd_client::KeyValue`] which behaves similarly to a C++ `std::shared_ptr` or a
// /// Rust [std::sync::Arc]. Instead of having one owner of the lease, multiple owners participate in
// /// maintaining the lease. In this manner, when the last member of the group sharing the lease is gone,
// /// the lease will be expired.
// ///
// /// Implementation notes: At the time of writing, it is unclear if we have atomics that control leases,
// /// so in our initial implementation, the last member of the group will not revoke the lease, so the object
// /// will live for up to the TTL after the last member is gone.
// ///
// /// Notes
// /// -----
// ///
// /// - Multiple members sharing the lease and contributing to the heartbeat might cause some overhead.
// /// The implementation will try to randomize the heartbeat intervals to avoid the thundering herd problem,
// /// and with any luck, the heartbeat watchers will be able to detect if an external member triggered
// /// the heartbeat during this interval and skip unnecessary heartbeat messages.
// ///
// /// A new lease will be created for this object. If you wish to add an object to a shared group s
// ///
// /// The [`etcd_client::KeyValue`] will be removed when the lease expires or is revoked.
// pub async fn create_shared_key(&self, key: &str, value: &str, lease_id: i64) -> Result<()> {
// // self.etcd_client.create_ephemeral_key(key, value, lease_id).await
// unimplemented!()
// }
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment