Unverified commit 7731b024, authored by Graham King and committed by GitHub
Browse files

chore: Use KeyValueStoreManager instead of etcd::Client (#3822)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent 6f9be594
...@@ -154,7 +154,7 @@ fn dynamo_create_kv_publisher( ...@@ -154,7 +154,7 @@ fn dynamo_create_kv_publisher(
{ {
Ok(drt) => { Ok(drt) => {
let backend = drt.namespace(namespace)?.component(component)?; let backend = drt.namespace(namespace)?.component(component)?;
KvEventPublisher::new(backend, worker_id, kv_block_size, None) KvEventPublisher::new(backend, worker_id as u64, kv_block_size, None)
} }
Err(e) => Err(e), Err(e) => Err(e),
} }
......
...@@ -771,7 +771,7 @@ impl Endpoint { ...@@ -771,7 +771,7 @@ impl Endpoint {
}) })
} }
fn lease_id(&self) -> i64 { fn lease_id(&self) -> u64 {
self.inner self.inner
.drt() .drt()
.primary_lease() .primary_lease()
...@@ -807,7 +807,7 @@ impl Namespace { ...@@ -807,7 +807,7 @@ impl Namespace {
impl Client { impl Client {
/// Get list of current instances. /// Get list of current instances.
/// Replaces endpoint_ids. /// Replaces endpoint_ids.
fn instance_ids(&self) -> Vec<i64> { fn instance_ids(&self) -> Vec<u64> {
self.router.client.instance_ids() self.router.client.instance_ids()
} }
...@@ -819,7 +819,7 @@ impl Client { ...@@ -819,7 +819,7 @@ impl Client {
inner inner
.wait_for_instances() .wait_for_instances()
.await .await
.map(|v| v.into_iter().map(|cei| cei.id()).collect::<Vec<i64>>()) .map(|v| v.into_iter().map(|cei| cei.id()).collect::<Vec<u64>>())
.map_err(to_pyerr) .map_err(to_pyerr)
}) })
} }
...@@ -920,7 +920,7 @@ impl Client { ...@@ -920,7 +920,7 @@ impl Client {
&self, &self,
py: Python<'p>, py: Python<'p>,
request: PyObject, request: PyObject,
instance_id: i64, instance_id: u64,
annotated: Option<bool>, annotated: Option<bool>,
context: Option<context::Context>, context: Option<context::Context>,
) -> PyResult<Bound<'p, PyAny>> { ) -> PyResult<Bound<'p, PyAny>> {
......
...@@ -322,7 +322,7 @@ pub(crate) struct OverlapScores { ...@@ -322,7 +322,7 @@ pub(crate) struct OverlapScores {
#[pymethods] #[pymethods]
impl OverlapScores { impl OverlapScores {
#[getter] #[getter]
fn scores(&self) -> HashMap<(i64, u32), u32> { fn scores(&self) -> HashMap<(u64, u32), u32> {
// Return scores with full WorkerWithDpRank granularity as (worker_id, dp_rank) tuples // Return scores with full WorkerWithDpRank granularity as (worker_id, dp_rank) tuples
self.inner self.inner
.scores .scores
......
...@@ -59,7 +59,10 @@ impl ControlClient { ...@@ -59,7 +59,10 @@ impl ControlClient {
} }
async fn execute<T: DeserializeOwned>(&self, message: ControlMessage) -> Result<T> { async fn execute<T: DeserializeOwned>(&self, message: ControlMessage) -> Result<T> {
let mut stream = self.client.direct(message.into(), self.instance_id).await?; let mut stream = self
.client
.direct(message.into(), self.instance_id as u64)
.await?;
let resp = stream let resp = stream
.next() .next()
.await .await
......
...@@ -567,19 +567,22 @@ impl ModelWatcher { ...@@ -567,19 +567,22 @@ impl ModelWatcher {
} }
/// All the registered ModelDeploymentCard with the EndpointId they are attached to, one per instance /// All the registered ModelDeploymentCard with the EndpointId they are attached to, one per instance
pub async fn all_cards(&self) -> anyhow::Result<Vec<(EndpointId, ModelDeploymentCard)>> { async fn all_cards(&self) -> anyhow::Result<Vec<(EndpointId, ModelDeploymentCard)>> {
let Some(etcd_client) = self.drt.etcd_client() else { let store = self.drt.store();
anyhow::bail!("all_cards: Missing etcd client");
//let kvs = etcd_client.kv_get_prefix(model_card::ROOT_PATH).await?;
let Some(card_bucket) = store.get_bucket(model_card::ROOT_PATH).await? else {
// no cards
return Ok(vec![]);
}; };
let kvs = etcd_client.kv_get_prefix(model_card::ROOT_PATH).await?; let entries = card_bucket.entries().await?;
let mut results = Vec::with_capacity(kvs.len());
for kv in kvs { let mut results = Vec::with_capacity(entries.len());
let maybe_convert = serde_json::from_slice::<ModelDeploymentCard>(kv.value()); for (key, card_bytes) in entries {
let r = match maybe_convert { let r = match serde_json::from_slice::<ModelDeploymentCard>(&card_bytes) {
Ok(card) => { Ok(card) => {
let maybe_endpoint_id = kv.key_str().map_err(|err| err.into()).and_then(|k| { let maybe_endpoint_id =
etcd_key_extract(k).map(|(endpoint_id, _instance_id)| endpoint_id) etcd_key_extract(&key).map(|(endpoint_id, _instance_id)| endpoint_id);
});
let endpoint_id = match maybe_endpoint_id { let endpoint_id = match maybe_endpoint_id {
Ok(eid) => eid, Ok(eid) => eid,
Err(err) => { Err(err) => {
...@@ -590,14 +593,8 @@ impl ModelWatcher { ...@@ -590,14 +593,8 @@ impl ModelWatcher {
(endpoint_id, card) (endpoint_id, card)
} }
Err(err) => { Err(err) => {
match kv.value_str() { let value = String::from_utf8_lossy(&card_bytes);
Ok(value) => { tracing::error!(%err, %value, "Invalid JSON in model card");
tracing::error!(%err, value, "Invalid JSON in model card");
}
Err(value_str_err) => {
tracing::error!(original_error=%err, %value_str_err, "Invalid UTF-8 string in model card, expected JSON");
}
}
continue; continue;
} }
}; };
......
...@@ -52,7 +52,7 @@ impl WorkerLoadState { ...@@ -52,7 +52,7 @@ impl WorkerLoadState {
/// Worker monitor for tracking KV cache usage and busy states /// Worker monitor for tracking KV cache usage and busy states
pub struct KvWorkerMonitor { pub struct KvWorkerMonitor {
client: Arc<Client>, client: Arc<Client>,
worker_load_states: Arc<RwLock<HashMap<i64, WorkerLoadState>>>, worker_load_states: Arc<RwLock<HashMap<u64, WorkerLoadState>>>,
busy_threshold: f64, busy_threshold: f64,
} }
...@@ -67,7 +67,7 @@ impl KvWorkerMonitor { ...@@ -67,7 +67,7 @@ impl KvWorkerMonitor {
} }
/// Get the worker load states for external access /// Get the worker load states for external access
pub fn load_states(&self) -> Arc<RwLock<HashMap<i64, WorkerLoadState>>> { pub fn load_states(&self) -> Arc<RwLock<HashMap<u64, WorkerLoadState>>> {
self.worker_load_states.clone() self.worker_load_states.clone()
} }
} }
...@@ -154,7 +154,7 @@ impl WorkerLoadMonitor for KvWorkerMonitor { ...@@ -154,7 +154,7 @@ impl WorkerLoadMonitor for KvWorkerMonitor {
// Recalculate all busy instances and update // Recalculate all busy instances and update
let states = worker_load_states.read().unwrap(); let states = worker_load_states.read().unwrap();
let busy_instances: Vec<i64> = states let busy_instances: Vec<u64> = states
.iter() .iter()
.filter_map(|(&id, state)| { .filter_map(|(&id, state)| {
state.is_busy(busy_threshold).then_some(id) state.is_busy(busy_threshold).then_some(id)
......
...@@ -64,10 +64,10 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul ...@@ -64,10 +64,10 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
let http_service = match engine_config { let http_service = match engine_config {
EngineConfig::Dynamic(_) => { EngineConfig::Dynamic(_) => {
let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?; let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?;
let etcd_client = distributed_runtime.etcd_client();
// This allows the /health endpoint to query etcd for active instances // This allows the /health endpoint to query etcd for active instances
http_service_builder = http_service_builder.with_etcd_client(etcd_client.clone()); http_service_builder = http_service_builder.store(distributed_runtime.store().clone());
let http_service = http_service_builder.build()?; let http_service = http_service_builder.build()?;
let etcd_client = distributed_runtime.etcd_client();
match etcd_client { match etcd_client {
Some(ref etcd_client) => { Some(ref etcd_client) => {
let router_config = engine_config.local_model().router_config(); let router_config = engine_config.local_model().router_config();
...@@ -241,17 +241,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul ...@@ -241,17 +241,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
http_service.custom_backend_registry.as_ref(), http_service.custom_backend_registry.as_ref(),
) { ) {
// Create DistributedRuntime for polling, matching the engine's mode // Create DistributedRuntime for polling, matching the engine's mode
// Check if we have etcd_client to determine if we're in dynamic or static mode let drt = DistributedRuntime::from_settings(runtime.clone()).await?;
let drt = if http_service.state().etcd_client().is_some() {
// Dynamic mode: use from_settings() which respects environment (includes etcd)
DistributedRuntime::from_settings(runtime.clone()).await?
} else {
// Static mode: no etcd
let dst_config =
dynamo_runtime::distributed::DistributedConfig::from_settings(true);
DistributedRuntime::new(runtime.clone(), dst_config).await?
};
tracing::info!( tracing::info!(
namespace_component_endpoint=%namespace_component_endpoint, namespace_component_endpoint=%namespace_component_endpoint,
polling_interval_secs=polling_interval, polling_interval_secs=polling_interval,
......
...@@ -20,10 +20,7 @@ use axum_server::tls_rustls::RustlsConfig; ...@@ -20,10 +20,7 @@ use axum_server::tls_rustls::RustlsConfig;
use derive_builder::Builder; use derive_builder::Builder;
use dynamo_runtime::logging::make_request_span; use dynamo_runtime::logging::make_request_span;
use dynamo_runtime::metrics::prometheus_names::name_prefix; use dynamo_runtime::metrics::prometheus_names::name_prefix;
use dynamo_runtime::storage::key_value_store::EtcdStore; use dynamo_runtime::storage::key_value_store::KeyValueStoreManager;
use dynamo_runtime::storage::key_value_store::KeyValueStore;
use dynamo_runtime::storage::key_value_store::MemoryStore;
use dynamo_runtime::transports::etcd;
use std::net::SocketAddr; use std::net::SocketAddr;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
...@@ -33,8 +30,7 @@ use tower_http::trace::TraceLayer; ...@@ -33,8 +30,7 @@ use tower_http::trace::TraceLayer;
pub struct State { pub struct State {
metrics: Arc<Metrics>, metrics: Arc<Metrics>,
manager: Arc<ModelManager>, manager: Arc<ModelManager>,
etcd_client: Option<etcd::Client>, store: KeyValueStoreManager,
store: Arc<dyn KeyValueStore>,
flags: StateFlags, flags: StateFlags,
} }
...@@ -75,12 +71,11 @@ impl StateFlags { ...@@ -75,12 +71,11 @@ impl StateFlags {
} }
impl State { impl State {
pub fn new(manager: Arc<ModelManager>) -> Self { pub fn new(manager: Arc<ModelManager>, store: KeyValueStoreManager) -> Self {
Self { Self {
manager, manager,
metrics: Arc::new(Metrics::default()), metrics: Arc::new(Metrics::default()),
etcd_client: None, store,
store: Arc::new(MemoryStore::new()),
flags: StateFlags { flags: StateFlags {
chat_endpoints_enabled: AtomicBool::new(false), chat_endpoints_enabled: AtomicBool::new(false),
cmpl_endpoints_enabled: AtomicBool::new(false), cmpl_endpoints_enabled: AtomicBool::new(false),
...@@ -90,20 +85,6 @@ impl State { ...@@ -90,20 +85,6 @@ impl State {
} }
} }
pub fn new_with_etcd(manager: Arc<ModelManager>, etcd_client: etcd::Client) -> Self {
Self {
manager,
metrics: Arc::new(Metrics::default()),
store: Arc::new(EtcdStore::new(etcd_client.clone())),
etcd_client: Some(etcd_client),
flags: StateFlags {
chat_endpoints_enabled: AtomicBool::new(false),
cmpl_endpoints_enabled: AtomicBool::new(false),
embeddings_endpoints_enabled: AtomicBool::new(false),
responses_endpoints_enabled: AtomicBool::new(false),
},
}
}
/// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests /// Get the Prometheus [`Metrics`] object which tracks request counts and inflight requests
pub fn metrics_clone(&self) -> Arc<Metrics> { pub fn metrics_clone(&self) -> Arc<Metrics> {
self.metrics.clone() self.metrics.clone()
...@@ -117,12 +98,8 @@ impl State { ...@@ -117,12 +98,8 @@ impl State {
self.manager.clone() self.manager.clone()
} }
pub fn etcd_client(&self) -> Option<&etcd::Client> { pub fn store(&self) -> &KeyValueStoreManager {
self.etcd_client.as_ref() &self.store
}
pub fn store(&self) -> Arc<dyn KeyValueStore> {
self.store.clone()
} }
// TODO // TODO
...@@ -186,8 +163,8 @@ pub struct HttpServiceConfig { ...@@ -186,8 +163,8 @@ pub struct HttpServiceConfig {
#[builder(default = "None")] #[builder(default = "None")]
request_template: Option<RequestTemplate>, request_template: Option<RequestTemplate>,
#[builder(default = "None")] #[builder(default)]
etcd_client: Option<etcd::Client>, store: KeyValueStoreManager,
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend. // DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
#[builder(default = "None")] #[builder(default = "None")]
...@@ -335,10 +312,7 @@ impl HttpServiceConfigBuilder { ...@@ -335,10 +312,7 @@ impl HttpServiceConfigBuilder {
let config: HttpServiceConfig = self.build_internal()?; let config: HttpServiceConfig = self.build_internal()?;
let model_manager = Arc::new(ModelManager::new()); let model_manager = Arc::new(ModelManager::new());
let state = match config.etcd_client { let state = Arc::new(State::new(model_manager, config.store));
Some(etcd_client) => Arc::new(State::new_with_etcd(model_manager, etcd_client)),
None => Arc::new(State::new(model_manager)),
};
state state
.flags .flags
.set(&EndpointType::Chat, config.enable_chat_endpoints); .set(&EndpointType::Chat, config.enable_chat_endpoints);
...@@ -422,11 +396,6 @@ impl HttpServiceConfigBuilder { ...@@ -422,11 +396,6 @@ impl HttpServiceConfigBuilder {
self self
} }
pub fn with_etcd_client(mut self, etcd_client: Option<etcd::Client>) -> Self {
self.etcd_client = Some(etcd_client);
self
}
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend. // DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
pub fn with_custom_backend_config( pub fn with_custom_backend_config(
mut self, mut self,
......
...@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; ...@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use uuid::Uuid; use uuid::Uuid;
/// A worker identifier. /// A worker identifier.
pub type WorkerId = i64; pub type WorkerId = u64;
/// A data parallel rank identifier. /// A data parallel rank identifier.
pub type DpRank = u32; pub type DpRank = u32;
......
...@@ -97,7 +97,7 @@ pub struct KvEventPublisher { ...@@ -97,7 +97,7 @@ pub struct KvEventPublisher {
impl KvEventPublisher { impl KvEventPublisher {
pub fn new( pub fn new(
component: Component, component: Component,
worker_id: i64, worker_id: u64,
kv_block_size: u32, kv_block_size: u32,
source_config: Option<KvEventSourceConfig>, source_config: Option<KvEventSourceConfig>,
) -> Result<Self> { ) -> Result<Self> {
...@@ -174,7 +174,7 @@ impl Drop for KvEventPublisher { ...@@ -174,7 +174,7 @@ impl Drop for KvEventPublisher {
async fn start_event_processor<P: EventPublisher + Send + Sync + 'static>( async fn start_event_processor<P: EventPublisher + Send + Sync + 'static>(
publisher: P, publisher: P,
worker_id: i64, worker_id: u64,
cancellation_token: CancellationToken, cancellation_token: CancellationToken,
mut rx: mpsc::UnboundedReceiver<KvCacheEvent>, mut rx: mpsc::UnboundedReceiver<KvCacheEvent>,
) { ) {
...@@ -801,7 +801,7 @@ impl WorkerMetricsPublisher { ...@@ -801,7 +801,7 @@ impl WorkerMetricsPublisher {
/// ///
/// This task monitors metric changes (specifically kv_active_blocks and num_requests_waiting) /// This task monitors metric changes (specifically kv_active_blocks and num_requests_waiting)
/// and publishes stable metrics to NATS after they've been unchanged for 1ms. /// and publishes stable metrics to NATS after they've been unchanged for 1ms.
fn start_nats_metrics_publishing(&self, namespace: Namespace, worker_id: i64) { fn start_nats_metrics_publishing(&self, namespace: Namespace, worker_id: u64) {
let nats_rx = self.rx.clone(); let nats_rx = self.rx.clone();
tokio::spawn(async move { tokio::spawn(async move {
......
...@@ -9,7 +9,7 @@ use std::collections::HashMap; ...@@ -9,7 +9,7 @@ use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct LoadEvent { pub struct LoadEvent {
pub worker_id: i64, pub worker_id: u64,
pub data: ForwardPassMetrics, pub data: ForwardPassMetrics,
} }
...@@ -23,8 +23,8 @@ pub struct Endpoint { ...@@ -23,8 +23,8 @@ pub struct Endpoint {
} }
impl Endpoint { impl Endpoint {
pub fn worker_id(&self) -> i64 { pub fn worker_id(&self) -> u64 {
i64::from_str_radix( u64::from_str_radix(
self.subject self.subject
.split("-") .split("-")
.last() .last()
...@@ -39,7 +39,7 @@ impl Endpoint { ...@@ -39,7 +39,7 @@ impl Endpoint {
#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] #[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)]
pub struct ProcessedEndpoints { pub struct ProcessedEndpoints {
pub endpoints: HashMap<i64, Endpoint>, pub endpoints: HashMap<u64, Endpoint>,
pub load_avg: f64, pub load_avg: f64,
pub load_std: f64, pub load_std: f64,
} }
...@@ -68,11 +68,11 @@ impl ProcessedEndpoints { ...@@ -68,11 +68,11 @@ impl ProcessedEndpoints {
} }
} }
pub fn worker_ids(&self) -> Vec<i64> { pub fn worker_ids(&self) -> Vec<u64> {
self.endpoints.keys().copied().collect() self.endpoints.keys().copied().collect()
} }
pub fn active_blocks(&self) -> HashMap<i64, usize> { pub fn active_blocks(&self) -> HashMap<u64, usize> {
self.endpoints self.endpoints
.iter() .iter()
.map(|(&worker_id, endpoint)| (worker_id, endpoint.data.kv_active_blocks() as usize)) .map(|(&worker_id, endpoint)| (worker_id, endpoint.data.kv_active_blocks() as usize))
......
...@@ -293,7 +293,7 @@ impl ActiveSequencesMultiWorker { ...@@ -293,7 +293,7 @@ impl ActiveSequencesMultiWorker {
pub fn new( pub fn new(
component: Component, component: Component,
block_size: usize, block_size: usize,
workers_with_configs: HashMap<i64, Option<ModelRuntimeConfig>>, workers_with_configs: HashMap<u64, Option<ModelRuntimeConfig>>,
replica_sync: bool, replica_sync: bool,
router_uuid: String, router_uuid: String,
) -> Self { ) -> Self {
...@@ -557,7 +557,7 @@ impl ActiveSequencesMultiWorker { ...@@ -557,7 +557,7 @@ impl ActiveSequencesMultiWorker {
/// Update the set of workers, adding and removing as needed /// Update the set of workers, adding and removing as needed
pub fn update_workers( pub fn update_workers(
&self, &self,
new_workers_with_configs: HashMap<i64, Option<ModelRuntimeConfig>>, new_workers_with_configs: HashMap<u64, Option<ModelRuntimeConfig>>,
) { ) {
let current_workers: HashSet<WorkerWithDpRank> = let current_workers: HashSet<WorkerWithDpRank> =
self.senders.iter().map(|entry| *entry.key()).collect(); self.senders.iter().map(|entry| *entry.key()).collect();
......
...@@ -63,7 +63,7 @@ impl SnapshotResources { ...@@ -63,7 +63,7 @@ impl SnapshotResources {
// Clean up stale workers before snapshot // Clean up stale workers before snapshot
// Get current worker IDs from instances_rx // Get current worker IDs from instances_rx
let current_instances = self.instances_rx.borrow().clone(); let current_instances = self.instances_rx.borrow().clone();
let current_worker_ids: std::collections::HashSet<i64> = current_instances let current_worker_ids: std::collections::HashSet<u64> = current_instances
.iter() .iter()
.map(|instance| instance.instance_id) .map(|instance| instance.instance_id)
.collect(); .collect();
...@@ -312,7 +312,7 @@ pub async fn start_kv_router_background( ...@@ -312,7 +312,7 @@ pub async fn start_kv_router_background(
}; };
// Parse as hexadecimal (base 16) // Parse as hexadecimal (base 16)
let Ok(worker_id) = i64::from_str_radix(worker_id_str, 16) else { let Ok(worker_id) = u64::from_str_radix(worker_id_str, 16) else {
tracing::warn!("Could not parse worker ID from instance key: {key}"); tracing::warn!("Could not parse worker ID from instance key: {key}");
continue; continue;
}; };
......
...@@ -3,16 +3,12 @@ ...@@ -3,16 +3,12 @@
use std::fs; use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc;
use dynamo_runtime::component::Endpoint;
use dynamo_runtime::protocols::EndpointId; use dynamo_runtime::protocols::EndpointId;
use dynamo_runtime::slug::Slug; use dynamo_runtime::slug::Slug;
use dynamo_runtime::storage::key_value_store::Key; use dynamo_runtime::storage::key_value_store::Key;
use dynamo_runtime::traits::DistributedRuntimeProvider; use dynamo_runtime::traits::DistributedRuntimeProvider;
use dynamo_runtime::{
component::Endpoint,
storage::key_value_store::{EtcdStore, KeyValueStore, KeyValueStoreManager},
};
use crate::entrypoint::RouterConfig; use crate::entrypoint::RouterConfig;
use crate::mocker::protocols::MockEngineArgs; use crate::mocker::protocols::MockEngineArgs;
...@@ -414,18 +410,12 @@ impl LocalModel { ...@@ -414,18 +410,12 @@ impl LocalModel {
model_type: ModelType, model_type: ModelType,
model_input: ModelInput, model_input: ModelInput,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// A static component doesn't have an etcd_client because it doesn't need to register
let Some(etcd_client) = endpoint.drt().etcd_client() else {
anyhow::bail!("Cannot attach to static endpoint");
};
self.card.model_type = model_type; self.card.model_type = model_type;
self.card.model_input = model_input; self.card.model_input = model_input;
// Publish the Model Deployment Card to KV store // Publish the Model Deployment Card to KV store
let kvstore: Box<dyn KeyValueStore> = Box::new(EtcdStore::new(etcd_client.clone())); let card_store = endpoint.drt().store();
let card_store = Arc::new(KeyValueStoreManager::new(kvstore)); let key = Key::from_raw(endpoint.unique_path(card_store.connection_id()));
let lease_id = endpoint.drt().primary_lease().map(|l| l.id()).unwrap_or(0);
let key = Key::from_raw(endpoint.unique_path(lease_id));
let _outcome = card_store let _outcome = card_store
.publish(model_card::ROOT_PATH, None, &key, &mut self.card) .publish(model_card::ROOT_PATH, None, &key, &mut self.card)
......
...@@ -50,7 +50,7 @@ pub struct PreprocessedRequest { ...@@ -50,7 +50,7 @@ pub struct PreprocessedRequest {
/// Targeted backend instance ID for the request /// Targeted backend instance ID for the request
#[builder(default)] #[builder(default)]
pub backend_instance_id: Option<i64>, pub backend_instance_id: Option<u64>,
/// Router configuration overrides for this specific request /// Router configuration overrides for this specific request
#[builder(default)] #[builder(default)]
......
...@@ -39,7 +39,7 @@ pub struct NvExt { ...@@ -39,7 +39,7 @@ pub struct NvExt {
/// If not set, the request will be routed to the best matching instance. /// If not set, the request will be routed to the best matching instance.
#[builder(default, setter(strip_option))] #[builder(default, setter(strip_option))]
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub backend_instance_id: Option<i64>, pub backend_instance_id: Option<u64>,
/// Pre-tokenized data to use instead of tokenizing the prompt /// Pre-tokenized data to use instead of tokenizing the prompt
/// If provided along with backend_instance_id, these tokens will be used directly /// If provided along with backend_instance_id, these tokens will be used directly
......
...@@ -216,7 +216,7 @@ pub mod llm_kvbm { ...@@ -216,7 +216,7 @@ pub mod llm_kvbm {
impl DynamoKvbmRuntimeConfigBuilder { impl DynamoKvbmRuntimeConfigBuilder {
pub fn build(self) -> Result<kvbm::config::KvManagerRuntimeConfig> { pub fn build(self) -> Result<kvbm::config::KvManagerRuntimeConfig> {
let (runtime, nixl) = self.build_internal()?.dissolve(); let (runtime, nixl) = self.build_internal()?.dissolve();
let worker_id = runtime.primary_lease().unwrap().id() as u64; let worker_id = runtime.primary_lease().unwrap().id();
Ok(kvbm::config::KvManagerRuntimeConfig::builder() Ok(kvbm::config::KvManagerRuntimeConfig::builder()
.worker_id(worker_id) .worker_id(worker_id)
.cancellation_token(runtime.primary_token().child_token()) .cancellation_token(runtime.primary_token().child_token())
...@@ -247,7 +247,7 @@ pub mod llm_kvbm { ...@@ -247,7 +247,7 @@ pub mod llm_kvbm {
impl DynamoEventManager { impl DynamoEventManager {
pub fn new(component: Arc<KVBMDynamoRuntimeComponent>) -> Self { pub fn new(component: Arc<KVBMDynamoRuntimeComponent>) -> Self {
let (tx, rx) = mpsc::unbounded_channel(); let (tx, rx) = mpsc::unbounded_channel();
let worker_id = component.drt().primary_lease().unwrap().id() as u64; let worker_id = component.drt().primary_lease().unwrap().id();
component.drt().runtime().secondary().spawn(async move { component.drt().runtime().secondary().spawn(async move {
worker_task(component, rx).await; worker_task(component, rx).await;
}); });
...@@ -296,7 +296,7 @@ pub mod llm_kvbm { ...@@ -296,7 +296,7 @@ pub mod llm_kvbm {
event_id: event_id_counter, event_id: event_id_counter,
dp_rank: 0, dp_rank: 0,
}; };
let router_event = RouterEvent::new(worker_identifier as i64, event); let router_event = RouterEvent::new(worker_identifier, event);
event_id_counter += 1; event_id_counter += 1;
if let Err(e) = component_clone if let Err(e) = component_clone
.batch_tx .batch_tx
...@@ -316,7 +316,7 @@ pub mod llm_kvbm { ...@@ -316,7 +316,7 @@ pub mod llm_kvbm {
event_id: event_id_counter, event_id: event_id_counter,
dp_rank: 0, dp_rank: 0,
}; };
let router_event = RouterEvent::new(worker_identifier as i64, event); let router_event = RouterEvent::new(worker_identifier, event);
event_id_counter += 1; event_id_counter += 1;
if let Err(e) = component_clone if let Err(e) = component_clone
.batch_tx .batch_tx
......
...@@ -98,12 +98,12 @@ pub struct Instance { ...@@ -98,12 +98,12 @@ pub struct Instance {
pub component: String, pub component: String,
pub endpoint: String, pub endpoint: String,
pub namespace: String, pub namespace: String,
pub instance_id: i64, pub instance_id: u64,
pub transport: TransportType, pub transport: TransportType,
} }
impl Instance { impl Instance {
pub fn id(&self) -> i64 { pub fn id(&self) -> u64 {
self.instance_id self.instance_id
} }
pub fn endpoint_id(&self) -> EndpointId { pub fn endpoint_id(&self) -> EndpointId {
...@@ -525,12 +525,12 @@ impl Endpoint { ...@@ -525,12 +525,12 @@ impl Endpoint {
} }
/// The fully path of an instance in etcd /// The fully path of an instance in etcd
pub fn etcd_path_with_lease_id(&self, lease_id: i64) -> String { pub fn etcd_path_with_lease_id(&self, lease_id: u64) -> String {
format!("{INSTANCE_ROOT_PATH}/{}", self.unique_path(lease_id)) format!("{INSTANCE_ROOT_PATH}/{}", self.unique_path(lease_id))
} }
/// Full path of this endpoint with forward slash separators, including lease id /// Full path of this endpoint with forward slash separators, including lease id
pub fn unique_path(&self, lease_id: i64) -> String { pub fn unique_path(&self, lease_id: u64) -> String {
let ns = self.component.namespace().name(); let ns = self.component.namespace().name();
let cp = self.component.name(); let cp = self.component.name();
let ep = self.name(); let ep = self.name();
...@@ -552,7 +552,7 @@ impl Endpoint { ...@@ -552,7 +552,7 @@ impl Endpoint {
} }
} }
pub fn name_with_id(&self, lease_id: i64) -> String { pub fn name_with_id(&self, lease_id: u64) -> String {
if self.is_static { if self.is_static {
self.name.clone() self.name.clone()
} else { } else {
...@@ -565,7 +565,7 @@ impl Endpoint { ...@@ -565,7 +565,7 @@ impl Endpoint {
} }
/// Subject to an instance of the [Endpoint] with a specific lease id /// Subject to an instance of the [Endpoint] with a specific lease id
pub fn subject_to(&self, lease_id: i64) -> String { pub fn subject_to(&self, lease_id: u64) -> String {
format!( format!(
"{}.{}", "{}.{}",
self.component.service_name(), self.component.service_name(),
......
...@@ -32,7 +32,7 @@ enum MapState { ...@@ -32,7 +32,7 @@ enum MapState {
} }
enum EndpointEvent { enum EndpointEvent {
Put(String, i64), Put(String, u64),
Delete(String), Delete(String),
} }
...@@ -43,9 +43,9 @@ pub struct Client { ...@@ -43,9 +43,9 @@ pub struct Client {
// These are the remotes I know about from watching etcd // These are the remotes I know about from watching etcd
pub instance_source: Arc<InstanceSource>, pub instance_source: Arc<InstanceSource>,
// These are the instance source ids less those reported as down from sending rpc // These are the instance source ids less those reported as down from sending rpc
instance_avail: Arc<ArcSwap<Vec<i64>>>, instance_avail: Arc<ArcSwap<Vec<u64>>>,
// These are the instance source ids less those reported as busy (above threshold) // These are the instance source ids less those reported as busy (above threshold)
instance_free: Arc<ArcSwap<Vec<i64>>>, instance_free: Arc<ArcSwap<Vec<u64>>>,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
...@@ -104,15 +104,15 @@ impl Client { ...@@ -104,15 +104,15 @@ impl Client {
} }
} }
pub fn instance_ids(&self) -> Vec<i64> { pub fn instance_ids(&self) -> Vec<u64> {
self.instances().into_iter().map(|ep| ep.id()).collect() self.instances().into_iter().map(|ep| ep.id()).collect()
} }
pub fn instance_ids_avail(&self) -> arc_swap::Guard<Arc<Vec<i64>>> { pub fn instance_ids_avail(&self) -> arc_swap::Guard<Arc<Vec<u64>>> {
self.instance_avail.load() self.instance_avail.load()
} }
pub fn instance_ids_free(&self) -> arc_swap::Guard<Arc<Vec<i64>>> { pub fn instance_ids_free(&self) -> arc_swap::Guard<Arc<Vec<u64>>> {
self.instance_free.load() self.instance_free.load()
} }
...@@ -139,7 +139,7 @@ impl Client { ...@@ -139,7 +139,7 @@ impl Client {
} }
/// Mark an instance as down/unavailable /// Mark an instance as down/unavailable
pub fn report_instance_down(&self, instance_id: i64) { pub fn report_instance_down(&self, instance_id: u64) {
let filtered = self let filtered = self
.instance_ids_avail() .instance_ids_avail()
.iter() .iter()
...@@ -151,9 +151,9 @@ impl Client { ...@@ -151,9 +151,9 @@ impl Client {
} }
/// Update the set of free instances based on busy instance IDs /// Update the set of free instances based on busy instance IDs
pub fn update_free_instances(&self, busy_instance_ids: &[i64]) { pub fn update_free_instances(&self, busy_instance_ids: &[u64]) {
let all_instance_ids = self.instance_ids(); let all_instance_ids = self.instance_ids();
let free_ids: Vec<i64> = all_instance_ids let free_ids: Vec<u64> = all_instance_ids
.into_iter() .into_iter()
.filter(|id| !busy_instance_ids.contains(id)) .filter(|id| !busy_instance_ids.contains(id))
.collect(); .collect();
...@@ -173,7 +173,7 @@ impl Client { ...@@ -173,7 +173,7 @@ impl Client {
InstanceSource::Dynamic(rx) => rx.clone(), InstanceSource::Dynamic(rx) => rx.clone(),
}; };
while !cancel_token.is_cancelled() { while !cancel_token.is_cancelled() {
let instance_ids: Vec<i64> = rx let instance_ids: Vec<u64> = rx
.borrow_and_update() .borrow_and_update()
.iter() .iter()
.map(|instance| instance.id()) .map(|instance| instance.id())
......
...@@ -27,48 +27,13 @@ impl DiscoveryClient { ...@@ -27,48 +27,13 @@ impl DiscoveryClient {
} }
/// Get the primary lease ID /// Get the primary lease ID
pub fn primary_lease_id(&self) -> i64 { pub fn primary_lease_id(&self) -> u64 {
self.etcd_client.lease_id() self.etcd_client.lease_id()
} }
/// Create a [`Lease`] with a given time-to-live (TTL). /// Create a [`Lease`] with a given time-to-live (TTL).
/// This [`Lease`] will be tied to the [`crate::Runtime`], but has its own independent [`crate::CancellationToken`]. /// This [`Lease`] will be tied to the [`crate::Runtime`], but has its own independent [`crate::CancellationToken`].
pub async fn create_lease(&self, ttl: i64) -> Result<Lease> { pub async fn create_lease(&self, ttl: u64) -> Result<Lease> {
self.etcd_client.create_lease(ttl).await self.etcd_client.create_lease(ttl).await
} }
// the following two commented out codes are not implemented, but are placeholders for proposed ectd usage patterns
// /// Create an ephemeral key/value pair tied to a lease_id.
// /// This is an atomic create. If the key already exists, this will fail.
// /// The [`etcd_client::KeyValue`] will be removed when the lease expires or is revoked.
// pub async fn create_ephemerial_key(&self, key: &str, value: &str, lease_id: i64) -> Result<()> {
// // self.etcd_client.create_ephemeral_key(key, value, lease_id).await
// unimplemented!()
// }
// /// Create a shared [`etcd_client::KeyValue`] which behaves similar to a C++ `std::shared_ptr` or a
// /// Rust [std::sync::Arc]. Instead of having one owner of the lease, multiple owners participate in
// /// maintaining the lease. In this manner, when the last member of the group sharing the lease is gone,
// /// the lease will be expired.
// ///
// /// Implementation notes: At the time of writing, it is unclear if we have atomics that control leases,
// /// so in our initial implementation, the last member of the group will not revoke the lease, so the object
// /// will live for upto the TTL after the last member is gone.
// ///
// /// Notes
// /// -----
// ///
// /// - Multiple members sharing the lease and contributing to the heartbeat might cause some overheads.
// /// The implementation will try to randomize the heartbeat intervals to avoid thundering herd problem,
// /// and with any luck, the heartbeat watchers will be able to detect when if a external member triggered
// /// the heartbeat checking this interval and skip unnecessary heartbeat messages.
// ///
// /// A new lease will be created for this object. If you wish to add an object to a shared group s
// ///
// /// The [`etcd_client::KeyValue`] will be removed when the lease expires or is revoked.
// pub async fn create_shared_key(&self, key: &str, value: &str, lease_id: i64) -> Result<()> {
// // self.etcd_client.create_ephemeral_key(key, value, lease_id).await
// unimplemented!()
// }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment