Unverified commit 81162dfe, authored by Graham King, committed by GitHub
Browse files

chore(discovery): Watch/publish ModelDeploymentCard instead of ModelEntry (#3350)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent ddbb4f50
...@@ -149,7 +149,7 @@ impl EtcdBucket { ...@@ -149,7 +149,7 @@ impl EtcdBucket {
tracing::trace!("etcd create: {k}"); tracing::trace!("etcd create: {k}");
// Use atomic transaction to check and create in one operation // Use atomic transaction to check and create in one operation
let put_options = PutOptions::new(); let put_options = PutOptions::new().with_lease(self.client.primary_lease().id());
// Build transaction that creates key only if it doesn't exist // Build transaction that creates key only if it doesn't exist
let txn = Txn::new() let txn = Txn::new()
...@@ -217,9 +217,12 @@ impl EtcdBucket { ...@@ -217,9 +217,12 @@ impl EtcdBucket {
// So we do too in etcd. // So we do too in etcd.
} }
let put_options = PutOptions::new()
.with_lease(self.client.primary_lease().id())
.with_prev_key();
let mut put_resp = self let mut put_resp = self
.client .client
.kv_put_with_options(k, value, Some(PutOptions::new().with_prev_key())) .kv_put_with_options(k, value, Some(put_options))
.await .await
.map_err(|e| StorageError::EtcdError(e.to_string()))?; .map_err(|e| StorageError::EtcdError(e.to_string()))?;
Ok(match put_resp.take_prev_key() { Ok(match put_resp.take_prev_key() {
......
...@@ -67,12 +67,12 @@ where ...@@ -67,12 +67,12 @@ where
/// ///
/// # Example /// # Example
/// ```ignore /// ```ignore
/// // Watch for ModelEntry objects and extract runtime_config field /// // Watch for ModelDeploymentCard objects and extract runtime_config field
/// let watcher = watch_prefix_with_extraction( /// let watcher = watch_prefix_with_extraction(
/// etcd_client, /// etcd_client,
/// "models/", /// "mdc/",
/// |kv| Some(kv.lease()), // Use lease_id as key /// |kv| Some(kv.lease()), // Use lease_id as key
/// |entry: ModelEntry| entry.runtime_config, // Extract runtime_config field /// |card: ModelDeploymentCard| card.runtime_config, // Extract runtime_config field
/// cancellation_token, /// cancellation_token,
/// ).await?; /// ).await?;
/// ``` /// ```
......
...@@ -16,7 +16,6 @@ use tokio_stream::StreamExt; ...@@ -16,7 +16,6 @@ use tokio_stream::StreamExt;
// Constants for monitoring configuration // Constants for monitoring configuration
const KV_METRICS_SUBJECT: &str = "kv_metrics"; const KV_METRICS_SUBJECT: &str = "kv_metrics";
const MODEL_ROOT_PATH: &str = "models";
// Internal structs for deserializing metrics events // Internal structs for deserializing metrics events
#[derive(serde::Deserialize)] #[derive(serde::Deserialize)]
...@@ -35,11 +34,6 @@ struct KvStats { ...@@ -35,11 +34,6 @@ struct KvStats {
kv_active_blocks: u64, kv_active_blocks: u64,
} }
#[derive(serde::Deserialize)]
struct ModelEntry {
runtime_config: Option<RuntimeConfig>,
}
#[derive(serde::Deserialize)] #[derive(serde::Deserialize)]
struct RuntimeConfig { struct RuntimeConfig {
total_kv_blocks: Option<u64>, total_kv_blocks: Option<u64>,
...@@ -95,11 +89,18 @@ impl WorkerMonitor { ...@@ -95,11 +89,18 @@ impl WorkerMonitor {
return Ok(()); return Ok(());
}; };
// WorkerMonitor is in the wrong crate. It deals with LLM things (KV) so it should be in
// dynamo-llm not dynamo-runtime.
// That means we cannot use ModelDeploymentCard, so use serde_json::Value for now.
let runtime_configs_watcher = watch_prefix_with_extraction( let runtime_configs_watcher = watch_prefix_with_extraction(
etcd_client, etcd_client,
MODEL_ROOT_PATH, "mdc/", // should be model_card::ROOT_PREFIX but wrong crate
key_extractors::lease_id, key_extractors::lease_id,
|entry: ModelEntry| entry.runtime_config.and_then(|rc| rc.total_kv_blocks), |card: serde_json::Value| {
card.get("runtime_config")
.and_then(|rc| rc.get("total_kv_blocks"))
.and_then(|t_kv| t_kv.as_u64())
},
component.drt().child_token(), component.drt().child_token(),
) )
.await?; .await?;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment