Unverified commit 81162dfe, authored by Graham King and committed by GitHub
Browse files

chore(discovery): Watch/publish ModelDeploymentCard instead of ModelEntry (#3350)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent ddbb4f50
......@@ -149,7 +149,7 @@ impl EtcdBucket {
tracing::trace!("etcd create: {k}");
// Use atomic transaction to check and create in one operation
let put_options = PutOptions::new();
let put_options = PutOptions::new().with_lease(self.client.primary_lease().id());
// Build transaction that creates key only if it doesn't exist
let txn = Txn::new()
......@@ -217,9 +217,12 @@ impl EtcdBucket {
// So we do too in etcd.
}
let put_options = PutOptions::new()
.with_lease(self.client.primary_lease().id())
.with_prev_key();
let mut put_resp = self
.client
.kv_put_with_options(k, value, Some(PutOptions::new().with_prev_key()))
.kv_put_with_options(k, value, Some(put_options))
.await
.map_err(|e| StorageError::EtcdError(e.to_string()))?;
Ok(match put_resp.take_prev_key() {
......
......@@ -67,12 +67,12 @@ where
///
/// # Example
/// ```ignore
/// // Watch for ModelEntry objects and extract runtime_config field
/// // Watch for ModelDeploymentCard objects and extract runtime_config field
/// let watcher = watch_prefix_with_extraction(
/// etcd_client,
/// "models/",
/// "mdc/",
/// |kv| Some(kv.lease()), // Use lease_id as key
/// |entry: ModelEntry| entry.runtime_config, // Extract runtime_config field
/// |card: ModelDeploymentCard| card.runtime_config, // Extract runtime_config field
/// cancellation_token,
/// ).await?;
/// ```
......
......@@ -16,7 +16,6 @@ use tokio_stream::StreamExt;
// Constants for monitoring configuration
const KV_METRICS_SUBJECT: &str = "kv_metrics";
const MODEL_ROOT_PATH: &str = "models";
// Internal structs for deserializing metrics events
#[derive(serde::Deserialize)]
......@@ -35,11 +34,6 @@ struct KvStats {
kv_active_blocks: u64,
}
#[derive(serde::Deserialize)]
struct ModelEntry {
runtime_config: Option<RuntimeConfig>,
}
#[derive(serde::Deserialize)]
struct RuntimeConfig {
total_kv_blocks: Option<u64>,
......@@ -95,11 +89,18 @@ impl WorkerMonitor {
return Ok(());
};
// WorkerMonitor is in the wrong crate. It deals with LLM things (KV) so it should be in
// dynamo-llm not dynamo-runtime.
// That means we cannot use ModelDeploymentCard here, so deserialize into serde_json::Value for now.
let runtime_configs_watcher = watch_prefix_with_extraction(
etcd_client,
MODEL_ROOT_PATH,
"mdc/", // should be model_card::ROOT_PREFIX but wrong crate
key_extractors::lease_id,
|entry: ModelEntry| entry.runtime_config.and_then(|rc| rc.total_kv_blocks),
|card: serde_json::Value| {
card.get("runtime_config")
.and_then(|rc| rc.get("total_kv_blocks"))
.and_then(|t_kv| t_kv.as_u64())
},
component.drt().child_token(),
)
.await?;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment